Python
import pytesseract
from PIL import Image
import pandas as pd
import re

# Field patterns for the OCR text. Each accepts both the full-width colon
# (":") and the ASCII colon (":") and tolerates whitespace around the
# separator, because OCR output is not consistent about either.
_INVOICE_CODE_RE = re.compile(r'发票代码\s*[::]\s*(\d+)')
_INVOICE_NO_RE = re.compile(r'发票号码\s*[::]\s*(\d+)')
# Optional currency sign and thousands separators, e.g. "¥1,234.56".
_AMOUNT_RE = re.compile(r'金额\s*[::]\s*[¥¥]?\s*(\d[\d,]*(?:\.\d+)?)')
_TAX_RATE_RE = re.compile(r'税率\s*[::]\s*(\d+(?:\.\d+)?)\s*[%%]')


def _first_group(pattern, text):
    """Return the first capture group of *pattern* in *text*, or None."""
    match = pattern.search(text)
    return match.group(1) if match else None


def _parse_invoice_text(text):
    """Extract the invoice fields from already-recognised OCR text."""
    code = _first_group(_INVOICE_CODE_RE, text)
    number = _first_group(_INVOICE_NO_RE, text)
    amount = _first_group(_AMOUNT_RE, text)
    rate = _first_group(_TAX_RATE_RE, text)
    return {
        'invoice_code': code or '',
        'invoice_no': number or '',
        # Strip thousands separators before converting.
        'amount': float(amount.replace(',', '')) if amount else 0,
        # The rate is printed as a percentage; return it as a fraction.
        'tax_rate': float(rate) / 100 if rate else 0,
    }


def extract_invoice_info(image_path):
    """Run OCR on an invoice image and pull out its key fields.

    The image is recognised with Tesseract using the ``chi_sim`` language
    model, then the text is searched for the invoice code, invoice number,
    amount and tax rate.

    Args:
        image_path: Path of the invoice image to read.

    Returns:
        A dict with the keys ``invoice_code`` and ``invoice_no`` (strings,
        ``''`` when not found), ``amount`` (float, ``0`` when not found) and
        ``tax_rate`` (fraction such as ``0.13``, ``0`` when not found).
    """
    # Close the image file as soon as OCR has finished with it.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img, lang='chi_sim')
    return _parse_invoice_text(text)
Python
import pandas as pd
from pathlib import Path

# Default mapping from the statement's Chinese headers to canonical names.
_DEFAULT_COLUMN_MAPPING = {'交易日期': 'date', '发生额': 'amount'}


def process_bank_statement(file_path, bank_name, *, encoding='gbk',
                           column_mapping=None):
    """Load one bank statement and normalise it to a common layout.

    Args:
        file_path: Statement file as ``str`` or ``Path``. ``.xlsx`` and
            ``.csv`` are supported; the extension check is case-insensitive.
        bank_name: Value written to the added ``bank`` column.
        encoding: Text encoding used for CSV files (default ``'gbk'``).
        column_mapping: Optional ``{source header: canonical name}`` dict.
            Defaults to mapping ``交易日期`` to ``date`` and ``发生额`` to
            ``amount``. Headers not in the mapping are left unchanged.

    Returns:
        A DataFrame with the renamed columns plus a ``bank`` column.

    Raises:
        ValueError: If the file extension is neither ``.xlsx`` nor ``.csv``.
    """
    suffix = Path(file_path).suffix.lower()
    if suffix == '.xlsx':
        df = pd.read_excel(file_path)
    elif suffix == '.csv':
        df = pd.read_csv(file_path, encoding=encoding)
    else:
        # Fail loudly instead of hitting an unbound ``df`` further down.
        raise ValueError(
            f"Unsupported statement format {suffix!r} for {file_path!r}; "
            "expected .xlsx or .csv"
        )

    mapping = _DEFAULT_COLUMN_MAPPING if column_mapping is None else column_mapping
    df = df.rename(columns=mapping)
    df['bank'] = bank_name
    return df


if __name__ == '__main__':
    # Statements to merge, keyed by bank name. The original note mentions
    # five banks; only two are listed here.
    bank_files = {
        '工商银行': 'data/icbc_2024_01.xlsx',
        '建设银行': 'data/ccb_2024_01.csv',
    }
    # ignore_index gives the merged table one continuous row index instead
    # of repeating each statement's own 0..n labels.
    combined = pd.concat(
        [process_bank_statement(f, b) for b, f in bank_files.items()],
        ignore_index=True,
    )
    combined.to_excel('银行余额调节表.xlsx')
Python
import pandas as pd
import openpyxl
# NOTE(review): the style helpers below are not used by this function; they
# are kept because other parts of the file may rely on them.
from openpyxl.styles import Font, Alignment, PatternFill


def auto_generate_report(dept_files, output_path):
    """Merge per-department workbooks and write a per-department total.

    Each workbook is read with ``pandas.read_excel`` and tagged with its
    department name in a ``部门`` column. The ``合计`` column is then summed
    per department and written to the ``汇总表`` sheet of *output_path*.

    Args:
        dept_files: Mapping of department name to workbook path.
        output_path: Destination Excel file for the summary sheet.

    Returns:
        The summary DataFrame with the columns ``部门`` and ``合计``.

    Raises:
        ValueError: If *dept_files* is empty.
    """
    if not dept_files:
        # pandas.concat would raise an opaque "No objects to concatenate".
        raise ValueError('dept_files is empty: no department workbooks to summarise')

    frames = [
        pd.read_excel(path).assign(部门=dept)
        for dept, path in dept_files.items()
    ]
    combined = pd.concat(frames)
    summary = combined.groupby('部门').agg({'合计': 'sum'}).reset_index()
    summary.to_excel(output_path, sheet_name='汇总表', index=False)
    return summary
Python
from sklearn.ensemble import IsolationForest
import pandas as pd


def detect_anomalies(df, column_name, contamination=0.01):
    """Flag outlying values of one column with an Isolation Forest.

    Missing values in *column_name* are replaced by 0 before fitting. The
    model's labels are written to a new ``is_anomaly`` column on *df* itself
    (the caller's frame is modified in place), where ``-1`` marks an anomaly.

    Args:
        df: DataFrame holding the data to inspect.
        column_name: Name of the column used as the single feature.
        contamination: Value passed to ``IsolationForest(contamination=...)``.

    Returns:
        A new DataFrame containing only the rows labelled ``-1``. An empty
        input yields an empty result instead of an estimator error.

    Raises:
        KeyError: If *column_name* is not a column of *df*.
    """
    features = df[[column_name]].fillna(0)
    if features.empty:
        # Nothing to fit on: keep the output schema and report no anomalies.
        df['is_anomaly'] = pd.Series(dtype='int64')
        return df.copy()

    # Fixed seed so repeated runs flag the same rows.
    model = IsolationForest(contamination=contamination, random_state=42)
    df['is_anomaly'] = model.fit_predict(features)
    # Return a copy so callers can edit the result without pandas
    # chained-assignment warnings.
    return df[df['is_anomaly'] == -1].copy()


if __name__ == '__main__':
    # Example: detect anomalous expense entries.
    expense_data = pd.read_excel('费用数据.xlsx')
    anomalies = detect_anomalies(expense_data, '差旅费单笔金额')
Python
# Required libraries: pip install pytesseract pillow pandas openpyxl
import os
from pathlib import Path

import pandas as pd

# Folder that holds the scanned invoice images.
invoice_dir = Path('D:/invoices')

# Collect PNG and JPG scans; sort so the listing order is reproducible
# rather than depending on directory enumeration order.
images = sorted(list(invoice_dir.glob('*.png')) + list(invoice_dir.glob('*.jpg')))
print(f"找到 {len(images)} 张发票")

# One row per image: file name and full path.
results = [{'文件名': img.name, '文件路径': str(img)} for img in images]

# Name the columns explicitly so the sheet keeps its header row even when
# no images were found.
df = pd.DataFrame(results, columns=['文件名', '文件路径'])
df.to_excel('发票清单.xlsx', index=False)