import re
from pathlib import Path

import pandas as pd


def advanced_split_columns(df: pd.DataFrame, rules: dict) -> pd.DataFrame:
    """Split text columns into new columns by regex separator or by position.

    The input frame is never modified; a copy with the extra columns is
    returned. New columns are assigned by name, so a new column whose name
    already exists overwrites the existing one instead of producing a
    duplicate label.

    Args:
        df: Source table.
        rules: Mapping of source column name to a rule dict. Two rule
            shapes are supported:

            * ``{'type': 'regex_split', 'sep': <pattern>,
              'new_columns': [name, ...]}`` -- split each value on ``sep``
              (passed to ``Series.str.split``) and keep the first
              ``len(new_columns)`` parts. Extra parts are dropped; parts
              that do not exist are filled with None.
            * ``{'type': 'slice', 'slices': {name: (start, end), ...}}`` --
              take the substring ``value[start:end]`` for each target
              column. Suited to fixed-width strings.

    Returns:
        A new DataFrame containing the original columns plus the derived
        ones.

    Raises:
        KeyError: If a rule lacks a key its type requires (``'type'``,
            ``'sep'``, ``'new_columns'`` or ``'slices'``).
    """
    df = df.copy()

    for col_name, rule in rules.items():
        if col_name not in df.columns:
            print(f"⚠️ 警告：列 '{col_name}' 不存在，跳过。")
            continue

        source = df[col_name]
        # Work on text, but keep missing cells missing: a plain astype(str)
        # would turn NaN/None into the literal strings 'nan'/'None', which
        # would then be split or sliced as if they were real data.
        text = source.astype(str).where(source.notna())
        rule_type = rule['type']

        if rule_type == 'regex_split':
            # expand=True spreads the split parts across columns 0..k-1.
            parts = text.str.split(rule['sep'], expand=True)
            available = parts.shape[1]
            for position, new_name in enumerate(rule['new_columns']):
                if position < available:
                    df[new_name] = parts.iloc[:, position]
                else:
                    # Fewer parts than requested names: pad with None
                    # rather than failing on a column-count mismatch.
                    df[new_name] = None
        elif rule_type == 'slice':
            for target_col, (start, end) in rule['slices'].items():
                # .str[start:end] follows ordinary Python slice semantics.
                df[target_col] = text.str[start:end]
        else:
            print(f"⚠️ 警告：列 '{col_name}' 的规则类型 '{rule_type}' 未知，跳过。")

    return df


def main() -> None:
    """Run the demo: load a sheet, apply the split rules, export the result."""
    # Deliberately messy sample rows, used when no input workbook is present.
    # Positional slicing is strict about layout; for inconsistent formats
    # (as in the dates below) a regex-based rule is the better choice.
    demo_data = {
        '姓名': ['张三,Zhang', '李四-Li', '王五 Wang', '赵六;Zhao'],
        '出生日期': ['1990-05-20', '1992.12.01', '1988/08/08', '19950101'],
    }

    source_path = Path('test.xlsx')
    if source_path.exists():
        # Read every cell as text so dates are not parsed into datetime
        # objects, which would defeat positional slicing.
        df = pd.read_excel(source_path, dtype=str)
    else:
        print("ℹ️ 未找到 test.xlsx，改用内置示例数据。")
        df = pd.DataFrame(demo_data)

    # --- Configure the split rules here ---
    my_rules = {
        '姓名': {
            'type': 'regex_split',
            # One or more of: space, comma, hyphen, full-width semicolon,
            # ASCII semicolon. The hyphen is escaped so it is a literal and
            # does not form a character range with its neighbours.
            'sep': r'[ ,\-；;]+',
            'new_columns': ['中文名', '英文名'],
        },
        '出生日期': {
            'type': 'slice',
            # Mapping of new column name -> (start index, end index).
            'slices': {
                '年': (0, 4),
                '月': (5, 7),
                '日': (8, 10),
            },
        },
    }

    df_result = advanced_split_columns(df, my_rules)

    df_result.to_excel('分列结果.xlsx', index=False)
    print("✅ 处理完成！已生成：分列结果.xlsx")
    print(df_result.head())


if __name__ == "__main__":
    main()