"""Batch-convert ISD-Lite weather files to CSV.

Walks a directory tree, picks up the per-station files of one target year,
parses each file as fixed-width text, adds a station-ID column and a
timestamp column, and writes one CSV per station file.
"""
import os
import re

import pandas as pd

# ===== Configuration =====
root = r"E:\master_data\china_isd_lite1942-2025\china_isd_lite_2023\china_isd_lite_2023"
output_dir = r"E:\master_data\china_isd_lite1942-2025\china_isd_lite_2023\2023txt"
target_year = "2023"

# ===== ISD-Lite fixed-width layout =====
# Half-open (start, end) character offsets, one pair per entry in `columns`.
colspecs = [
    (0, 4), (5, 7), (8, 10), (11, 13),
    (13, 19), (19, 25), (25, 31), (31, 37),
    (37, 43), (43, 49), (49, 55), (55, 61),
]
columns = [
    "year", "month", "day", "hour",
    "air_temp", "dew_point", "pressure", "wind_dir",
    "wind_speed", "sky_condition", "precip_1h", "precip_6h",
]

# Columns whose raw values are divided by 10 after parsing.
SCALED_COLUMNS = (
    "air_temp", "dew_point", "pressure",
    "wind_speed", "precip_1h", "precip_6h",
)


def find_station_files(search_root, year):
    """Collect the ISD-Lite files of one year below ``search_root``.

    A file qualifies when its name ends with ``year`` or ``year + ".txt"``.
    The station ID is the run of digits at the start of the file name; files
    without leading digits are reported and skipped.

    Args:
        search_root: Directory that is walked recursively.
        year: Target year as a string, e.g. ``"2023"``.

    Returns:
        A sorted list of ``(file_path, station_id)`` tuples.
    """
    suffixes = (year, year + ".txt")
    found = []
    for dirpath, _dirnames, filenames in os.walk(search_root):
        for filename in filenames:
            if not filename.endswith(suffixes):
                continue
            match = re.match(r"^(\d+)", filename)
            if match is None:
                print("警告:文件名 {} 无法提取站点ID,已跳过".format(filename))
                continue
            found.append((os.path.join(dirpath, filename), match.group(1)))
    # os.walk order depends on the file system; sorting makes the processing
    # order (and therefore which file wins an output-name clash) reproducible.
    found.sort()
    return found


def read_isd_lite(file_path):
    """Parse one ISD-Lite file into a DataFrame.

    Values equal to -9999 are read as missing. The columns listed in
    ``SCALED_COLUMNS`` are divided by 10, and a ``datetime`` column is
    assembled from ``year``/``month``/``day``/``hour``.

    Args:
        file_path: Path of the fixed-width text file.

    Returns:
        A DataFrame with the columns in ``columns`` plus ``datetime``.
    """
    df = pd.read_fwf(file_path, colspecs=colspecs, names=columns, na_values=-9999)
    # NOTE(review): the ISD-Lite format document codes trace precipitation
    # as -1, which becomes -0.1 after this scaling -- confirm that downstream
    # consumers expect that.
    for col in SCALED_COLUMNS:
        df[col] = df[col] / 10
    df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
    return df


def convert_station_file(file_path, station_id, out_dir, year):
    """Convert one ISD-Lite file to ``<out_dir>/<station_id>_<year>.csv``.

    Args:
        file_path: Path of the ISD-Lite input file.
        station_id: Station identifier written into the first CSV column.
        out_dir: Existing directory that receives the CSV.
        year: Year string used in the output file name.

    Returns:
        The path of the CSV that was written.
    """
    df = read_isd_lite(file_path)
    # Station ID goes first so it is the leading column of the CSV.
    df.insert(0, "station_id", station_id)
    output_file = os.path.join(out_dir, "{}_{}.csv".format(station_id, year))
    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(output_file, index=False, encoding="utf-8-sig")
    return output_file


def main(search_root=root, out_dir=output_dir, year=target_year):
    """Run the batch conversion.

    Args:
        search_root: Directory tree containing the ISD-Lite files.
        out_dir: Directory for the CSV output; created when missing.
        year: Target year as a string.

    Raises:
        FileNotFoundError: If no file of ``year`` is found below
            ``search_root``.
    """
    os.makedirs(out_dir, exist_ok=True)

    print("正在扫描目录: {}".format(search_root))
    target_files = find_station_files(search_root, year)
    if not target_files:
        raise FileNotFoundError(
            "在 {} 下未找到任何 {} 年的ISD-Lite文件".format(search_root, year)
        )
    print("共发现 {} 个文件,对应 {} 个站点".format(
        len(target_files), len({sid for _path, sid in target_files})
    ))

    # station_id -> input file that already produced this station's CSV.
    # The output name depends only on station ID and year, so a second input
    # with the same ID overwrites the first; make that visible.
    written = {}
    for file_path, station_id in target_files:
        print("\n正在处理站点: {}, 文件: {}".format(
            station_id, os.path.basename(file_path)
        ))
        if station_id in written:
            print("警告:站点 {} 的输出已由 {} 生成,将被 {} 覆盖".format(
                station_id, written[station_id], file_path
            ))
        output_file = convert_station_file(file_path, station_id, out_dir, year)
        written[station_id] = file_path
        print("已保存:{} (包含站点ID列)".format(output_file))

    print("\n全部处理完成!")


if __name__ == "__main__":
    main()