import pandas as pd
import numpy as np


def _make_order_shard(first_order_id, month_label, n_rows=40):
    """Build one month's order shard filled with random test data.

    Columns are order_id, uid, amount and month. Order ids are consecutive,
    starting at ``first_order_id``.
    """
    return pd.DataFrame({
        "order_id": range(first_order_id, first_order_id + n_rows),
        # randint's upper bound is exclusive, so uids fall in 1001..1019.
        "uid": np.random.randint(1001, 1020, n_rows),
        "amount": np.random.uniform(100, 3000, n_rows).round(2),
        "month": [month_label] * n_rows,
    })


# January order shard (order_id 1..40).
df1 = _make_order_shard(1, "01月")

# February order shard (order_id 41..80).
df2 = _make_order_shard(41, "02月")

# User master table: 20 users (uid 1001..1020) with a random channel each.
user_df = pd.DataFrame({
    "uid": list(range(1001, 1021)),
    "username": [f"用户{x}" for x in range(1001, 1021)],
    "channel": np.random.choice(["APP", "小程序", "官网"], 20),
})

# Persist the three tables as Excel files in the current working directory.
df1.to_excel("order_01.xlsx", index=False)
df2.to_excel("order_02.xlsx", index=False)
user_df.to_excel("user_info.xlsx", index=False)
print("多表合并测试分片数据生成完成")
import pandas as pd

# Load the two monthly order shards and the user table from the current
# working directory.
df1 = pd.read_excel("order_01.xlsx")
df2 = pd.read_excel("order_02.xlsx")
user_df = pd.read_excel("user_info.xlsx")

# Scenario 1: stack the two order shards vertically with concat.
# ignore_index=True renumbers the combined rows 0..n-1 instead of keeping
# each shard's own index.
order_all = pd.concat([df1, df2], ignore_index=True)
print("==== 1、2月订单纵向合并总行数:", len(order_all))

# Scenario 2: left-join the user table onto the orders.
# how="left" keeps every order row; an order whose uid has no match in
# user_df gets NaN in the user columns.
order_user_wide = pd.merge(
    left=order_all,
    right=user_df,
    on="uid",
    how="left",
)
print("\n==== 订单+用户宽表前8行 ====")
print(order_user_wide[["order_id", "uid", "amount", "username", "channel"]].head(8))