当前位置：首页>python>【重复测量纵向数据】Python16.状态空间模型(State Space Models,SSM)

【重复测量纵向数据】Python16.状态空间模型(State Space Models,SSM)

2026-06-30 01:37:20

【重复测量纵向数据】

Python16.状态空间模型(State Space Models,SSM)

纵向数据是在不同时间点上对同一组个体、物体或实体进行重复观测所收集的数据。它就像给研究对象拍摄“动态影像”，能记录其随时间变化的过程，帮助我们理解趋势、模式和影响因素。

🔬 模型的概念、原理、思想

16.状态空间模型

概念：一种将动态系统建模为不可观测的潜在状态和含有噪声的观测值的时间序列模型。包含描述状态演变和观测过程的两个方程。

原理：状态方程：`α_t=Tα_{t-1}+Rη_t`；观测方程：`Y_t=Zα_t+ε_t`。使用卡尔曼滤波进行递归的递推、滤波和平滑，以估计潜状态。

思想：“信号与噪声分离”。核心是将观测值分解为有意义的系统动态（信号）和随机扰动（测量误差和过程误差），特别适合处理含噪声的时序数据和进行实时预测。

可视化：滤波/平滑状态估计图：绘制卡尔曼滤波或平滑后得到的潜状态估计轨迹（平滑曲线）与原始观测值（散点）的对比，显示模型如何“去噪”并揭示潜在趋势。

公共卫生意义：传染病实时监测：将每日报告的病例数视为含有报告延迟和随机波动的观测值，估计不可见的、真实的有效再生数这一关键潜状态，以动态评估疫情传播强度，为实时防控决策提供依据。

💡 22种重复测量纵向数据分析模型

核心思想进阶：纵向数据分析方法的发展，正从处理相关性（如GEE、混合模型），走向揭示异质性（如GBTM、LCM），再迈向整合动态机制与预测（如联合模型、状态空间模型、贝叶斯方法）。

1.数据预处理：

2.模型拟合：

3.模型可视化：

4.结果保存与报告生成

💎模型实现代码介绍

总的来说，纵向数据因其时间延续性和对象一致性，成为了解事物动态发展过程、探究因果关系的强大工具。

当然，处理纵向数据也伴随一些挑战，例如成本较高、可能存在数据缺失，且分析方法通常比处理横截面数据更为复杂。

下面我们使用R语言进行纵向数据Python16.状态空间模型(State Space Models,SSM)：

# ===========================================# 16. 状态空间模型 (State Space Model, SSM) 分析 - Python实现# ===========================================import osimport sysimport numpy as npimport pandas as pdimport matplotlib.pyplot as pltfrom pathlib import Pathfrom datetime import datetimeimport warningswarnings.filterwarnings('ignore')# 设置matplotlib中文支持plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']plt.rcParams['axes.unicode_minus'] = False# 导入必要的统计和机器学习库try:    import statsmodels.api as sm    from statsmodels.tsa.statespace.structural import UnobservedComponents    from statsmodels.tsa.statespace.sarimax import SARIMAX    from statsmodels.tsa.stattools import acf, pacf    from scipy import stats    from scipy.stats import shapiro, norm    import seaborn as sns    from sklearn.linear_model import LinearRegression    from sklearn.metrics import mean_squared_error, mean_absolute_error    from docx import Document    from docx.shared import Inches, Pt    from docx.enum.text import WD_ALIGN_PARAGRAPH    from docx.enum.table import WD_TABLE_ALIGNMENTexcept ImportError as e:print(f"缺少必要的库: {e}")print("正在安装必要的库...")    import subprocess    subprocess.check_call([sys.executable, "-m", "pip", "install","statsmodels", "scipy", "seaborn","scikit-learn", "python-docx", "openpyxl"])    import statsmodels.api as sm    from statsmodels.tsa.statespace.structural import UnobservedComponents    from statsmodels.tsa.statespace.sarimax import SARIMAX    from statsmodels.tsa.stattools import acf, pacf    from scipy import stats    from scipy.stats import shapiro, norm    import seaborn as sns    from sklearn.linear_model import LinearRegression    from sklearn.metrics import mean_squared_error, mean_absolute_error    from docx import Document    from docx.shared import Inches, Pt    from docx.enum.text import WD_ALIGN_PARAGRAPH    from docx.enum.table import WD_TABLE_ALIGNMENT# 获取桌面路径if sys.platform == "win32":    desktop_path = Path.home() / "Desktop"else:    desktop_path = Path.home() / "桌面"if (Path.home() / "桌面").exists() else Path.home() / "Desktop"# 设置工作目录os.chdir(desktop_path)# 创建结果文件夹results_dir = desktop_path / "16-SSM结果"results_dir.mkdir(exist_ok=True)os.chdir(results_dir)print("=" * 70)print("状态空间模型(SSM)分析 - Python实现")print("=" * 70)# 1. 读取数据print("\n1. 正在读取数据...")try:# 读取Excel文件    data_path = desktop_path / "longitudinal_data.xlsx"    data_full = pd.read_excel(data_path, sheet_name="Full_Dataset")# 检查数据结构print("\n数据结构:")print(f"数据维度: {data_full.shape}")print(f"变量数: {len(data_full.columns)}")print(f"时间点: {', '.join(map(str, sorted(data_full['Time'].unique())))}")print(f"个体数: {len(data_full['ID'].unique())}")except FileNotFoundError:print(f"错误: 找不到数据文件 {data_path}")print("正在创建示例数据...")# 创建示例数据    np.random.seed(42)    n_ids = 5    n_time_points = 10    data = []for id_num in range(1, n_ids + 1):for time_point in range(1, n_time_points + 1):            data.append({'ID': id_num,'Time': time_point,'Outcome_Continuous': np.random.normal(50 + id_num * 2, 5),'Indicator1': np.random.normal(30, 3),'Indicator2': np.random.normal(20, 2),'Indicator3': np.random.normal(40, 4),'Medication_Adherence': np.random.uniform(0.5, 1.0),'Stress_Level': np.random.uniform(1, 10),'Quality_of_Life': np.random.uniform(30, 90)            })    data_full = pd.DataFrame(data)print("已创建示例数据")# 2. 数据准备与预处理print("\n2. 数据准备与预处理...")# 2.1 创建时间序列数据集ts_data = data_full[['ID', 'Time', 'Outcome_Continuous', 'Indicator1','Indicator2', 'Indicator3', 'Medication_Adherence','Stress_Level', 'Quality_of_Life']].sort_values(['ID', 'Time'])# 检查缺失值print("\n缺失值统计:")missing_stats = ts_data.isnull().sum()if missing_stats.any():print(missing_stats[missing_stats > 0])else:print("无缺失值")# 2.2 处理缺失值（向前向后填充）ts_data_clean = ts_data.copy()for col in ts_data_clean.columns:if ts_data_clean[col].dtype in ['float64', 'int64']:# 对每个ID分别进行填充for id_val in ts_data_clean['ID'].unique():            mask = ts_data_clean['ID'] == id_val# 向前填充            ts_data_clean.loc[mask, col] = ts_data_clean.loc[mask, col].fillna(method='ffill')# 向后填充            ts_data_clean.loc[mask, col] = ts_data_clean.loc[mask, col].fillna(method='bfill')# 如果还有缺失值，用组均值填充if ts_data_clean.loc[mask, col].isnull().any():                group_mean = ts_data_clean.loc[mask, col].mean()                ts_data_clean.loc[mask, col] = ts_data_clean.loc[mask, col].fillna(group_mean)# 检查处理后的缺失值print("\n处理后缺失值统计:")missing_stats_clean = ts_data_clean.isnull().sum()if missing_stats_clean.any():print(missing_stats_clean[missing_stats_clean > 0])else:print("无缺失值")# 2.3 创建时间序列对象sample_id = 1sample_ts = ts_data_clean[ts_data_clean['ID'] == sample_id].drop('ID', axis=1)# 转换为时间序列ts_outcome = sample_ts['Outcome_Continuous'].valuests_indicator1 = sample_ts['Indicator1'].valuests_indicator2 = sample_ts['Indicator2'].valuests_indicator3 = sample_ts['Indicator3'].valuestime_points = sample_ts['Time'].valuesprint(f"\n示例个体(ID = {sample_id})的时间序列数据:")print(sample_ts.head())# 3. 状态空间模型分析print("\n3. 状态空间模型(SSM)分析...")# 3.1 构建状态空间模型print("\n3.1 使用statsmodels构建状态空间模型...")model_used = Nonefitted_model = Nonefiltered_level = Nonesmoothed_level = Nonemodel_params = Noneparam_names = Noneprint("\n尝试方案1：局部水平模型...")

except Exception as e:print(f"局部水平模型失败: {e}")print("\n尝试方案2：使用SARIMAX作为备选...")    try:# 使用SARIMAX(0,1,0)作为局部水平模型的近似        model = SARIMAX(ts_outcome, order=(0, 1, 0), seasonal_order=(0, 0, 0, 0))        fitted_model = model.fit(disp=False)# SARIMAX没有直接的滤波状态，使用预测作为近似        filtered_level = fitted_model.predict(start=0, end=len(ts_outcome) - 1)        smoothed_level = filtered_level.copy()  # SARIMAX没有平滑，使用滤波作为近似# 标记使用的模型        model_used = "SARIMAX(0,1,0) 近似模型"# 提取参数        model_params = [            fitted_model.params.get('sigma2', np.nan),            np.nan  # SARIMAX没有单独的水平方差        ]        param_names = ["残差方差", "水平过程方差"]print("SARIMAX模型拟合成功!")    except Exception as e2:print(f"SARIMAX模型也失败: {e2}")print("\n尝试方案3：使用线性回归作为替代...")# 创建简单的时间序列回归        X = time_points.reshape(-1, 1)        lm_model = LinearRegression()        lm_model.fit(X, ts_outcome)# 创建模拟的滤波和平滑状态        filtered_level = lm_model.predict(X)        smoothed_level = filtered_level.copy()# 标记使用的模型        model_used = "线性回归替代模型"# 计算残差方差        residuals = ts_outcome - filtered_level        residual_var = np.var(residuals)# 提取参数        model_params = [            residual_var,            0  # 线性回归没有系统方差        ]        param_names = ["残差方差", "系统方差"]# 创建模拟的fitted_model对象        fitted_model = lm_modelprint("线性回归模型拟合成功!")print(f"\n使用的模型: {model_used}")# 3.2 创建参数表print("\n3.2 创建参数表...")# 确保参数表被创建if model_params is not None and param_names is not None:    param_table = pd.DataFrame({'参数': param_names,'估计值': np.round(model_params, 4),'解释': ['观测值的随机波动/测量误差','潜在状态的变化程度/系统噪声'        ]    })print("\n模型参数表:")print(param_table.to_string(index=False))else:# 创建默认参数表    param_table = pd.DataFrame({'参数': ['观测误差方差', '系统过程方差'],'估计值': [np.nan, np.nan],'解释': ['观测值的随机波动/测量误差', '潜在状态的变化程度/系统噪声']    })print("警告: 无法提取模型参数，创建空参数表")# 3.3 计算模型拟合指标print("\n3.3 计算模型拟合指标...")# 计算残差if filtered_level is not None and len(filtered_level) > 0:    residuals_val = ts_outcome - filtered_levelelse:    residuals_val = np.full_like(ts_outcome, np.nan)# 初始化拟合指标logLik_val = np.nanaic_val = np.nanbic_val = np.nan# 根据模型类型计算拟合指标if model_used == "局部水平模型 (UnobservedComponents)" and fitted_model is not None:    try:        logLik_val = fitted_model.llf        aic_val = fitted_model.aic        bic_val = fitted_model.bic    except Exception as e:print(f"计算拟合指标失败: {e}")elif model_used == "SARIMAX(0,1,0) 近似模型" and fitted_model is not None:    try:        logLik_val = fitted_model.llf        aic_val = fitted_model.aic        bic_val = fitted_model.bic    except Exception as e:print(f"计算拟合指标失败: {e}")elif model_used == "线性回归替代模型" and fitted_model is not None:    try:# 线性回归的对数似然        n = len(ts_outcome)        rss = np.sum(residuals_val ** 2)        sigma2 = rss / n        logLik_val = -n / 2 * np.log(2 * np.pi * sigma2) - rss / (2 * sigma2)# AIC和BIC        k = 2  # 斜率和截距        aic_val = 2 * k - 2 * logLik_val        bic_val = k * np.log(n) - 2 * logLik_val    except Exception as e:print(f"计算拟合指标失败: {e}")# 残差检验try:    residual_var = np.nanvar(residuals_val)except:    residual_var = np.nantry:    acf_vals = acf(residuals_val, nlags=5, fft=False, missing='drop')    acf_lag1 = acf_vals[1] if len(acf_vals) > 1 else np.nanexcept:    acf_lag1 = np.nantry:if len(residuals_val) >= 3 and len(residuals_val) <= 5000 and not np.all(np.isnan(residuals_val)):        _, shapiro_p = shapiro(residuals_val[~np.isnan(residuals_val)])else:        shapiro_p = np.nanexcept:    shapiro_p = np.nan# 创建拟合优度表print("\n创建拟合优度表...")goodness_data = {'指标': ['模型类型', '对数似然', 'AIC', 'BIC', '残差方差','残差自相关(滞后1)', '残差正态性检验(p值)', '观测数'    ],'值': [        model_used if model_used else"N/A",        f"{logLik_val:.3f}"if not np.isnan(logLik_val) else"N/A",        f"{aic_val:.3f}"if not np.isnan(aic_val) else"N/A",        f"{bic_val:.3f}"if not np.isnan(bic_val) else"N/A",        f"{residual_var:.3f}"if not np.isnan(residual_var) else"N/A",        f"{acf_lag1:.3f}"if not np.isnan(acf_lag1) else"N/A",        f"{shapiro_p:.3f}"if not np.isnan(shapiro_p) else"N/A",        str(len(ts_outcome))    ]}goodness_fit = pd.DataFrame(goodness_data)print("\n拟合优度表:")print(goodness_fit.to_string(index=False))# 4. 可视化print("\n4. 生成可视化图表...")# 设置Seaborn样式sns.set_style("whitegrid")sns.set_context("notebook")# 4.1 原始观测值与滤波/平滑状态对比图print("\n4.1 生成滤波/平滑状态估计图...")plt.figure(figsize=(12, 8))plt.plot(time_points, ts_outcome, 'bo', markersize=8, alpha=0.7, label='原始观测值')if filtered_level is not None:    plt.plot(time_points, filtered_level, 'r--', linewidth=2, label='滤波状态估计')if smoothed_level is not None:    plt.plot(time_points, smoothed_level, 'g-', linewidth=2, label='平滑状态估计')plt.title(f'状态空间模型：滤波与平滑状态估计 ({model_used})', fontsize=16, fontweight='bold')plt.xlabel('时间点', fontsize=14)plt.ylabel('观测值/状态估计', fontsize=14)plt.legend(fontsize=12)plt.grid(True, alpha=0.3)# 保存图像plt.tight_layout()plt.savefig('SSM_滤波平滑对比图.png', dpi=300, bbox_inches='tight')plt.close()print("滤波/平滑状态估计图已保存: SSM_滤波平滑对比图.png")# 4.2 残差诊断图print("\n4.2 生成残差诊断图...")fig, axes = plt.subplots(2, 2, figsize=(14, 10))# 残差序列图axes[0, 0].plot(time_points, residuals_val, 'bo-', linewidth=1.5, markersize=6)axes[0, 0].axhline(y=0, color='r', linestyle='--', linewidth=1.5)if not np.all(np.isnan(residuals_val)):    std_resid = np.nanstd(residuals_val)    axes[0, 0].axhline(y=2 * std_resid, color='gray', linestyle='--', alpha=0.7)    axes[0, 0].axhline(y=-2 * std_resid, color='gray', linestyle='--', alpha=0.7)axes[0, 0].set_title('残差序列图', fontsize=14, fontweight='bold')axes[0, 0].set_xlabel('时间')axes[0, 0].set_ylabel('残差')axes[0, 0].grid(True, alpha=0.3)# 残差自相关函数try:    acf_vals = acf(residuals_val[~np.isnan(residuals_val)], nlags=20, fft=False)    axes[0, 1].bar(range(len(acf_vals)), acf_vals, color='darkblue', alpha=0.7)    axes[0, 1].axhline(y=0, color='black', linewidth=0.8)    axes[0, 1].axhline(y=1.96 / np.sqrt(len(residuals_val)), color='red', linestyle='--', linewidth=1)    axes[0, 1].axhline(y=-1.96 / np.sqrt(len(residuals_val)), color='red', linestyle='--', linewidth=1)    axes[0, 1].set_title('残差自相关函数(ACF)', fontsize=14, fontweight='bold')    axes[0, 1].set_xlabel('滞后')    axes[0, 1].set_ylabel('ACF')except:    axes[0, 1].text(0.5, 0.5, '无法计算ACF', ha='center', va='center', fontsize=12, color='red')    axes[0, 1].set_title('残差自相关函数(ACF)', fontsize=14, fontweight='bold')# 残差偏自相关函数try:    pacf_vals = pacf(residuals_val[~np.isnan(residuals_val)], nlags=20)    axes[1, 0].bar(range(len(pacf_vals)), pacf_vals, color='darkgreen', alpha=0.7)    axes[1, 0].axhline(y=0, color='black', linewidth=0.8)    axes[1, 0].axhline(y=1.96 / np.sqrt(len(residuals_val)), color='red', linestyle='--', linewidth=1)    axes[1, 0].axhline(y=-1.96 / np.sqrt(len(residuals_val)), color='red', linestyle='--', linewidth=1)    axes[1, 0].set_title('残差偏自相关函数(PACF)', fontsize=14, fontweight='bold')    axes[1, 0].set_xlabel('滞后')    axes[1, 0].set_ylabel('PACF')except:    axes[1, 0].text(0.5, 0.5, '无法计算PACF', ha='center', va='center', fontsize=12, color='red')    axes[1, 0].set_title('残差偏自相关函数(PACF)', fontsize=14, fontweight='bold')# 残差Q-Q图try:if not np.all(np.isnan(residuals_val)):        valid_resid = residuals_val[~np.isnan(residuals_val)]        stats.probplot(valid_resid, dist="norm", plot=axes[1, 1])        axes[1, 1].set_title('残差正态Q-Q图', fontsize=14, fontweight='bold')        axes[1, 1].set_xlabel('理论分位数')        axes[1, 1].set_ylabel('样本分位数')except:    axes[1, 1].text(0.5, 0.5, '无法计算Q-Q图', ha='center', va='center', fontsize=12, color='red')    axes[1, 1].set_title('残差正态Q-Q图', fontsize=14, fontweight='bold')plt.tight_layout()plt.savefig('SSM_残差诊断图.png', dpi=300, bbox_inches='tight')plt.close()print("残差诊断图已保存: SSM_残差诊断图.png")# 4.3 预测图print("\n4.3 生成预测图...")if model_used == "局部水平模型 (UnobservedComponents)" and fitted_model is not None:    try:# 预测未来3个时间点        forecast_horizon = 3        forecast = fitted_model.get_forecast(steps=forecast_horizon)# 创建预测数据        future_time = np.arange(time_points[-1] + 1, time_points[-1] + forecast_horizon + 1)        forecast_mean = forecast.predicted_mean        forecast_ci = forecast.conf_int()        plt.figure(figsize=(12, 8))# 历史数据        plt.plot(time_points, ts_outcome, 'b-', linewidth=2, label='历史观测值')        plt.plot(time_points, ts_outcome, 'bo', markersize=8, alpha=0.7)# 预测数据        plt.plot(future_time, forecast_mean, 'r--', linewidth=2, label='预测值')        plt.plot(future_time, forecast_mean, 'ro', markersize=8, alpha=0.7)# 预测区间        plt.fill_between(future_time,                         forecast_ci.iloc[:, 0],                         forecast_ci.iloc[:, 1],                         color='red', alpha=0.2, label='95%预测区间')        plt.title('状态空间模型：未来预测', fontsize=16, fontweight='bold')        plt.xlabel('时间点', fontsize=14)        plt.ylabel('值', fontsize=14)        plt.legend(fontsize=12)        plt.grid(True, alpha=0.3)        plt.tight_layout()        plt.savefig('SSM_预测图.png', dpi=300, bbox_inches='tight')        plt.close()print("预测图已保存: SSM_预测图.png")    except Exception as e:print(f"预测图生成失败: {e}")else:print("当前模型不支持预测或模型对象不存在")# 5. 创建综合结果表print("\n5. 创建综合结果表...")# 5.1 模型参数表print("\n5.1 模型参数表...")param_table_formatted = param_table.copy()param_table_formatted['估计值'] = param_table_formatted['估计值'].apply(    lambda x: "N/A"if pd.isna(x) else f"{x:.3f}")param_table_formatted.to_csv('SSM_模型参数表.csv', index=False, encoding='utf-8-sig')print("模型参数表已保存: SSM_模型参数表.csv")# 5.2 拟合优度表print("\n5.2 拟合优度表...")goodness_fit.to_csv('SSM_拟合优度表.csv', index=False, encoding='utf-8-sig')print("拟合优度表已保存: SSM_拟合优度表.csv")# 5.3 状态估计值表print("\n5.3 状态估计值表...")state_estimates = pd.DataFrame({'时间点': time_points,'原始观测值': np.round(ts_outcome, 3),'滤波状态估计': np.round(filtered_level, 3) if filtered_level is not None else np.nan,'平滑状态估计': np.round(smoothed_level, 3) if smoothed_level is not None else np.nan,'滤波残差': np.round(ts_outcome - filtered_level, 3) if filtered_level is not None else np.nan,'平滑残差': np.round(ts_outcome - smoothed_level, 3) if smoothed_level is not None else np.nan})state_estimates.to_csv('SSM_状态估计表.csv', index=False, encoding='utf-8-sig')print("状态估计表已保存: SSM_状态估计表.csv")# 5.4 综合报告表print("\n5.4 创建综合报告表...")# 确定收敛状态convergence_status = "未检查"if model_used == "局部水平模型 (UnobservedComponents)" and fitted_model is not None:    convergence_status = "收敛"if fitted_model.mle_retvals.get('converged', False) else"未收敛"elif model_used == "SARIMAX(0,1,0) 近似模型" and fitted_model is not None:    convergence_status = "收敛"if fitted_model.mle_retvals.get('converged', False) else"未收敛"# 残差独立性判断resid_indep = "N/A"if np.isnan(acf_lag1) else"通过"if abs(acf_lag1) < 0.2 else"未通过"# 残差正态性判断resid_norm = "N/A"if np.isnan(shapiro_p) else"通过"if shapiro_p > 0.05 else"未通过"# 模型适用性model_applicability = "适用"if len(ts_outcome) >= 5 else"数据点较少，结果仅供参考"comprehensive_report = pd.DataFrame({'分析项目': ['分析模型', '分析个体', '时间点数', '观测变量', '状态维度','估计方法', '收敛状态', '对数似然值', 'AIC准则', 'BIC准则','残差独立性', '残差正态性', '模型适用性'    ],'结果': [        f'状态空间模型 - {model_used if model_used else "N/A"}',        f'ID = {sample_id}',        str(len(ts_outcome)),'Outcome_Continuous','水平状态','最大似然估计(MLE)',        convergence_status,        f'{logLik_val:.3f}'if not np.isnan(logLik_val) else'N/A',        f'{aic_val:.3f}'if not np.isnan(aic_val) else'N/A',        f'{bic_val:.3f}'if not np.isnan(bic_val) else'N/A',        resid_indep,        resid_norm,        model_applicability    ]})comprehensive_report.to_csv('SSM_综合报告表.csv', index=False, encoding='utf-8-sig')print("综合报告表已保存: SSM_综合报告表.csv")# 6. 创建Word分析报告print("\n6. 正在生成Word分析报告...")try:# 创建Word文档    doc = Document()# 标题    title = doc.add_heading('状态空间模型(SSM)分析报告', 0)    title.alignment = WD_ALIGN_PARAGRAPH.CENTER# 生成日期    doc.add_paragraph(f'生成日期：{datetime.now().strftime("%Y年%m月%d日")}')    doc.add_paragraph()# 1. 研究概述    doc.add_heading('1. 研究概述', level=1)    doc.add_paragraph(f'本研究基于个体ID = {sample_id}的纵向数据，使用状态空间模型分析其健康指标的时间动态变化。')    doc.add_paragraph('状态空间模型将观测序列分解为潜在状态（信号）和测量误差（噪声），通过卡尔曼滤波递归估计不可观测的潜状态。')    doc.add_paragraph('模型方程：')    doc.add_paragraph('状态方程：α_t = T·α_{t-1} + R·η_t')    doc.add_paragraph('观测方程：Y_t = Z·α_t + ε_t')    doc.add_paragraph()# 2. 数据描述    doc.add_heading('2. 数据描述', level=1)    doc.add_paragraph(f'分析个体：ID = {sample_id}')    doc.add_paragraph(f'时间点数：{len(ts_outcome)}')    doc.add_paragraph('观测变量：Outcome_Continuous')    doc.add_paragraph(f'数据范围：{np.min(ts_outcome):.2f} ~ {np.max(ts_outcome):.2f}')    doc.add_paragraph()# 3. 模型设定与选择    doc.add_heading('3. 模型设定与选择', level=1)    doc.add_paragraph(f'最终采用模型：{model_used}')if model_used == "局部水平模型 (UnobservedComponents)":        doc.add_paragraph('模型结构：局部水平模型')        doc.add_paragraph('状态维度：1（水平）')elif model_used == "SARIMAX(0,1,0) 近似模型":        doc.add_paragraph('模型结构：SARIMAX(0,1,0)模型')        doc.add_paragraph('状态维度：1（水平）')elif model_used == "线性回归替代模型":        doc.add_paragraph('模型结构：线性回归模型（替代方案）')        doc.add_paragraph('注：由于数据点较少，复杂状态空间模型可能不稳定，采用线性模型作为替代。')    doc.add_paragraph('估计方法：最大似然估计(MLE)')    doc.add_paragraph('滤波算法：卡尔曼滤波')    doc.add_paragraph()# 4. 模型结果    doc.add_heading('4. 模型结果', level=1)# 4.1 参数估计    doc.add_heading('4.1 参数估计', level=2)    doc.add_paragraph('下表展示了模型参数的估计结果：')# 创建参数表格    table = doc.add_table(rows=len(param_table_formatted) + 1, cols=3)    table.style = 'Light Grid Accent 1'# 表头    header_cells = table.rows[0].cells    header_cells[0].text = '参数'    header_cells[1].text = '估计值'    header_cells[2].text = '解释'# 数据行for i, row in param_table_formatted.iterrows():        row_cells = table.rows[i + 1].cells        row_cells[0].text = str(row['参数'])        row_cells[1].text = str(row['估计值'])        row_cells[2].text = str(row['解释'])    doc.add_paragraph()    doc.add_paragraph('参数解释：')    doc.add_paragraph('• 观测误差方差：反映测量误差的大小，值越大表示观测噪声越大')    doc.add_paragraph('• 系统过程方差：反映潜在状态的变化程度，值越大表示状态波动越剧烈')    doc.add_paragraph()# 4.2 拟合优度    doc.add_heading('4.2 模型拟合优度', level=2)    doc.add_paragraph('下表展示了模型的拟合优度指标：')# 创建拟合优度表格    table2 = doc.add_table(rows=len(goodness_fit) + 1, cols=2)    table2.style = 'Light Grid Accent 1'# 表头    header_cells2 = table2.rows[0].cells    header_cells2[0].text = '指标'    header_cells2[1].text = '值'# 数据行for i, row in goodness_fit.iterrows():        row_cells = table2.rows[i + 1].cells        row_cells[0].text = str(row['指标'])        row_cells[1].text = str(row['值'])    doc.add_paragraph()# 5. 可视化结果    doc.add_heading('5. 可视化结果', level=1)# 5.1 滤波平滑对比图    doc.add_heading('5.1 滤波与平滑状态估计图', level=2)    doc.add_paragraph('该图展示了原始观测值、卡尔曼滤波估计和平滑估计的对比：')    doc.add_paragraph('• 蓝色点：原始观测值（可能含有测量误差）')    doc.add_paragraph('• 红色虚线：滤波状态估计（基于当前及之前信息）')    doc.add_paragraph('• 绿色实线：平滑状态估计（基于全部时间序列信息）')    doc.add_paragraph()if os.path.exists('SSM_滤波平滑对比图.png'):        doc.add_picture('SSM_滤波平滑对比图.png', width=Inches(6))# 5.2 残差诊断图    doc.add_heading('5.2 残差诊断图', level=2)    doc.add_paragraph('残差诊断用于检验模型的适用性和假设是否满足：')    doc.add_paragraph('• 左上：残差序列图 - 残差应随机分布在0附近，无趋势')    doc.add_paragraph('• 右上：自相关函数(ACF) - 残差应无显著自相关')    doc.add_paragraph('• 左下：偏自相关函数(PACF) - 残差应无显著偏自相关')    doc.add_paragraph('• 右下：正态Q-Q图 - 残差应近似服从正态分布')    doc.add_paragraph()if os.path.exists('SSM_残差诊断图.png'):        doc.add_picture('SSM_残差诊断图.png', width=Inches(6))# 5.3 预测图if os.path.exists('SSM_预测图.png'):        doc.add_heading('5.3 未来预测图', level=2)        doc.add_paragraph('该图展示了基于状态空间模型对未来时间点的预测结果：')        doc.add_paragraph('• 蓝色线：历史观测值')        doc.add_paragraph('• 红色虚线：预测值')        doc.add_paragraph('• 红色阴影：95%预测区间')        doc.add_paragraph()        doc.add_picture('SSM_预测图.png', width=Inches(6))# 6. 公共卫生意义    doc.add_heading('6. 公共卫生意义', level=1)    doc.add_paragraph('状态空间模型在公共卫生领域有重要应用价值：')    doc.add_paragraph('1. 传染病实时监测：')    doc.add_paragraph('   • 将每日报告的病例数视为含有报告延迟和随机波动的观测值')    doc.add_paragraph('   • 估计不可见的真实有效再生数(Rt)这一关键潜状态')    doc.add_paragraph('   • 动态评估疫情传播强度，为实时防控决策提供依据')    doc.add_paragraph()    doc.add_paragraph('2. 慢性病管理：')    doc.add_paragraph('   • 将患者的生物标志物测量值分解为长期趋势和短期波动')    doc.add_paragraph('   • 分离疾病进展信号与测量噪声')    doc.add_paragraph('   • 实现精准个体化管理，及时调整治疗方案')    doc.add_paragraph()    doc.add_paragraph('3. 卫生政策评估：')    doc.add_paragraph('   • 分离政策干预的长期效应和季节性波动')    doc.add_paragraph('   • 准确评估政策效果，避免误判')    doc.add_paragraph()# 7. 结论与建议    doc.add_heading('7. 结论与建议', level=1)    doc.add_paragraph(f'本研究通过{model_used}成功分析了个体健康指标的时间动态变化。')# 根据模型结果给出结论if not np.isnan(acf_lag1) and not np.isnan(shapiro_p):if abs(acf_lag1) < 0.2 and shapiro_p > 0.05:            doc.add_paragraph('模型诊断显示：残差无显著自相关且近似正态分布，模型拟合良好。')else:            doc.add_paragraph('模型诊断提示：残差可能存在问题，建议进一步检查或考虑其他模型。')else:        doc.add_paragraph('模型诊断：无法完成完整的残差诊断。')    doc.add_paragraph()    doc.add_paragraph('建议：')    doc.add_paragraph('1. 扩展应用到多个个体，建立分层状态空间模型')    doc.add_paragraph('2. 收集更长时间序列数据，提高模型稳定性')    doc.add_paragraph('3. 纳入协变量（如治疗类型、年龄等）改进模型')    doc.add_paragraph('4. 开发实时监测系统，基于卡尔曼滤波进行在线更新')    doc.add_paragraph()# 保存Word文档    word_file = f'SSM_分析报告_{datetime.now().strftime("%Y%m%d")}.docx'    doc.save(word_file)print(f"Word分析报告已保存: {word_file}")except Exception as e:print(f"Word报告生成失败: {e}")# 创建最简单的报告    try:        simple_doc = Document()        simple_doc.add_heading('状态空间模型(SSM)分析报告', 0)        simple_doc.add_paragraph(f'生成日期：{datetime.now().strftime("%Y年%m月%d日")}')        simple_doc.add_paragraph()        simple_doc.add_paragraph(f'分析完成。采用模型：{model_used}')        simple_doc.add_paragraph(f'分析个体：ID = {sample_id}')        simple_doc.add_paragraph(f'时间点数：{len(ts_outcome)}')        simple_doc.add_paragraph('详细结果请查看CSV文件和可视化图片。')        simple_word_file = f'SSM_简易报告_{datetime.now().strftime("%Y%m%d")}.docx'        simple_doc.save(simple_word_file)print(f"简易Word报告已保存: {simple_word_file}")    except:print("无法生成Word报告")# 7. 保存工作空间print("\n7. 保存工作空间...")try:    import pickle    save_objects = {'ts_data_clean': ts_data_clean,'ts_outcome': ts_outcome,'filtered_level': filtered_level,'smoothed_level': smoothed_level,'residuals_val': residuals_val,'model_used': model_used,'param_table': param_table,'goodness_fit': goodness_fit,'state_estimates': state_estimates,'comprehensive_report': comprehensive_report    }    with open('SSM_关键结果.pkl', 'wb') as f:        pickle.dump(save_objects, f)print("关键结果已保存: SSM_关键结果.pkl")except Exception as e:print(f"保存工作空间失败: {e}")# 8. 汇总输出print("\n" + "=" * 70)print("状态空间模型(SSM)分析完成！")print("=" * 70)print("\n结果文件汇总:")result_files = ['SSM_模型参数表.csv','SSM_拟合优度表.csv','SSM_状态估计表.csv','SSM_综合报告表.csv']for file in result_files:if os.path.exists(file):print(f"✓ {file}")print("\n可视化文件汇总:")visual_files = ['SSM_滤波平滑对比图.png','SSM_残差诊断图.png']# 检查预测图是否存在if os.path.exists('SSM_预测图.png'):    visual_files.append('SSM_预测图.png')for file in visual_files:if os.path.exists(file):print(f"✓ {file}")print("\n报告文件汇总:")import globreport_files = glob.glob('SSM_*报告*.docx')for file in report_files:print(f"✓ {os.path.basename(file)}")print("\n工作空间文件:")if os.path.exists('SSM_关键结果.pkl'):print("✓ SSM_关键结果.pkl")print(f"\n分析完成！所有结果已保存在 {os.getcwd()} 文件夹中。")