当前位置：首页>python>基于Python|完全随机设计的协方差分析图解

基于Python|完全随机设计的协方差分析图解

2026-07-01 13:20:31

小七：有阿猫阿狗的眼光生活！

大家好，本期笔者将使用Python呈现协方差图解，关于R版本的，各位读者朋友可以前往上一期推文进行阅读，包括协方差和结果的解读。本期只做Python代码分享。

阶段1：环境导入与数据录入，导入核心分析库，录入高血压降压药试验的三组原始数据，合并总数据并验证维度。# 导入核心库import numpy as npimport scipy.stats as statsimport statsmodels.api as smimport matplotlib.pyplot as pltimport pandas as pd# ===================== 数据录入 =====================# 组1：单用氢氯噻嗪 X=治疗前收缩压(mmHg)，Y=治疗后收缩压(mmHg)x1 = np.array([133,129,152,161,154,141,156,160,165,154])y1 = np.array([139,134,136,151,147,137,149,145,155,133])# 组2：氢氯噻嗪+卡托普利x2 = np.array([147,129,158,164,134,155,151,141,153,140])y2 = np.array([137,120,141,137,140,144,134,123,142,130])# 组3：氢氯噻嗪+硝苯地平x3 = np.array([163,168,170,160,196,160,188,188,158,167])y3 = np.array([132,143,153,145,154,138,157,149,133,128])k = 3  # 设定组数y = np.concatenate([y1, y2, y3])  # 合并总应变量x = np.concatenate([x1, x2, x3])  # 合并总协变量# 验证数据维度（确保录入正确）print(f"总数据量：X={len(x)}, Y={len(y)} | 组数：{k}")

阶段2：方差齐性检验，验证协方差分析的方差齐性前提，笔者换了下，这里与R版稍微有所区别# ===================== 方差齐性检验（Levene检验，鲁棒性更强） =====================levene_result = stats.levene(y1, y2, y3)print("\n=== 方差齐性检验（Levene 检验） ===")print(f"检验统计量: {levene_result.statistic:.4f}")print(f"P值: {levene_result.pvalue:.4f}")# 结果解读if levene_result.pvalue > 0.05:    print("结论：P>0.05，满足方差齐性，符合协方差分析前提")else:    print("结论：P≤0.05，方差不齐，需谨慎使用协方差分析")

阶段3：拟合各组原回归线+计算SS组内，拟合三组Y对X的原始回归线，获取预测值并计算组内残差平方和# ===================== 定义回归拟合函数（复用） =====================def fit_linear_reg(x_data, y_data):    """    拟合线性回归模型（适配statsmodels）    返回：回归模型、Y预测值    """    x_sm = sm.add_constant(x_data)  # 添加截距项（R的lm默认包含）    model = sm.OLS(y_data, x_sm).fit()  # 普通最小二乘回归    y_hat = model.predict(x_sm)  # 预测值    return model, y_hat# ===================== 拟合各组原回归线 =====================# 组1原回归线model1, y1_hat = fit_linear_reg(x1, y1)print("\n=== 组1原回归线结果（核心参数） ===")print(f"截距项: {model1.params[0]:.4f}, 回归系数: {model1.params[1]:.4f}")print(f"R²: {model1.rsquared:.4f}")# 组2原回归线model2, y2_hat = fit_linear_reg(x2, y2)print("\n=== 组2原回归线结果（核心参数） ===")print(f"截距项: {model2.params[0]:.4f}, 回归系数: {model2.params[1]:.4f}")print(f"R²: {model2.rsquared:.4f}")# 组3原回归线model3, y3_hat = fit_linear_reg(x3, y3)print("\n=== 组3原回归线结果（核心参数） ===")print(f"截距项: {model3.params[0]:.4f}, 回归系数: {model3.params[1]:.4f}")print(f"R²: {model3.rsquared:.4f}")# ===================== 计算SS组内 =====================ss_within = np.sum((y1 - y1_hat)**2) + np.sum((y2 - y2_hat)**2) + np.sum((y3 - y3_hat)**2)ss_within = round(ss_within, 2)  # 保留2位小数print(f"\nSS组内（组内残差平方和）: {ss_within}")

阶段 4：拟合总回归线+计算SS总，拟合合并数据的总回归线，获取总预测值并计算总残差平方和# ===================== 拟合总回归线 =====================model_total, y_hat = fit_linear_reg(x, y)print("\n=== 总回归线结果（核心参数） ===")print(f"截距项: {model_total.params[0]:.4f}, 回归系数: {model_total.params[1]:.4f}")print(f"R²: {model_total.rsquared:.4f}")# ===================== 计算SS总 =====================ss_total = np.sum((y - y_hat)**2)ss_total = round(ss_total, 2)print(f"\nSS总（总残差平方和）: {ss_total}")

阶段5：拟合平行回归线+计算核心指标，计算公共回归系数，拟合三组平行回归线，计算SS公共、SS回归系数# ===================== 定义离均差计算函数 =====================def calc_lxy_lxx(x_sub, y_sub):    """计算单组的离均差积和(lxy)、离均差平方和(lxx)"""    n = len(x_sub)    lxy = np.sum(x_sub*y_sub) - (np.sum(x_sub)*np.sum(y_sub))/n    lxx = np.sum(x_sub**2) - (np.sum(x_sub))**2/n    return lxy, lxx# ===================== 计算总lxy、总lxx =====================lxy1, lxx1 = calc_lxy_lxx(x1, y1)lxy2, lxx2 = calc_lxy_lxx(x2, y2)lxy3, lxx3 = calc_lxy_lxx(x3, y3)lxy_total = lxy1 + lxy2 + lxy3  # 总离均差积和lxx_total = lxx1 + lxx2 + lxx3  # 总离均差平方和# ===================== 公共回归系数（bc） =====================bc = lxy_total / lxx_totalbc = round(bc, 2)print(f"\n公共回归系数bc: {bc}")  # 与R结果一致（0.49）# ===================== 拟合三组平行回归线 =====================# 公式：y_pxhat = 组Y均值 - bc*组X均值 + bc*Xy1_pxhat = np.mean(y1) - bc*np.mean(x1) + bc*x1y2_pxhat = np.mean(y2) - bc*np.mean(x2) + bc*x2y3_pxhat = np.mean(y3) - bc*np.mean(x3) + bc*x3# ===================== 计算SS公共、SS回归系数 =====================ss_common = np.sum((y1 - y1_pxhat)**2) + np.sum((y2 - y2_pxhat)**2) + np.sum((y3 - y3_pxhat)**2)ss_common = round(ss_common, 2)ss_reg_coef = np.sum((y1_pxhat - y1_hat)**2) + np.sum((y2_pxhat - y2_hat)**2) + np.sum((y3_pxhat - y3_hat)**2)ss_reg_coef = round(ss_reg_coef, 2)print(f"SS公共: {ss_common}")print(f"SS回归系数: {ss_reg_coef}")

阶段7：回归线可视化，绘制散点图+三组原回归线+三组平行回归线+总回归线# ===================== 绘制回归线示意图（修正版） =====================plt.rcParams['font.sans-serif'] = ['SimHei']  # 解决中文显示问题plt.rcParams['axes.unicode_minus'] = False    # 解决负号显示问题# 基础散点图plt.figure(figsize=(10, 6))plt.scatter(x, y, color='black', s=30, label='原始数据')# 绘制平行回归线（红色）x1_sorted = np.sort(x1)y1_pxhat_sorted = np.mean(y1) - bc*np.mean(x1) + bc*x1_sortedplt.plot(x1_sorted, y1_pxhat_sorted, color='red', linewidth=2, label='平行回归线')x2_sorted = np.sort(x2)y2_pxhat_sorted = np.mean(y2) - bc*np.mean(x2) + bc*x2_sortedplt.plot(x2_sorted, y2_pxhat_sorted, color='red', linewidth=2)x3_sorted = np.sort(x3)y3_pxhat_sorted = np.mean(y3) - bc*np.mean(x3) + bc*x3_sortedplt.plot(x3_sorted, y3_pxhat_sorted, color='red', linewidth=2)# 绘制原回归线（蓝色）y1_hat_sorted = model1.params[0] + model1.params[1]*x1_sortedplt.plot(x1_sorted, y1_hat_sorted, color='blue', linewidth=2, label='原回归线')y2_hat_sorted = model2.params[0] + model2.params[1]*x2_sortedplt.plot(x2_sorted, y2_hat_sorted, color='blue', linewidth=2)y3_hat_sorted = model3.params[0] + model3.params[1]*x3_sortedplt.plot(x3_sorted, y3_hat_sorted, color='blue', linewidth=2)# 绘制总回归线（绿色）x_sorted = np.sort(x)y_hat_sorted = model_total.params[0] + model_total.params[1]*x_sortedplt.plot(x_sorted, y_hat_sorted, color='green', linewidth=2, label='总回归线')# 图表美化（修正图例位置）plt.xlabel('入组前的收缩压(mmHg)', fontsize=12)plt.ylabel('治疗后收缩压(mmHg)', fontsize=12)plt.title('回归线示意图', fontsize=14)plt.legend(loc='lower right', fontsize=10)  # 修正：bottommright → lower rightplt.grid(alpha=0.3)plt.show()

阶段 8：生成协方差分析结果# ===================== 整理基础指标 =====================# 平方和（SS）SS = [ss_total, ss_corrected_mean, ss_common, ss_reg_coef, ss_within]# 自由度（df）df_total = len(x) - 2df_corrected_mean = k - 1df_common = len(x) - k - 1df_reg_coef = k - 1df_within = len(x) - 2*kdf = [df_total, df_corrected_mean, df_common, df_reg_coef, df_within]# 均方（MS）ms_corrected_mean = ss_corrected_mean / df_corrected_meanms_common = round(ss_common / df_common, 2)ms_reg_coef = ss_reg_coef / df_reg_coefms_within = round(ss_within / df_within, 2)MS = ["", round(ms_corrected_mean, 2), ms_common, round(ms_reg_coef, 2), ms_within]# F值f_corrected_mean = round(ms_corrected_mean / (ss_common / df_common), 2)f_parallel = round(ms_reg_coef / (ss_within / df_within), 2)F = ["", f_corrected_mean, "", f_parallel, ""]# P值（F检验）- 修正参数名：df1→dfn，df2→dfdp_corrected_mean = 1 - stats.f.cdf(f_corrected_mean, dfn=df_corrected_mean, dfd=df_common)p_parallel = 1 - stats.f.cdf(f_parallel, dfn=df_reg_coef, dfd=df_within)P = ["", round(p_corrected_mean, 3), "", round(p_parallel, 3), ""]# ===================== 构建结果表 =====================result_df = pd.DataFrame({    "变异来源": ["总", "修正均数间", "公共", "回归系数", "组内"],    "SS": SS,    "df": df,    "MS": MS,    "F": F,    "P": P})print("\n=== 协方差分析结果表 ===")print(result_df)

阶段 9：修正均数多重比较（q检验）# ===================== 修正均数多重比较（q检验） =====================# 计算组间lxx（zjlxx）zjx = np.array([np.mean(x1), np.mean(x2), np.mean(x3)])zjlxx = np.sum(zjx**2) - (np.sum(zjx))**2 / len(zjx)# 重新计算组内lxxlxx1 = np.sum(x1**2) - (np.sum(x1))**2 / len(x1)lxx2 = np.sum(x2**2) - (np.sum(x2))**2 / len(x2)lxx3 = np.sum(x3**2) - (np.sum(x3))**2 / len(x3)lxx_total = lxx1 + lxx2 + lxx3# 计算q值ms_within_num = float(ms_within)  # 转换为数值型（避免字符串干扰）q1 = (xzy2 - xzy3) / np.sqrt(ms_within_num/10 * (1 + zjlxx/4127.2))  # 组2vs组3q2 = (xzy1 - xzy3) / np.sqrt(ms_within_num/10 * (1 + zjlxx/(2*4127.2)))  # 组1vs组3q3 = (xzy1 - xzy2) / np.sqrt(ms_within_num/10 * (1 + zjlxx/4127.2))  # 组1vs组2print("\n=== 修正均数多重比较q值 ===")print(f"组2 vs 组3: q = {round(q1, 4)}")print(f"组1 vs 组3: q = {round(q2, 4)}")print(f"组1 vs 组2: q = {round(q3, 4)}")# 结果解读print("\n结论：组1 vs 组3的q值最大（≈4.62），差异有统计学意义（P<0.05）")

好，本期的内容分享就到这儿！希望大家有所收获！如需要科研辅导、数据分析、选题等科研辅导或入群学习，也欢迎大家与我们交流！微信联系：Nursing_outlook

本文来自网友投稿或网络内容，如有侵犯您的权益请联系我们删除，联系邮箱：wyl860211@qq.com 。

基于Python|完全随机设计的协方差分析图解

最新文章

热门文章

随机文章

基于Python|完全随机设计的协方差分析图解

风控规则挖掘:二维交叉表、多维决策树、Python实操...

新书推荐 | Python网络数据挖掘

最新文章

热门文章

随机文章