"""Fit and plot a simple linear regression of weight on height.

Python port of a short R session using the ``women`` dataset:
``lm(weight ~ height, data=women)`` followed by ``summary``, ``fitted``,
``residuals`` and a scatter plot with the regression line.
"""

import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.formula.api as smf
from pydataset import data

# 1. Load the data.
# R equivalent: data(women); women
women = data('women')

# 2. Fit the linear model.
# R equivalent: fit <- lm(weight ~ height, data=women)
fit = smf.ols('weight ~ height', data=women).fit()

# 3. Show the model summary.
# R equivalent: summary(fit)
print("--- Model Summary ---")
print(fit.summary())

# 4. Show the observed weights.
# R equivalent: women$weight
print("\n--- Actual Weights ---")
print(women['weight'].values)

# 5. Show the fitted values.
# R equivalent: fitted(fit)
print("\n--- Fitted Values ---")
print(fit.fittedvalues.values)

# 6. Show the residuals.
# R equivalent: residuals(fit)
print("\n--- Residuals ---")
print(fit.resid.values)

# 7. Plot the data and the fitted line.
# R equivalent: plot(...) + abline(fit)
plt.figure(figsize=(8, 6))

# Scatter plot of the observations.
plt.scatter(women['height'], women['weight'], color='black', label='Data')

# Regression line: fitted values plotted against the same heights.
plt.plot(
    women['height'],
    fit.fittedvalues,
    color='red',
    linewidth=2,
    label='Fitted line',
)

# Axis labels, title and legend.
plt.xlabel("Height (in inches)")
plt.ylabel("Weight (in pounds)")
plt.title("Regression: Weight vs Height")
plt.legend()

# Display the figure.
plt.show()