import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Global matplotlib styling shared by every figure produced by this script.
mpl.rcParams.update({
    'font.family': 'Arial',
    # Line widths for the axes frame and the major ticks.
    'axes.linewidth': 1.2,
    'xtick.major.width': 1.2,
    'ytick.major.width': 1.2,
    # Font sizes for tick labels, axis labels and titles.
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'axes.labelsize': 12,
    'axes.titlesize': 14,
    # Font type 42 (TrueType) for text in PDF output.
    'pdf.fonttype': 42,
})
# Input CSV and the directory that receives every table and figure.
input_file = r"E:\plots\Data.csv"
output_dir = r"E:\DataAnalysis\PCA"
os.makedirs(output_dir, exist_ok=True)

print("正在读取数据并初始化分析...")

# Load the data. Assumes the first three columns are Treatment, Factor1 and
# Factor2, and that every remaining column is a numeric indicator.
df = pd.read_csv(input_file)
factors_df, data_numeric = df.iloc[:, :3], df.iloc[:, 3:]
indicators = list(data_numeric.columns)

# Standardize every indicator (z-score) before running the PCA.
scaler = StandardScaler()
scaler.fit(data_numeric)
data_scaled = scaler.transform(data_numeric)
# Fit a PCA that keeps every component and project the standardized samples
# onto the principal axes.
pca = PCA()
pca.fit(data_scaled)
pc_scores = pca.transform(data_scaled)

# Eigenvalues, per-component variance contribution (%) and its running total.
eigenvalues = pca.explained_variance_
variance_ratio = pca.explained_variance_ratio_ * 100
cumulative_variance = np.cumsum(variance_ratio)

# Loadings matrix: component vectors scaled by the square root of their
# eigenvalue (rows = indicators, columns = components).
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)

# Column labels PC1..PCk, built once and shared by all tables below.
pc_labels = [f'PC{i}' for i in range(1, len(eigenvalues) + 1)]

# Intermediate result tables.
df_variance = pd.DataFrame({
    'Principal Component': pc_labels,
    'Eigenvalue': eigenvalues,
    'Variance Contribution (%)': variance_ratio,
    'Cumulative Variance (%)': cumulative_variance,
})
df_loadings = pd.DataFrame(loadings, index=indicators, columns=pc_labels)
df_scores = pd.DataFrame(pc_scores, columns=pc_labels)
df_scores = pd.concat([factors_df, df_scores], axis=1)

# Composite score: keep the leading components up to and including the first
# one whose cumulative contribution exceeds the threshold.
# NOTE(review): the original comment and the variable name say 80 %, but the
# code has always used 85. The 85 is kept here so results do not change;
# confirm which value is intended.
CUMULATIVE_VARIANCE_THRESHOLD = 85
n_components_80 = int(np.sum(cumulative_variance <= CUMULATIVE_VARIANCE_THRESHOLD)) + 1
# Never ask for more components than exist (possible if the threshold is
# raised to 100 or above).
n_components_80 = min(n_components_80, len(eigenvalues))
print(f"提取前 {n_components_80} 个主成分进行综合评价。")

# Weighted sum of the retained PC scores, each weighted by its variance
# contribution expressed as a fraction.
weights = variance_ratio[:n_components_80] / 100
composite_score = pc_scores[:, :n_components_80] @ weights
df_scores['Composite_Score'] = composite_score

# Rank treatments by their mean composite score; the top row is the best one.
optimal_combo = df_scores.groupby('Treatment')['Composite_Score'].mean().reset_index()
optimal_combo = optimal_combo.sort_values(by='Composite_Score', ascending=False)
best_treatment = optimal_combo.iloc[0]['Treatment']
# Write every result table into one workbook, one sheet per table.
excel_path = os.path.join(output_dir, "PCA_Comprehensive_Results.xlsx")

# (table, sheet name, write the index?) -- only the loadings matrix keeps its
# index, because the index holds the indicator names.
sheets = [
    (df_variance, '1_Variance_Contribution', False),
    (df_loadings, '2_Loadings_Matrix', True),
    (df_scores, '3_PC_Scores_and_Composite', False),
    (optimal_combo, '4_Optimal_Treatment_Ranking', False),
]
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    for frame, sheet_name, keep_index in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=keep_index)

print(f"数据已导出至: {excel_path}")
print(f"\n【自动判定结果】: 根据综合得分,最优处理组合为 ---> {best_treatment} <---")
# Figure 1: heatmap of the loadings of the retained components.
plt.figure(figsize=(10, 8))
sns.heatmap(df_loadings.iloc[:, :n_components_80], annot=True, cmap='RdBu_r', center=0,
            fmt=".2f", cbar_kws={'label': 'Loading Value'}, linewidths=0.5)
plt.title('PCA Loadings Heatmap', pad=15)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, "1_Loadings_Heatmap.pdf"), dpi=300)
plt.close()

# Figure 2: each indicator's contribution to PC1, i.e. its squared loading as
# a percentage of the sum of squared PC1 loadings, sorted ascending so the
# largest bar ends up at the top of the horizontal bar chart.
pc1_contrib = (df_loadings['PC1'] ** 2) / (df_loadings['PC1'] ** 2).sum() * 100
pc1_contrib = pc1_contrib.sort_values(ascending=True)
plt.figure(figsize=(8, 10))
# Bar colour follows the contribution relative to the largest one.
colors = plt.cm.viridis(pc1_contrib / pc1_contrib.max())
pc1_contrib.plot(kind='barh', color=colors, edgecolor='black', linewidth=0.8)
plt.xlabel('Contribution to PC1 (%)')
plt.title('Variables Contribution to PC1')
plt.tight_layout()
plt.savefig(os.path.join(output_dir, "2_Indicator_Contribution.pdf"), dpi=300)
plt.close()

# Figure 3: PCA score plot (PC1 vs PC2); requires at least two components.
plt.figure(figsize=(8, 6))
# Colour palette (NPG).
colors_npg = ['#E64B35', '#4DBBD5', '#00A087', '#3C5488', '#F39B7F',
              '#8491B4', '#91D1C2', '#DC0000', '#7E6148', '#B09C85']
# One colour per treatment. Cycle through the palette when there are more
# treatments than colours, so the list always matches the number of hue
# levels instead of coming up short.
n_treatments = df_scores['Treatment'].nunique()
treatment_palette = [colors_npg[i % len(colors_npg)] for i in range(n_treatments)]
sns.scatterplot(x='PC1', y='PC2', hue='Treatment', style='Factor2', data=df_scores,
                palette=treatment_palette,
                s=100, edgecolor='black', linewidth=0.8, alpha=0.9)
# Dashed reference lines through the origin.
plt.axhline(y=0, color='k', linestyle='--', linewidth=0.8, alpha=0.5)
plt.axvline(x=0, color='k', linestyle='--', linewidth=0.8, alpha=0.5)
plt.xlabel(f'PC1 ({variance_ratio[0]:.1f}%)', weight='bold')
plt.ylabel(f'PC2 ({variance_ratio[1]:.1f}%)', weight='bold')
plt.title('PCA Score Plot', pad=15, weight='bold')
# Legend placed outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', frameon=False)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, "3_PCA_Score_Plot_SCI.pdf"), dpi=300)
# Also save a PNG copy for quick preview.
plt.savefig(os.path.join(output_dir, "3_PCA_Score_Plot_SCI.png"), dpi=300)
plt.close()

print(f"所有分析图已成功保存至: {output_dir}")


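# Note from the original post: follow the author so you do not miss updates; if you would like the AI prompt used as a reference, leave a comment. If this was helpful, please like, share and recommend it.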