
代码绘制成果展示










代码解释


第一部分

# =========================================================================================
# ====================================== 1. Environment setup =============================
# =========================================================================================
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder

第二部分

# =========================================================================================
# ====================================== 2. Color schemes =================================
# =========================================================================================
# Each scheme id maps to a matplotlib colormap name ('cmap') and a colour name
# ('line_text').
COLOR_SCHEMES = {
    1: {'cmap': 'Reds', 'line_text': 'navy'},
}
SCHEME_ID = 1  # id of the colour scheme to use

# Fall back to scheme 1 when SCHEME_ID is not a key of COLOR_SCHEMES.
scheme = COLOR_SCHEMES.get(SCHEME_ID, COLOR_SCHEMES[1])

第三部分

# =========================================================================================
# ====================================== 3. Plotting function =============================
# =========================================================================================
def plot_model_performance(r2_list, rmse_list, std_list):
    """Scatter RMSE against R^2 for a set of runs, coloured by residual STD.

    One point is drawn per run at (R^2, RMSE). Dashed lines mark the mean R^2
    and the mean RMSE, and a text box in the lower-left corner reports
    mean +/- sample standard deviation for all three metrics. Colours are read
    from the module-level ``scheme`` mapping.

    Args:
        r2_list: R^2 value of each run (x axis).
        rmse_list: RMSE value of each run (y axis).
        std_list: Residual standard deviation of each run (point colour).

    Returns:
        The ``(fig, ax)`` pair of the figure that was created.

    Raises:
        ValueError: If any of the three inputs is empty.
    """
    # Fail before a figure is created; otherwise min()/max() below would raise
    # the same exception type only after an empty figure had been allocated.
    if min(len(r2_list), len(rmse_list), len(std_list)) == 0:
        raise ValueError("r2_list, rmse_list and std_list must not be empty")

    cmap_name = scheme['cmap']             # colormap for the scatter points
    line_text_color = scheme['line_text']  # colour of the dashed mean lines

    # Mean and sample standard deviation (ddof=1) of every metric.
    r2_mean = np.mean(r2_list)
    r2_std_dev = np.std(r2_list, ddof=1)
    rmse_mean = np.mean(rmse_list)
    rmse_std_dev = np.std(rmse_list, ddof=1)
    std_mean = np.mean(std_list)
    std_std_dev = np.std(std_list, ddof=1)

    # ---- Part 4: canvas, scatter plot and colour bar ----
    fig, ax = plt.subplots(figsize=(8, 6))
    sc = ax.scatter(
        r2_list,           # x values
        rmse_list,         # y values
        c=std_list,        # values mapped to colour
        cmap=cmap_name,
        edgecolors='k',
        s=80,
        alpha=1,
        zorder=2,          # draw points above the mean lines
    )
    cbar = plt.colorbar(sc, ax=ax)
    cbar.set_label('STD', fontsize=12)

    # ---- Part 5: dashed lines at the mean R^2 and the mean RMSE ----
    ax.vlines(
        r2_mean,
        ymin=min(rmse_list) - 20,
        ymax=max(rmse_list) + 20,
        colors=line_text_color,
        linestyles='--',
        label='_nolegend_',  # keep the line out of any legend
        zorder=1,
    )
    ax.hlines(
        rmse_mean,
        xmin=min(r2_list) - 0.05,
        xmax=max(r2_list) + 0.05,
        colors=line_text_color,
        linestyles='--',
        label='_nolegend_',
        zorder=1,
    )

    # ---- Part 6: axis labels and statistics text box ----
    ax.set_xlabel('$R^2$', fontsize=14)
    ax.set_ylabel('RMSE', fontsize=14)

    # The backslash of the mathtext command is escaped explicitly: a bare "\p"
    # in a non-raw string is an invalid escape sequence. The rendered text is
    # unchanged.
    legend_text = (
        f'$R^2$ = {r2_mean:.2f} $\\pm$ {r2_std_dev:.2f}\n'
        f'RMSE = {rmse_mean:.2f} $\\pm$ {rmse_std_dev:.2f}\n'
        f'STD = {std_mean:.2f} $\\pm$ {std_std_dev:.2f}'
    )
    props = dict(
        boxstyle='round',       # rounded rectangle
        facecolor='white',
        alpha=0.9,
        edgecolor='lightgrey',
    )
    ax.text(
        0.05,
        0.05,
        legend_text,
        transform=ax.transAxes,  # position given in axes-fraction coordinates
        fontsize=11,
        verticalalignment='bottom',
        bbox=props,
    )
    return fig, ax

第七部分

# =========================================================================================
# ====================================== 4. Execution =====================================
# =========================================================================================
if __name__ == "__main__":
    df = pd.read_excel(r'data.xlsx')  # load the raw data from the Excel file

    # Integer-encode the categorical feature columns that are actually present.
    cat_features = ['PM', 'AAT', 'NDT']
    existing_cols = [col for col in cat_features if col in df.columns]
    df_processed = df.copy()  # leave the original frame untouched
    le = LabelEncoder()
    for col in existing_cols:
        # fit_transform refits the encoder on every call, so one instance can
        # be reused for each column.
        df_processed[col] = le.fit_transform(df_processed[col])

    X = df_processed.drop(columns=['SSA'])  # features
    y = df_processed['SSA']                 # target

    # ---- Part 8: repeated random-split evaluation ----
    n_iterations = 100  # number of random train/test repetitions

    r2_list = []    # R^2 of each repetition
    rmse_list = []  # RMSE of each repetition
    std_list = []   # residual standard deviation of each repetition

    # Hyper-parameter grid searched in every repetition.
    param_grid = {
        'n_estimators': [10, 20, 30],
        'max_depth': [3, 4],
    }

    print(f"开始运行 {n_iterations} 次随机测试")
    start_time = time.time()

    for seed in range(n_iterations):
        # NOTE(review): the split and the estimator construction were missing
        # from the published snippet. test_size=0.2 and seeding both with the
        # loop index are assumptions -- confirm against the original script.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed
        )
        rf = RandomForestRegressor(random_state=seed)

        # 5-fold cross-validated grid search scored by negative RMSE.
        grid_search = GridSearchCV(
            estimator=rf,
            param_grid=param_grid,
            cv=5,
            scoring='neg_root_mean_squared_error',
            n_jobs=-1,
            verbose=0,
        )
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_

        # Evaluate the best model on the held-out split.
        y_pred = best_model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        residuals = y_test - y_pred
        std_val = np.std(residuals, ddof=1)  # sample std of the residuals

        r2_list.append(r2)
        rmse_list.append(rmse)
        std_list.append(std_val)

        # ---- Part 9: progress report every 10 repetitions ----
        if (seed + 1) % 10 == 0:
            elapsed = time.time() - start_time
            print(f"进度: {seed + 1}/{n_iterations} | 耗时: {elapsed:.1f}s | "
                  f"当前最佳参数: {grid_search.best_params_}")

    total_time = time.time() - start_time
    print(f"总耗时: {total_time:.1f} 秒")

    # Draw the summary figure, then write the current figure to disk.
    plot_model_performance(r2_list, rmse_list, std_list)
    plt.savefig(fr'scheme_{SCHEME_ID}_optimized.png', dpi=300, bbox_inches='tight')

如何应用到你自己的数据

1.设置配色:
SCHEME_ID = 1  # 设置配色方案
2.设置绘图结果的保存地址:
plt.savefig(fr'scheme_{SCHEME_ID}_optimized.png', dpi=300, bbox_inches='tight')
3.设置原始数据的保存路径:
df = pd.read_excel(r'data.xlsx')  # 读取Excel数据文件
4.定义原始数据中的文本特征数据:
cat_features = ['PM', 'AAT', 'NDT']  # 定义需要进行编码的类别特征列名
5.设置超参数:
param_grid = {'n_estimators': [10, 20, 30], 'max_depth': [3, 4]}
6.设置试验次数:
n_iterations = 100  # 设置随机试验的迭代次数
推荐


获取方式
