
代码绘制成果展示












代码解释


第一部分

# =========================================================================================# ====================================== 1. 环境设置 =======================================# =========================================================================================import osimport numpy as npimport pandas as pdimport matplotlib.pyplot as pltfrom sklearn.model_selection import train_test_split, GridSearchCV

第二部分

# =========================================================================================
# ====================================== 2. Color library =================================
# =========================================================================================
# Maps a color-scheme id to a 3-tuple of hex color strings. The execution
# section iterates over COLOR_SCHEMES.keys() and passes each key as `scheme_id`
# to the main plotting function.
# NOTE(review): the tuple order is presumably (positive SHAP, negative SHAP,
# SHAP mean); the unpacking is not visible in this excerpt, so confirm it
# against the plotting function before adding schemes.
COLOR_SCHEMES = {
    1: ('#B63232', '#2A72B5', '#E88B46'),
}

第三部分

# =========================================================================================# ======================================3.单个模型环形SHAP图绘制函数=======================================# =========================================================================================def draw_model_circular_shap(ax, model_name, shap_vals, feature_names, color_pos, color_neg, color_mean, letter):n_features = len(feature_names) #输入特征总数量angles = np.linspace(0, 2 * np.pi, n_features, endpoint=False) #生成等间距的极坐标角度angles_closed = np.append(angles, angles[0]) #将第一个角度值添加到数组末尾,闭合mean_abs_shap = np.mean(np.abs(shap_vals), axis=0) #计算各特征在所有样本上的SHAP值绝对值的平均值mean_pos_shap = np.zeros(n_features) #各特征正向SHAP均值mean_neg_shap = np.zeros(n_features) #各特征负向SHAP均值mean_pos_shap[i] = np.mean(pos_vals) if len(pos_vals) > 0 else 0mean_neg_shap[i] = np.mean(neg_vals) if len(neg_vals) > 0 else 0#将均值数组闭合,首尾相接mean_abs_shap_closed = np.append(mean_abs_shap, mean_abs_shap[0])mean_pos_shap_closed = np.append(mean_pos_shap, mean_pos_shap[0])mean_neg_shap_closed = np.append(mean_neg_shap, mean_neg_shap[0])

第四部分

real_min_val = np.min(shap_vals) #SHAP值全局最小值real_max_val = np.max(shap_vals) #SHAP值全局最大值val_span = real_max_val - real_min_val #计算极差ax.set_theta_offset(np.pi / 2) #起始角度ax.set_theta_direction(-1) #顺时针ax.set_ylim(r_min, r_outer) #径向范围ax.grid(False) #去掉默认网格线ax.spines['polar'].set_visible(False) #去掉边框ax.set_yticks([]) #去掉径向刻度标签

第五部分

theta_full = np.linspace(0, 2 * np.pi, 300) #生成均匀角度用于绘制平滑的参考圆圈grid_radii = np.linspace(r_inner, real_max_val + val_span * 0.05, 5) #背景虚线圈半径#遍历半径for r in grid_radii:#绘制背景虚线圈ax.plot(theta_full, #角度np.full_like(theta_full, r), #半径color='gray', #颜色linestyle='--', # 设置线条样式为虚线linewidth=1, # 设置线条宽度为0.8alpha=0.5, # 设置透明度为0.5(半透明)zorder=1) # 设置绘图层级置底#中心圆的外框ax.plot(theta_full, #角度np.full_like(theta_full, r_inner), #半径color='black', #颜色linestyle='-', #样式linewidth=1.5, #线宽zorder=2) #层#SHAP=0的分界圆ax.plot(theta_full, #角度np.full_like(theta_full, 0), #半径color='#404040', #颜色linestyle='-', #样式linewidth=2.5, #线宽zorder=2) #层

第六部分

#遍历特征绘制散点图for i inrange(n_features):angle = angles[i] #角度vals = shap_vals[:, i] #获取该特征的所有样本SHAP值low_bound, high_bound = np.percentile(vals, [2.5, 97.5]) #计算95%置信区间的上下限mask_95 = (vals >= low_bound) & (vals <= high_bound) #剔除前2.5%和后2.5%的极端异常值vals_filtered = vals[mask_95] #剔除后数据#绘制正SHAP值ax.scatter(angle + jitter_vals_filtered[pos_mask], #角度+偏移vals_filtered[pos_mask], #半径color=color_pos, #填充颜色s=18, #大小alpha=0.65, #透明度zorder=3, #层edgecolors='none') #不绘制边#绘制负SHAP值ax.scatter(angle + jitter_vals_filtered[neg_mask], #角度+偏移vals_filtered[neg_mask], #半径color=color_neg, #填充颜色s=18, #大小alpha=0.65, #透明度zorder=3, #图edgecolors='none') #不绘制边

第七部分

#SHAP绝对值均值闭合线ax.plot(angles_closed, #角度mean_abs_shap_closed, #半径color=color_mean, #颜色linewidth=2, #线zorder=5) #层#均值曲线上对应特征的角点标记ax.scatter(angles, #角度mean_abs_shap, #半径color=color_mean, #颜色s=10, #大小zorder=5) #层#SHAP正向均值的闭合线ax.plot(angles_closed, #角度mean_pos_shap_closed, #半径color=color_pos, #颜色linewidth=1.0, #线宽zorder=5) #层#SHAP负向均值的闭合线ax.plot(angles_closed,#角度mean_neg_shap_closed, #半径color=color_neg, #颜色linewidth=1.0, #线宽zorder=4) #层

第八部分

axis_angle = np.deg2rad(-20) #用于绘制数值刻度轴的角度#数值刻度轴ax.plot([axis_angle, axis_angle], #角度[r_inner, real_max_val + val_span * 0.05], #半径范围color='black', #颜色linewidth=1.0, #线宽zorder=5) #层all_ticks = np.linspace(real_min_val, real_max_val, 7) #生成均匀分布的刻度点d_theta = 0.04 #刻度线长ax.text(0, #xr_min, #yf"{model_name}\n95% CI", #文本ha='center', #水平va='center', #垂直fontsize=14, #字体大小fontweight='bold', #加粗zorder=6) #层#子图编号ax.text(-0.05, #x1.15, #Yletter, #编号transform=ax.transAxes, #坐标系fontsize=32, #大小fontweight='bold', #加粗va='top', #垂直定ha='right') #水平

第九部分

# =========================================================================================# ======================================4.主绘图函数=======================================# =========================================================================================def plot_circular_shap_charts(shap_data_dict, top_features_dict, scheme_id=1):fig, axes = plt.subplots(1, #行3, #列figsize=(25, 8), #尺寸subplot_kw=dict(polar=True)) #极坐标#调用绘图函数绘制组合图draw_model_circular_shap(axes[idx], #画布model_name, #模型top_shap_vals, #SHAPtop_features, #特征color_pos, #正颜色color_neg, #负颜色color_mean, #均值颜色letters[idx]) #子图编号#创建子图画布fig_single, ax_single = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))#绘制子图draw_model_circular_shap(ax_single, model_name, top_shap_vals, top_features,color_pos, color_neg, color_mean, letters[idx])# 创建用于独立图表图例说明的定制线条对象(此处为空线条)mean_line_s = mlines.Line2D([], #占位[],#占位color=color_mean, #颜色marker='o', #圆形markersize=20, #大小label='SHAP mean', #图例文本linestyle='None') #不绘制线# 创建用于独立图表正向值的图例空对象pos_dot_s = mlines.Line2D([], #占位[],#占位color=color_pos, #颜色marker='o', #圆形markersize=20, #大小label='SHAP positive', #图例文本linestyle='None') #不绘制线single_save_dir = os.path.join(base_path, model_name)os.makedirs(single_save_dir, exist_ok=True)#保存fig_single.savefig(os.path.join(single_save_dir, f'shap_circular_{model_name}_scheme_{scheme_id}.png'), dpi=300,bbox_inches='tight')fig_single.savefig(os.path.join(single_save_dir, f'shap_circular_{model_name}_scheme_{scheme_id}.pdf'),bbox_inches='tight')plt.close(fig_single) #关闭#创建组合图图例#均值mean_line = mlines.Line2D([], #占位[],#占位color=color_mean, #颜色marker='o', #圆形markersize=26, # 大小label='SHAP mean', #文本

第十部分

# =========================================================================================# ======================================5.执行部分=======================================# =========================================================================================if __name__ == '__main__':excel_filename = r'\simulated_microplastic_data.xlsx' #模型数据集df_main = pd.read_excel(excel_filename) #读取feature_names = df_main.drop(columns=['Target_Class']).columns.tolist() #特征名X_main = df_main.drop(columns=['Target_Class']).values #特征数据y_main = df_main['Target_Class'].values #目标数据#划分训练集与测试集best_models = {} #存放最佳模型model_names = ['Random Forest', 'XGBoost', 'CatBoost'] #模型名称#超参数网格rf_param = {'n_estimators': [50, 100],'max_depth': [5, 10, None]}#配置网格搜索rf_grid = GridSearchCV(RandomForestClassifier(random_state=42, class_weight='balanced'), rf_param, cv=3, n_jobs=-1)rf_grid.fit(X_train, y_train) #拟合best_models['Random Forest'] = rf_grid.best_estimator_ #最佳RF模型#超参数网格xgb_param = {'n_estimators': [50, 100],'max_depth': [3, 5],'learning_rate': [0.1, 0.2]}#配置网格搜索xgb_grid = GridSearchCV(XGBClassifier(random_state=42, eval_metric='mlogloss'), xgb_param, cv=3, n_jobs=-1)xgb_grid.fit(X_train, y_train) #拟合best_models['XGBoost'] = xgb_grid.best_estimator_ #最佳XGBoost模型#超参数网格cb_param = {'iterations': [50, 100],'depth': [4, 6],'learning_rate': [0.1]}#配置网格搜索cb_grid = GridSearchCV(CatBoostClassifier(random_state=42, verbose=0, auto_class_weights='Balanced'), cb_param,cv=3, n_jobs=-1)cb_grid.fit(X_train, y_train) #拟合best_models['CatBoost'] = cb_grid.best_estimator_ #最佳CatBoost模型shap_data_dict = {} #存放SHAP结果top_features_dict = {} #按各模型存放筛选后头部特征k_features = 12 #要呈现的上限,topplot_all = Trueif plot_all:for i in COLOR_SCHEMES.keys():plot_circular_shap_charts(shap_data_dict, top_features_dict, scheme_id=i)else:target_scheme = 1plot_circular_shap_charts(shap_data_dict, top_features_dict, scheme_id=target_scheme)

如何应用到你自己的数据

1.设置是一次绘制一张图还是一次性绘制出所有配色的图,执行部分:
plot_all = True
2.设置模型数据集路径,执行部分:
excel_filename = r'data.xlsx' #模型数据集
3.设置独立验证数据集路径,执行部分:
ind_excel_filename = r'independent_validation_data.xlsx'
4.设置目标数据,执行部分:
y_main = df_main['Target_Class'].values #目标数据
5.设置特征数据,执行部分:
X_main = df_main.drop(columns=['Target_Class']).values #特征数据
6.设置超参数网格,执行部分:
rf_param = {'n_estimators': [50, 100],'max_depth': [5, 10, None]}
7.设置要绘制的特征,执行部分:
k_features = 12 #要呈现的上限,top
8.设置保存路径,主绘图函数部分:
base_path = r''
推荐


获取方式
