
代码绘制成果展示









代码解释


第一部分

# =========================================================================================
# ================================== 1. Environment setup =================================
# =========================================================================================
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import shap
import xgboost
from matplotlib.cm import ScalarMappable
from matplotlib.lines import Line2D  # used by the network-plot legend handles
from shap.plots import beeswarm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

第二部分

# =========================================================================================
# ================================ 2. SHAP plot colour schemes ============================
# =========================================================================================
# Maps a scheme id to the list of colours used for the SHAP plots.
# NOTE(review): only scheme 1 is visible in this excerpt; SCHEME_ID below must
# be a key of this dict wherever COLOR_SCHEMES[SCHEME_ID] is looked up -- confirm
# the full table defines the selected id.
COLOR_SCHEMES = {
    1: ["blue", "#4B0082", "red"],
}

# Id of the colour scheme currently in use.
SCHEME_ID = 29

第三部分

# =========================================================================================
# ============================ 3. Network plot colour schemes =============================
# =========================================================================================
# Maps a scheme id to the node / edge colours of the correlation network plot.
# NOTE(review): only scheme 1 is visible in this excerpt; SCHEME_ID2 below is
# used as a key into this dict, so the full table must define the selected id.
NETWORK_COLOR_SCHEMES = {
    1: {
        'node_path': '#D689C5',
        'node_rad': '#92C2DD',
        'edge_pos': '#8E44AD',
        'edge_neg': '#3498DB',
    },
}

# Id of the network colour scheme currently in use.
SCHEME_ID2 = 29

第四部分

# =========================================================================================# ======================================4.特征重要性条形图和径向图/玫瑰图绘制函数=======================================# =========================================================================================def draw_bar_and_radial(sorted_features, sorted_shap_values, bar_colors, cmap, color_norm):fig = plt.figure(figsize=(16, 15)) # 创建画布# 画布边距left_margin, right_margin, bottom_margin, top_margin = 0.08, 0.08, 0.12, 0.12# 颜色条宽度colorbar_width = 0.02# 计算绘图区域的底部位置和高度plot_bottom = bottom_marginplot_height = 1.0 - bottom_margin - top_margin# 颜色条的左侧位置cbar_left = left_margin# 条形图的左侧位置main_ax_left = cbar_left + colorbar_width + 0.04# 条形图的宽度main_ax_width = 1.0 - main_ax_left - right_margin# 添加颜色条的坐标轴ax_cbar = fig.add_axes([cbar_left, plot_bottom, colorbar_width, plot_height])ax_cbar.text(0.5,-0.01,'Low''', transform=ax_cbar.transAxes,ha='center',va='top',fontsize=24)# 去掉颜色条边框cbar.outline.set_visible(False)# 颜色条标题ax_cbar.text(-1.4,0.5,'Contribution for CEs ($10^4$ t)',transform=ax_cbar.transAxes,fontsize=24,rotation=90,va='center')

第五部分

ax_bar.xaxis.tick_bottom() # 条形图x轴刻度位置ax_bar.xaxis.set_label_position("bottom") # 设置x轴标签位置# 反转x轴方向ax_bar.invert_xaxis()# 绘制水平条形图ax_bar.barh(y=range(len(sorted_features)), # Y坐标width=sorted_shap_values, # 水平条形宽度color=bar_colors, # 条形颜色height=0.6) # 条形高度# 反转y轴方向,使最重要的特征排在顶部ax_bar.invert_yaxis()# 设置x轴标签ax_bar.set_xlabel('Contribution for CEs ($10^4$ t)', size=24, labelpad=20)# 移除y轴刻度ax_bar.set_yticks([])# 去掉左侧和顶部边框ax_bar.spines[['left', 'top']].set_visible(False)# 设置右侧边框位置ax_bar.spines['right'].set_position(('data', 0))# 显示边框ax_bar.spines['right'].set_visible(True)ax_bar.spines['bottom'].set_visible(True)direction='in',length=4)# 标签的x轴偏移量label_x_padding = 0.005# 遍历特征,在条形图旁边添加特征名称文本for i, feature in enumerate(sorted_features):ax_bar.text(label_x_padding,i,feature,ha='right',va='center',color='black',fontsize=24)# 子图标签ax_bar.text(0.02,0.98,'(a)',transform=ax_bar.transAxes,fontsize=30,weight='bold',ha='left',va='top')

第六部分

inset_left = main_ax_left - 0.15 # 径向图/玫瑰图的左侧位置inset_bottom = plot_bottom - 0.05 # 径向图/玫瑰图的底部位置inset_size = min(main_ax_width, plot_height) * 0.85 # 径向图/玫瑰图的大小# 定义径向图/玫瑰图的矩形区域inset_ax_rect = [inset_left, inset_bottom, inset_size, inset_size]# 添加坐标轴作为径向图/玫瑰图ax_radial_inset = fig.add_axes(inset_ax_rect, projection='polar')# 背景透明ax_radial_inset.patch.set_alpha(0)ng_width = 3.0, 0.5, 2.0# 计算每个扇形的总长度/半径total_lengths = [base_length + i * fixed_increment for i in range(num_vars)]# 计算内部灰色部分的长度inner_heights = [max(0, tl - colored_ring_width) for tl in total_lengths]# 定义内部颜色列表inner_colors = ['#EAEAEA', '#FFFFFF'] * (num_vars // 2 + 1)# 截取对应数量的颜色inner_colors = inner_colors[:num_vars]# 起始角度偏移量one_oclock_offset = np.pi / 21# 每个扇形的起始角度thetas = np.cumsum([0] + widths[:-1].tolist()) - one_oclock_offset# 绘制内部灰色扇形ax_radial_inset.bar(x=thetas, # 条形的起始角度位置height=inner_heights, # 内部灰色部分的长度width=widths, # 指定每个条形的角宽度color=inner_colors, # 条形的填充颜色align='edge', # 对齐方式为边缘对齐edgecolor='white', # 条形边框的颜色为白色linewidth=1.5) # 条形边框线的宽度va='center',fontsize=18)ax_radial_inset.set_yticklabels([]) # 移除径向图的y轴标签ax_radial_inset.set_xticklabels([]) # 移除径向图的x轴标签# 隐藏极坐标轴的脊柱ax_radial_inset.spines['polar'].set_visible(False)# 关闭网格ax_radial_inset.grid(False)ax_radial_inset.set_theta_zero_location('N') # 正北方向ax_radial_inset.set_theta_direction(-1) # 顺时针ax_radial_inset.set_ylim(0, max(total_lengths) + 2) # 半径范围

第七部分

# =========================================================================================
# ================================= 5. SHAP beeswarm plot =================================
# =========================================================================================
def draw_native_beeswarm(shap_values, X, cmap):
    """Draw the standard SHAP beeswarm (summary "dot") plot on a new figure.

    Args:
        shap_values: SHAP values, forwarded unchanged to ``shap.summary_plot``.
        X: Feature matrix corresponding to ``shap_values``.
        cmap: Colormap forwarded to ``shap.summary_plot``.

    The plot is drawn with ``show=False`` and left as the current pyplot
    figure; this function neither saves nor returns it.
    """
    plt.figure(figsize=(16, 15))

    # NOTE(review): summary_plot defaults to plot_size="auto", which resizes
    # the current figure, so the figsize above is probably overridden. Passing
    # plot_size=None would keep 16x15 -- confirm which size is intended.
    shap.summary_plot(
        shap_values,
        X,
        plot_type="dot",  # beeswarm style
        show=False,       # keep the figure open for further styling
        cmap=cmap,
    )

    ax = plt.gca()
    ax.set_xlabel("SHAP Value (impact on model output)", fontsize=18)
    ax.tick_params(axis='y', labelsize=16)
    ax.tick_params(axis='x', labelsize=14)

    # When the figure holds more than one axes, the last one is treated as
    # the colorbar added by summary_plot and restyled.
    if len(plt.gcf().axes) > 1:
        cbar_ax = plt.gcf().axes[-1]
        cbar_ax.set_ylabel('Feature Value', size=16, rotation=-90, labelpad=20)
        cbar_ax.tick_params(labelsize=14)

    plt.tight_layout()

第八部分

# =========================================================================================
# ======================= 6. SHAP beeswarm plot without y-axis labels =====================
# =========================================================================================
def draw_beeswarm_no_labels(shap_values, X, cmap):
    """Draw the SHAP beeswarm plot with the feature names (y tick labels) removed.

    Args:
        shap_values: SHAP values, forwarded unchanged to ``shap.summary_plot``.
        X: Feature matrix corresponding to ``shap_values``.
        cmap: Colormap forwarded to ``shap.summary_plot``.

    The plot is drawn with ``show=False`` and left as the current pyplot
    figure; this function neither saves nor returns it.
    """
    plt.figure(figsize=(16, 15))

    # NOTE(review): summary_plot defaults to plot_size="auto", which resizes
    # the current figure, so the figsize above is probably overridden. Passing
    # plot_size=None would keep 16x15 -- confirm which size is intended.
    shap.summary_plot(
        shap_values,
        X,
        plot_type="dot",
        show=False,
        cmap=cmap,
    )

    ax_third_plot = plt.gca()
    # Drop the feature names on the y axis.
    ax_third_plot.set_yticklabels([])
    ax_third_plot.set_xlabel("SHAP Value (impact on model output)", fontsize=18)
    ax_third_plot.tick_params(axis='x', labelsize=14)

    # When the figure holds more than one axes, the last one is treated as
    # the colorbar added by summary_plot and restyled.
    if len(plt.gcf().axes) > 1:
        cbar_ax_third = plt.gcf().axes[-1]
        cbar_ax_third.set_ylabel(
            'Feature Value',
            size=16,
            rotation=-90,
            labelpad=20,  # distance between label text and the axis
        )
        cbar_ax_third.tick_params(labelsize=14)

    plt.tight_layout()

第九部分

# =========================================================================================
# ========== 7. Combined figure: importance bar chart + beeswarm + rose chart =============
# =========================================================================================
def draw_combined_plot(sorted_features, sorted_shap_values, shap_values, bar_colors, cmap, color_norm):
    """Build the combined figure; this portion draws the colorbar and bar chart.

    Args:
        sorted_features: Feature names; one horizontal bar is drawn per entry.
        sorted_shap_values: Bar widths, in the same order as ``sorted_features``.
        shap_values: Not used in this portion of the function.
        bar_colors: Colours of the individual bars.
        cmap: Colormap of the colorbar.
        color_norm: Normalisation paired with ``cmap`` for the colorbar.

    NOTE(review): this excerpt ends after the "(a)" panel label; the later
    excerpts in this file reuse ``fig_combined``, ``main_ax_left``,
    ``left_plot_width``, ``right_plot_width`` and ``space_between``.
    """
    fig_combined = plt.figure(figsize=(34, 25))

    # Margins and spacing, all in figure-fraction coordinates.
    left_margin, right_margin, bottom_margin, top_margin = 0.05, 0.05, 0.02, 0.1
    space_between = 0.01  # gap between the left and right panels
    plot_bottom = bottom_margin
    plot_height = 1 - bottom_margin - top_margin
    total_plot_width = 1 - left_margin - right_margin - space_between

    # Split the available width 60 / 40 between left and right panels.
    left_plot_width = total_plot_width * 0.6
    right_plot_width = total_plot_width * 0.4

    cbar_left = 0.1        # left edge of the colorbar
    colorbar_width = 0.01  # width of the colorbar

    # Colorbar on its own axes, driven by a ScalarMappable.
    ax_cbar_new = fig_combined.add_axes([cbar_left, plot_bottom, colorbar_width, plot_height])
    sm = ScalarMappable(cmap=cmap, norm=color_norm)
    cbar = fig_combined.colorbar(sm, cax=ax_cbar_new, orientation='vertical')
    cbar.set_label('', size=18, labelpad=5)
    cbar.set_ticks([])  # no numeric ticks; text labels are used instead
    cbar.ax.yaxis.set_ticks_position('left')

    # "High" caption just above the colorbar (axes-relative coordinates).
    ax_cbar_new.text(
        0.5,
        1.01,
        'High',
        transform=ax_cbar_new.transAxes,
        ha='center',
        va='bottom',
        fontsize=30,
    )

    # Bar-chart axes, placed to the right of the colorbar.
    main_ax_left = cbar_left + colorbar_width + 0.05
    ax_bar_new = fig_combined.add_axes([
        main_ax_left,     # left
        plot_bottom,      # bottom
        left_plot_width,  # width
        plot_height,      # height
    ])
    ax_bar_new.xaxis.tick_bottom()
    ax_bar_new.xaxis.set_label_position("bottom")
    ax_bar_new.invert_xaxis()

    ax_bar_new.barh(
        y=range(len(sorted_features)),
        width=sorted_shap_values,
        color=bar_colors,
        height=0.6,
    )
    ax_bar_new.invert_yaxis()  # first feature at the top

    ax_bar_new.set_xlabel('Contribution for CEs ($10^4$ t)', size=30, labelpad=20)
    ax_bar_new.set_yticks([])

    # Frame: hide left/top, anchor the right spine at x = 0, show right/bottom.
    ax_bar_new.spines[['left', 'top']].set_visible(False)
    ax_bar_new.spines['right'].set_position(('data', 0))
    ax_bar_new.spines['right'].set_visible(True)
    # Fixed: the original referenced ``ax_bar``, which is not defined in this
    # function and would raise NameError.
    ax_bar_new.spines['bottom'].set_visible(True)

    ax_bar_new.tick_params(
        axis='x',
        which='major',
        direction='in',
        labelsize=30,
        length=6,
        pad=8,
    )

    # Panel label.
    ax_bar_new.text(
        -0.02,
        0.98,
        '(a)',
        transform=ax_bar_new.transAxes,
        fontsize=80,
        weight='bold',
        ha='left',
        va='top',
    )

第十部分

num_vars = len(sorted_features) # 特征数量# 百分比percentages = (sorted_shap_values / sorted_shap_values.sum()) * 100# 每个扇形的宽度widths = (sorted_shap_values / sorted_shap_values.sum()) * 2 * np.pi# 设置基础长度、增量和彩色环宽度base_length, fixed_increment, colored_ring_width = 3.0, 0.5, 2.0# 每个扇形的总长度total_lengths = [base_length + i * fixed_increment for i in range(num_vars)]# 累积角度thetas = np.cumsum([0] + widths[:-1].tolist()) - one_oclock_offsetinset_size = min(left_plot_width, plot_height) * 1.3 # 计算大小# 左边距inset_left = main_ax_left - 0.2# 底边距inset_bottom = plot_bottom - 0.1# 定义插图矩形区域inset_ax_rect = [inset_left, inset_bottom, inset_size, inset_size]# 添加径向极坐标轴ax_radial_inset_new = fig_combined.add_axes(inset_ax_rect, projection='polar')# 背景透明ax_radial_inset_new.patch.set_alpha(0)# 绘制内部背景条ax_radial_inset_new.bar(x=thetas, # 角度height=inner_heights, # 高度width=widths, # 宽度color=inner_colors, # 颜色align='edge', # 对齐方式edgecolor='white', # 边缘颜色linewidth=1.5) # 线宽# 绘制外部彩色条ax_radial_inset_new.bar(x=thetas, # 角度height=[colored_ring_width] * num_vars, # 高度width=widths, # 宽度bottom=inner_heights, # 底部起始位置color=bar_colors, # 颜色align='edge', # 对齐方式edgecolor='white', # 边缘颜色linewidth=1.5) # 线宽# 移除y轴标签ax_radial_inset_new.set_yticklabels([])# 移除x轴标签ax_radial_inset_new.set_xticklabels([])# 隐藏极坐标脊柱ax_radial_inset_new.spines['polar'].set_visible(False)# 隐藏网格ax_radial_inset_new.grid(False)ax_radial_inset_new.set_theta_zero_location('N') # 正北ax_radial_inset_new.set_theta_direction(-1) # 顺时针ax_radial_inset_new.set_ylim(0, max(total_lengths) + 2) # 半径范围

第十一部分

使用 shap.plots.beeswarm,将图形绘制在指定的坐标轴上。手动增大了散点的大小。移除了Y轴标签,并添加了X轴标签。添加子图编号,并调整了蜂巢图自带的颜色条的标签和旋转角度。将这张组合图保存到指定文件夹。
# 右侧蜂巢图位置right_plot_left = main_ax_left + left_plot_width + space_between# 添加蜂巢图坐标轴ax_beeswarm = fig_combined.add_axes([right_plot_left, plot_bottom, right_plot_width, plot_height])# 绘制蜂巢图beeswarm(shap_values, # 数据max_display=len(sorted_features), # 最大显示特征数ax=ax_beeswarm, # 指定坐标轴show=False, # 不立即显示color=cmap, # 颜色映射plot_size=None) # 不自动调整大小# 遍历坐标轴上的所有散点for collection in ax_beeswarm.collections:# 蜂巢图颜色条if len(fig_combined.axes) > 3:# 获取颜色条坐标轴cbar_ax_right = fig_combined.axes[-1]# 设置右侧颜色条标签cbar_ax_right.set_ylabel('Feature Value', # 内容size=30, # 字体大小rotation=270, # 旋转labelpad=5) # 标签间距# 刻度标签大小cbar_ax_right.tick_params(labelsize=30)

第十二部分

# =========================================================================================# ====================================== 8.网络相关性图绘制函数 ===========================# =========================================================================================def draw_right_part_network(data_df, sorted_features, corr_method='spearman'):# 创建画布和坐标轴fig, ax = plt.subplots(figsize=(15, 15))# 获取配色方案net_colors = NETWORK_COLOR_SCHEMES[SCHEME_ID2]# 筛选出特征数据selected_data = data_df[sorted_features]# 计算筛选数据的相关性corr_matrix = selected_data.corr(method=corr_method)# 获取矩阵上三角部分的索引(k=1表示不包含对角线本身)upper_tri_indices = np.triu_indices_from(corr_matrix, k=1)# 根据索引提取所有上三角的相关系数值all_corrs = corr_matrix.values[upper_tri_indices]# 计算这些相关系数的绝对值all_abs_corrs = np.abs(all_corrs)# 最大值max_abs = np.max(all_abs_corrs)# 最小值min_abs = np.min(all_abs_corrs)# 如果最大值等于最小值if max_abs == min_abs:max_abs += 1e-9# 给最大值加上一个微小的数值print(f"相关性绝对值范围: Min={min_abs:.4f}, Max={max_abs:.4f}")# 设置连线宽度的最小值和最大值LW_MIN, LW_MAX = 0.5, 7# 设置绘制连线的相关性阈值threshold = 0.0# 获取特征变量的数量num_vars = len(sorted_features)x_coords = radius * np.cos(angles)# 计算节点的Y坐标y_coords = radius * np.sin(angles)# 初始化节点颜色列表node_colors = []# 遍历每一个特征,设置分组特征for feat in sorted_features:if str(feat).startswith('P') or 'Texture' in str(feat):node_colors.append(net_colors['node_path'])else:node_colors.append(net_colors['node_rad'])orr > 0 else net_colors['edge_neg']# 如果相关系数大于0使用实线,否则使用虚线style = '-' if corr > 0 else '--'# 对相关系数绝对值进行归一化处理norm_score = (abs_corr - min_abs) / (max_abs - min_abs)# 根据归一化分数计算线宽width = LW_MIN + norm_score * (LW_MAX - LW_MIN)# 根据归一化分数计算透明度alpha = 0.2 + norm_score * 0.7# 绘制连接两个节点的线条ax.plot([x_coords[i], # 起始节点和终止节点的X坐标x_coords[j]],[y_coords[i], # 起始节点和终止节点的Y坐标y_coords[j]],color=color, # 线条颜色linestyle=style, # 线型linewidth=width, # 线条宽度alpha=alpha, # 透明度# 遍历特征以添加标签# 定义图例列表legs = [# 节点图例Line2D([0], # X坐标[0], # Y坐标marker='o', # 图例的形状color='w', # 设置线条边缘颜色markerfacecolor=net_colors['node_path'], # 设置标记内部的填充颜色(取自配置中的病理组学颜色)label='Pathomics', # 
图例中显示的文本标签ms=15), # 标记的大小# 影像组学节点图例Line2D([0], [0], marker='o', color='w', markerfacecolor=net_colors['node_rad'], label='Radiomics', ms=15),# 正相关图例Line2D([0], [0], color=net_colors['edge_pos'], lw=2, label='Pos Correlation'),# 负相关图例Line2D([0], [0], color=net_colors['edge_neg'], lw=2, ls='--', label='Neg Correlation')]# 添加图例ax.legend(handles=legs, # 图例中显示的句柄列表loc='upper left', # 位置bbox_to_anchor=(-0.1, 1.05), # 绝对位置坐标frameon=False, # 是否显示图例的边框fontsize=16) # 字体大小

第十三部分

# =========================================================================================# ====================================== 9.图片拼接函数 ===========================# =========================================================================================def stitch_images(path_left, path_right, scale_right=1.0):# 打开图片,加载为图像对象img_left = Image.open(path_left)img_right = Image.open(path_right)# 基于左图高度乘以缩放比例,计算右图的新高度target_height_right = int(img_left.height * scale_right)# 计算右侧图片的宽高比aspect_ratio_right = img_right.width / img_right.height# 根据新的目标高度计算右侧图片的新宽度new_width_right = int(target_height_right * aspect_ratio_right)y_left = (canvas_height - img_left.height) // 2# 粘贴左侧图片new_img.paste(img_left, (0, y_left))# 计算右图的垂直居中Y坐标y_right = (canvas_height - target_height_right) // 2# 粘贴右侧图片new_img.paste(img_right_resized, (img_left.width + gap, y_right))

第十四部分

StandardScaler 对特征进行标准化处理,并将其转回带有列名的 DataFrame 格式(以便SHAP能识别特征名)。初始化 XGBoost 回归器,设置参数网格,并使用 5 折交叉验证和网格搜索寻找最佳超参数。最后输出找到的最佳参数。SHAP分析与绘图,使用 TreeExplainer 计算测试集的SHAP值。计算每个特征的平均绝对SHAP值(代表全局重要性),并按降序排列,为绘图做准备。根据之前定义的颜色方案和SHAP值的大小,生成对应的颜色映射和每个条形的具体颜色。依次调用之前定义的绘图函数,生成并保存图片。# =========================================================================================# ======================================10.执行部分=======================================# =========================================================================================if __name__ == '__main__':# 读取数据data_df = pd.read_excel(r'data.xlsx')# 定义目标变量target_column_name = 'Target_y'# 提取目标变量数据y = data_df[target_column_name]# 提取特征变量数据(删除目标列)X = data_df.drop(columns=[target_column_name])# 获取所有特征名称并转换为列表feature_names = X.columns.tolist()# 划分训练集和测试集X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)# 标准化处理scaler = StandardScaler()X_train_scaled = scaler.fit_transform(X_train)X_test_scaled = scaler.transform(X_test)model = best_model# 创建SHAP树解释器对象,用于解释模型explainer = shap.TreeExplainer(model)# 计算测试集数据的SHAP值shap_values = explainer(X_test_df)# 计算所有样本SHAP绝对值的平均值,衡量特征整体重要性mean_abs_shap = np.abs(shap_values.values).mean(axis=0)# 创建包含特征重要性数值的Series,索引为特征名shap_series = pd.Series(mean_abs_shap, index=feature_names)# 对特征重要性进行降序排序shap_series.sort_values(ascending=False, inplace=True)# 获取排序后的特征名称列表sorted_features = shap_series.index.tolist()# 获取排序后的特征重要性数值数组sorted_shap_values = shap_series.valuesprint(pd.DataFrame(shap_values.values[:5, :3], columns=feature_names[:3]).round(4))print("\n测试集特征平均重要性 (Mean |SHAP|):")print(np.round(sorted_shap_values, 4))# 调用函数绘图draw_bar_and_radial(sorted_features, sorted_shap_values, bar_colors, cmap, color_norm)draw_native_beeswarm(shap_values, X_test_df, cmap)draw_beeswarm_no_labels(shap_values, X_test_df, cmap)draw_combined_plot(sorted_features, sorted_shap_values, shap_values, bar_colors, cmap, color_norm)draw_right_part_network(X_test_df, sorted_features, 
corr_method='spearman')

如何应用到你自己的数据

1.设置SHAP图颜色方案:
SCHEME_ID = 30
2.设置网络图颜色方案:
SCHEME_ID2 = 30
plt.savefig(fr'shap_bar_radial{SCHEME_ID}.png', dpi=208, bbox_inches='tight')
data_df = pd.read_excel(r'simulated_data.xlsx')
target_column_name = 'Target_y'
param_grid = {'n_estimators': [100, 200],}
推荐


获取方式
