当前位置：首页>python>期刊图片复现|Python绘制XGBoost+SHAP的特征重要性条形-玫瑰组合图

期刊图片复现|Python绘制XGBoost+SHAP的特征重要性条形-玫瑰组合图

2026-02-05 22:49:39

代码绘制成果展示

论文：The new perspective of future multi-scenario analysis: Decoding impact pathways of land use dynamics on sustainable development goals

论文原图

仿图

多种配色

代码解释

第一部分

库的导入以及字体设置

# =========================================================================================
# ================================== 1. Environment setup =================================
# =========================================================================================
import os

import matplotlib.colors as mcolors
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from xgboost import XGBRegressor

第二部分

颜色库的设置以及配色方案的选择

# =========================================================================================
# ==================================== 2. Color library ===================================
# =========================================================================================
# Palette registry: scheme id -> list of hex colors. The list is consumed in order when the
# gradient colormap is built, so its first entry colors the smallest value.
COLOR_SCHEMES = {
    1: ["#8CB2CF", "#D0DCEF", "#F6A8A1", "#D63E51"],
}

scheme_id = 40  # id of the palette to use

# An id that is not present in COLOR_SCHEMES falls back to scheme 1 instead of raising.
colors_list = COLOR_SCHEMES.get(scheme_id, COLOR_SCHEMES[1])

第三部分

绘图函数：数据准备与画布设置

# =========================================================================================
# =================================== 3. Plotting function ================================
# =========================================================================================
def draw_climate_importance_chart(data):
    """Draw the SHAP importance bar chart with an inset rose chart and a gradient legend.

    Args:
        data: Sequence of dicts, one per feature, each providing the keys
            ``"id"`` (tick label), ``"val"`` (mean |SHAP| value) and
            ``"pct"`` (share of the total importance, in percent).

    Raises:
        ValueError: If ``data`` is empty (the color scale needs a min and a max).
    """
    if not data:
        raise ValueError("data must contain at least one feature entry")

    ids = [d["id"] for d in data]  # tick labels
    values = [d["val"] for d in data]  # SHAP values
    pcts = [d["pct"] for d in data]  # percentages

    # Map every value onto the palette: smallest value -> first color, largest -> last color.
    custom_cmap = mcolors.LinearSegmentedColormap.from_list("custom_theme", colors_list)
    norm = mcolors.Normalize(vmin=min(values), vmax=max(values))
    colors = [custom_cmap(norm(v)) for v in values]

    # Canvas with a 1 x 2 grid: wide bar chart on the left, narrow legend column on the right.
    fig = plt.figure(figsize=(15, 8), facecolor='white')
    gs = fig.add_gridspec(
        1,  # rows
        2,  # columns
        width_ratios=[3, 1.2],
    )
    ax_bar = fig.add_subplot(gs[0])  # bar chart
    ax_legend = fig.add_subplot(gs[1])  # custom legend
    y_pos = np.arange(len(ids))  # one Y position per feature

第四部分

绘图函数：背景网格线绘制，条形图绘制，shap值标注

    #网格线    ax_bar.grid(axis='y',  #Y轴                linestyle='--',  #样式                alpha=0.5,  #透明度                zorder=0)  #层级    #水平条形图    bars = ax_bar.barh(y_pos,  #位置                       values,  # 宽度                       color=colors,  #颜色                       height=0.6,  #高度                       align='center',  #对齐                       zorder=2)  #层级    #刻度参数    ax_bar.tick_params(direction='in', top=False, right=False)    ax_bar.invert_yaxis()  #反转Y轴    ax_bar.set_yticks(y_pos)  #Y轴刻度位置    ax_bar.set_yticklabels(ids, fontsize=12)  #Y轴刻度标签    #x轴标题    ax_bar.set_xlabel('SHAP Importance (Mean |SHAP value|)',fontsize=20,fontweight='bold')    ax_bar.set_xlim(0, max(values) * 1.1)  #X轴范围    #隐藏边框    ax_bar.spines['top'].set_visible(False)    ax_bar.spines['right'].set_visible(False)

第五部分

绘图函数：内嵌的南丁格尔玫瑰图绘制，在条形图的空白区域插入一个新的坐标系，设置为极坐标，用于绘制玫瑰图。扇形的宽度由特征的重要性占比决定。扇形的半径由SHAP 值决定。在扇形外侧标记百分比数值。关闭极坐标系的轴线和刻度，只保留图形本身。

    #极坐标图位置    rect = [0.4,#左            0.15,#下            0.35,#宽            0.45] #高    ax_rose = fig.add_axes(rect, polar=True)  #添加子图    # 根据百分比计算每个扇形的弧度宽度    widths = [p / 100 * 2 * np.pi for p in pcts]    starts = np.cumsum([0] + widths[:-1])  #每个扇形的起始角度    #遍历每个扇形参数以添加标签    for angle, height, width, pct in zip(starts, heights, widths, pcts):        label_angle = angle + width / 2  #标签的角度位置        label_r = bottom + height + 0.12  #径向位置        rot_angle = np.degrees(label_angle) % 360  #将弧度转换为角度        #文本水平对齐方式        alignment_h = 'left' if (rot_angle < 90 or rot_angle > 270) else 'right'        #添加文本        ax_rose.text(label_angle,  #角度位置                     label_r,  #半径                     f"{pct:.1f}%",  #文本内容                     ha=alignment_h,  #水平对齐                     va='center',  #垂直对齐                     fontsize=12,  #大小                     fontweight='bold')  #加粗    ax_rose.set_axis_off()  #隐藏坐标轴及其刻度

第六部分

绘图函数：绘制右侧图例区域，调整刻度线。

    ax_legend.axis('off')  #关闭右侧图例区域的坐标轴显示    # 生成渐变数据    gradient = np.linspace(1, 0, 256).reshape(-1, 1)    #颜色条坐标轴    ax_cbar = ax_legend.inset_axes([0.05, 0.05, 0.08, 0.9])    ax_cbar.imshow(gradient, aspect='auto', cmap=custom_cmap)  #绘制颜色条    ax_cbar.axis('off')  #隐藏颜色条的坐标轴    #设置所有边框    for spine in ax_bar.spines.values():        spine.set_linewidth(2.0)  #设置线宽    ax_bar.set_yticks(y_pos)    ax_bar.set_yticklabels(ids, fontsize=12)    #标注字体加粗    plt.setp(ax_bar.get_yticklabels(), fontweight='bold')    plt.setp(ax_bar.get_xticklabels(), fontweight='bold')    #颜色条高低值文本    ax_legend.text(0.18,  #X                   0.96,  #Y                   "High Contribution",  #内容                   fontsize=12,  #大小                   va='center',  #垂直居中                   fontweight='bold')  #加粗    ax_legend.text(0.18,  #X                   0.04,  #Y                   "Low Contribution",  #内容                   fontsize=12,  #大小                   va='center',  #垂直居中                   fontweight='bold')  #加粗

第七部分

执行部分，负责数据处理、模型训练、SHAP分析和调用绘图函数。包括读取数据，将数据划分为训练集和测试集。初始化 XGBoost 回归器。定义超参数搜索空间。进行随机搜索和交叉验证，找到最好的模型参数。使用`shap`解释训练好的XGBoost模型，计算测试集SHAP值。计算全局特征重要性。计算每个特征的重要性占比。构建列表，包含特征的 ID、数值、百分比和原始名称。按 SHAP 值从大到小对数据进行排序。调用之前定义的`draw_climate_importance_chart`函数，传入处理好的数据进行绘图。

# =========================================================================================
# ===================================== 4. Execution ======================================
# =========================================================================================
if __name__ == "__main__":
    input_file = r"Data.xlsx"  # input file path
    df = pd.read_excel(input_file)
    X = df.iloc[:, :-1]  # features: every column except the last
    y = df.iloc[:, -1]  # target: the last column
    feature_names = X.columns.tolist()

    # Hold out 20% of the rows as the test set.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    xgb = XGBRegressor(random_state=42)
    # Hyper-parameter search space.
    param_dist = {
        'n_estimators': [100, 200, 300, 400, 500],
        'max_depth': [3, 4, 5, 6, 8],
        'learning_rate': [0.01, 0.05, 0.1, 0.2],
    }
    # Randomized search: 5 sampled candidates, 3-fold cross-validation.
    search = RandomizedSearchCV(xgb, param_dist, n_iter=5, cv=3, random_state=42)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Explain the best model on the test set.
    explainer = shap.TreeExplainer(best_model)
    shap_values = explainer.shap_values(X_test)
    mean_shap = np.abs(shap_values).mean(axis=0)  # mean |SHAP| per feature
    total_importance = np.sum(mean_shap)

    # One record per feature; ids are numbered from 1 in the original column order.
    analysis_data = [
        {
            "id": f"Var.{index}",
            "val": mean_value,
            "pct": (mean_value / total_importance) * 100,
            "desc": name,
        }
        for index, (name, mean_value) in enumerate(zip(feature_names, mean_shap), start=1)
    ]
    # Largest SHAP value first.
    analysis_data.sort(key=lambda item: item['val'], reverse=True)

    draw_climate_importance_chart(analysis_data)

如何应用到你自己的数据

1.设置要使用的配色方案：

scheme_id = 40  # id of the palette to use

2.设置绘图结果的保存路径：

# NOTE(review): the path is only "\<scheme_id>.png" with no directory in front of it —
# presumably a placeholder; put your own output directory before the backslash.
plt.savefig(fr"\{scheme_id}.png", dpi=300, bbox_inches='tight')

3.设置原始数据的路径：

input_file = r"Data.xlsx"  # input file path

4.分类特征数据以及目标数据：

X = df.iloc[:, :-1]  # features: every column except the last
y = df.iloc[:, -1]  # target: the last column

5.设置超参数：

# Hyper-parameter search space: parameter name -> candidate values to sample from.
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [3, 4, 5, 6, 8],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
}

期刊图片复现|Python绘制XGBoost+SHAP的特征重要性条形-玫瑰组合图

最新文章

热门文章

随机文章

期刊图片复现|Python绘制XGBoost+SHAP的特征重要性条形-玫瑰组合图

Python内卷到饱和?现在学还能拿高薪吗

Linux KVM在ARM64环境多VCPU的演示

最新文章

热门文章

随机文章