当前位置：首页>python>python科研绘图-绘制多模型对比条形图

python科研绘图-绘制多模型对比条形图

2026-01-07 12:54:55

“ 提供定制化绘图/数据分析服务，支持先出样图/成果，满意后付款，私信联系我们。”

点击蓝字~关注我们

之前有许多朋友不知道如何获取完整的代码与数据，在这里分享给大家完整的步骤，了解的朋友请跳过，直接阅读正文：

1、点击关注：

2、点击发消息：

3、收到自动回复：

完毕。

代码说明：该代码基于 Python 机器学习生态，先加载或生成回归数据集，经拆分、标准化预处理后，训练 XGBoost 等 5 种回归模型，通过 MAE、RMSE、R² 量化模型在训练/测试集上的性能，最终绘制分组水平条形图，可视化对比各模型性能。核心原理：标准化消除量纲影响，回归评估指标量化拟合效果，分组条形图直观呈现多模型、多指标的性能差异；实现方法：调用 sklearn、xgboost 库完成建模与评估，利用 matplotlib 实现可视化。

（图中数据均为随机示例数据，以实际训练结果为准）

—

库导入

导入数据处理、建模、可视化所需库，调用 Python 成熟的数据分析与机器学习库，通过 import 语句导入 pandas、sklearn 等库及对应模块，为后续流程提供基础。

import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport warningsfrom pathlib import Pathfrom sklearn.model_selection import train_test_splitfrom sklearn.preprocessing import StandardScalerfrom sklearn.metrics import mean_absolute_error, mean_squared_error, r2_scorefrom xgboost import XGBRegressorfrom sklearn.svm import SVRfrom sklearn.ensemble import RandomForestRegressorfrom sklearn.neighbors import KNeighborsRegressorfrom sklearn.tree import DecisionTreeRegressor

—

样式与配置段

定义可视化样式参数，配置绘图全局参数并关闭警告；通过字典存储样式参数，修改 matplotlib 全局配置，调用 warnings.filterwarnings 屏蔽无关警告。

# Input/output location of the dataset used by the pipeline.
DATA_FILE_PATH = "data(分组条形图比较多个模型).xlsx"

# Single source of truth for every size and spacing value used by the
# plotting helpers further down in this file.
STYLE_PARAMS = dict(
    label_fontsize=12,
    title_fontsize=16,
    tick_fontsize=11,
    annotation_fontsize=8,
    figure_size=(16, 9),
    dpi=300,
    bar_width=0.1,
    gap=0.02,
)

# Apply the style globally so every figure created afterwards inherits it.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'axes.unicode_minus': False,
    'font.size': STYLE_PARAMS['label_fontsize'],
    'axes.labelsize': STYLE_PARAMS['label_fontsize'],
    'axes.titlesize': STYLE_PARAMS['title_fontsize'],
    'legend.fontsize': STYLE_PARAMS['tick_fontsize'],
    'xtick.labelsize': STYLE_PARAMS['tick_fontsize'],
    'ytick.labelsize': STYLE_PARAMS['tick_fontsize'],
    'figure.figsize': STYLE_PARAMS['figure_size'],
    'figure.dpi': STYLE_PARAMS['dpi'],
    'savefig.dpi': STYLE_PARAMS['dpi'],
    'savefig.bbox': 'tight',
    'savefig.pad_inches': 0.1,
})

# Silence all warnings for the rest of the run (process-wide).
warnings.filterwarnings("ignore")

—

数据加载 / 生成函数

加载指定 Excel 数据，格式不符或文件不存在时生成随机回归数据并保存；检查文件存在性，验证数据格式，无有效数据时生成带噪声的特征和目标值。

def load_or_generate_data(file_path):
    """Load the regression dataset from an Excel file, or synthesise one.

    If ``file_path`` exists it is read with the first column as index and
    accepted when it has a ``target`` column and at least five columns in
    total. When the file is missing, unreadable, or fails that check, a
    synthetic dataset is generated instead (1500 rows, features ``feat_0`` ..
    ``feat_7`` plus ``target``) and written to ``file_path``, overwriting any
    existing file there.

    Args:
        file_path: Path of the Excel workbook to read and, on fallback, write.

    Returns:
        pandas.DataFrame holding the feature columns and the ``target`` column.
    """
    file = Path(file_path)
    if file.exists():
        print(f"✅ 找到数据文件，正在读取: {file_path}")
        try:
            data_df = pd.read_excel(file_path, index_col=0)
            if "target" not in data_df.columns or data_df.shape[1] < 5:
                raise ValueError("文件数据格式不符合要求")
            return data_df
        except Exception as e:
            # Deliberate best-effort boundary: any read/validation problem
            # falls through to the synthetic-data path below.
            print(f"⚠️ 读取文件失败: {e}，将生成随机数据...")

    print(f"📊 生成随机回归数据集并保存至: {file_path}")
    # A private RandomState(42) yields the same stream as np.random.seed(42)
    # followed by np.random.randn, but leaves the global NumPy RNG untouched.
    rng = np.random.RandomState(42)
    n_samples = 1500
    n_features = 8
    features = rng.randn(n_samples, n_features) * 2
    noise = rng.randn(n_samples) * 0.5
    # Only the first four features carry signal; the rest are pure noise.
    target = (
        3 * features[:, 0] + 1.8 * features[:, 1] - 2.5 * features[:, 2] +
        1.2 * features[:, 3] + noise
    )
    data_df = pd.DataFrame(features, columns=[f"feat_{i}" for i in range(n_features)])
    data_df["target"] = target

    # Saving is a convenience for later runs; the data is already in memory,
    # so a missing Excel engine or an unwritable path must not abort the run.
    try:
        data_df.to_excel(file_path, index=True)
    except (ImportError, OSError, ValueError) as e:
        print(f"⚠️ 保存数据文件失败: {e}，仅返回内存中的数据")
    return data_df

—

数据预处理函数

拆分特征与目标值，按 7:3 划分训练集与测试集并标准化特征；标准化器仅在训练集上拟合、再应用于测试集，以消除量纲影响并避免测试集信息泄漏，通过 train_test_split 和 StandardScaler 实现。

def preprocess_data(data_df):
    """Split the dataset 70/30 and standardise the feature columns.

    Args:
        data_df: DataFrame containing a ``target`` column; every other
            column is treated as a feature.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The two
        feature matrices are the scaler's outputs; the targets are the
        unscaled ``target`` values for each split.
    """
    targets = data_df["target"]
    predictors = data_df.drop(columns="target")

    split = train_test_split(predictors, targets, test_size=0.3, random_state=42)
    X_train, X_test, y_train, y_test = split

    # Fit on the training split only, then reuse those statistics for both.
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

—

模型训练与评估函数

训练多个回归模型，计算训练 / 测试集 MAE、RMSE、R2 指标；基于标准化数据训练模型，通过回归指标量化拟合效果，遍历模型配置并调用 sklearn 指标函数计算。

def train_models(train_feat, test_feat, train_tar, test_tar):
    """Fit five regressors and tabulate their train/test MAE, RMSE and R2.

    Args:
        train_feat: Feature matrix used for fitting.
        test_feat: Feature matrix used for held-out evaluation.
        train_tar: Targets matching ``train_feat``.
        test_tar: Targets matching ``test_feat``.

    Returns:
        pandas.DataFrame indexed by model name with the columns
        ``Train_MAE, Test_MAE, Train_RMSE, Test_RMSE, Train_R2, Test_R2``
        (rounded to 4 decimals, missing values replaced by 0.0).
    """
    regressors = {
        "XGBoost": XGBRegressor(
            n_estimators=450, learning_rate=0.09, max_depth=5,
            objective='reg:squarederror', random_state=42, verbosity=0,
        ),
        "SVR": SVR(kernel='rbf', C=1.5, epsilon=0.09, max_iter=10000),
        "RandomForest": RandomForestRegressor(n_estimators=450, max_depth=5, random_state=42),
        "KNN": KNeighborsRegressor(n_neighbors=6, weights='distance', n_jobs=-1),
        "DecisionTree": DecisionTreeRegressor(max_depth=7, random_state=42),
    }
    # Column order of the resulting table.
    metric_keys = (
        "Train_MAE", "Test_MAE",
        "Train_RMSE", "Test_RMSE",
        "Train_R2", "Test_R2",
    )

    scores_by_model = {}
    for model_name, regressor in regressors.items():
        print(f"🔧 训练模型: {model_name}")
        regressor.fit(train_feat, train_tar)
        fitted_train = np.array(regressor.predict(train_feat))
        fitted_test = np.array(regressor.predict(test_feat))

        try:
            scores = (
                mean_absolute_error(train_tar, fitted_train),
                mean_absolute_error(test_tar, fitted_test),
                np.sqrt(mean_squared_error(train_tar, fitted_train)),
                np.sqrt(mean_squared_error(test_tar, fitted_test)),
                r2_score(train_tar, fitted_train),
                r2_score(test_tar, fitted_test),
            )
        except Exception as e:
            # Best-effort: one failing metric zeroes the whole row instead
            # of aborting the comparison.
            print(f"⚠️ {model_name} 指标计算异常: {e}，使用0填充")
            scores = (0.0,) * len(metric_keys)

        scores_by_model[model_name] = {
            key: round(score, 4) for key, score in zip(metric_keys, scores)
        }

    # Models become rows, metrics become columns.
    eval_df = pd.DataFrame(scores_by_model).T
    print("\n📈 模型评估结果:")
    print(eval_df.round(4))
    return eval_df.fillna(0.0)

—

条形图标注函数

为水平条形图添加数值标注，区分训练 / 测试集标注位置；获取条形位置和数值信息，计算标注位置，调用 ax.text 添加文本标注。

def add_bar_annotations(bars, ax, is_train=False):
    """Write each bar's metric value just beyond the bar's free end.

    Args:
        bars: Iterable of bar patches (anything offering ``get_width``,
            ``get_y`` and ``get_height``), e.g. the container returned by
            ``ax.barh``.
        ax: Axes the text labels are added to.
        is_train: True when the bars were drawn with negated widths (the
            mirrored training side); the sign is flipped back so the label
            shows the real metric value.
    """
    for bar in bars:
        width = bar.get_width()
        y_pos = bar.get_y() + bar.get_height() / 2

        # Recover the signed metric rather than printing abs(width), so a
        # negative score (R2 can be negative) is not shown as positive.
        # Adding 0.0 normalises -0.0 to 0.0 to avoid a "-0.000" label.
        value = (-width if is_train else width) + 0.0

        # Put the label on the side the bar actually points to; a zero-width
        # bar falls back to its nominal side (train = left, test = right).
        offset = abs(width) * 0.05
        points_left = width < 0 or (width == 0 and is_train)
        if points_left:
            x_pos, ha = width - offset, 'right'
        else:
            x_pos, ha = width + offset, 'left'

        ax.text(
            x_pos, y_pos, f'{value:.3f}',
            ha=ha, va='center', fontsize=STYLE_PARAMS['annotation_fontsize'],
            color='black', fontweight='bold'
        )

—

分组水平条形图绘制函数段

绘制多模型评估指标的分组水平条形图并保存；按模型和指标分组布局，区分训练 / 测试集颜色，计算条形位置并绘制、美化图表后保存。

def plot_grouped_horizontal_bars(eval_df):
    """Draw and save a mirrored, grouped horizontal bar chart of the metrics.

    Training metrics are drawn to the left of a central zero line (as negated
    widths) and test metrics to the right, one group of three bars per model.
    The figure is written to ``model_performance_comparison.pdf`` and
    ``model_performance_comparison.png`` in the current working directory and
    then closed.

    Args:
        eval_df: DataFrame indexed by model name with the columns
            ``Train_MAE, Train_RMSE, Train_R2, Test_MAE, Test_RMSE, Test_R2``.

    Raises:
        ValueError: If ``eval_df`` has no rows.
    """
    if eval_df.empty:
        raise ValueError("eval_df contains no model results to plot")

    model_names = eval_df.index.tolist()
    bar_width = STYLE_PARAMS['bar_width']
    gap = STYLE_PARAMS['gap']
    y_pos = np.arange(len(model_names))

    train_metrics = ["Train_MAE", "Train_RMSE", "Train_R2"]
    test_metrics = ["Test_MAE", "Test_RMSE", "Test_R2"]
    n_metrics = len(train_metrics)
    group_height = (bar_width * n_metrics) + (gap * (n_metrics - 1))
    # barh centres each bar on its y value, so the first bar's centre sits
    # half a bar above the group's lower edge; this keeps every group
    # centred on its model tick.
    first_center = y_pos - group_height / 2 + bar_width / 2

    # Negating the training values mirrors them to the left of x = 0.
    train_vals = -eval_df[train_metrics].to_numpy(dtype=float)
    test_vals = eval_df[test_metrics].to_numpy(dtype=float)
    colors_train = ['#E74C3C', '#3498DB', '#2ECC71']
    colors_test = ['#F1948A', '#85C1E9', '#82E5AA']

    fig, ax = plt.subplots(figsize=STYLE_PARAMS['figure_size'])
    for spine in ax.spines.values():
        spine.set_linewidth(1.2)
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)

    # All train series are drawn before the test series so the two-column
    # legend lists train entries first, then test entries.
    series = (
        ("Train", train_metrics, train_vals, colors_train, True),
        ("Test", test_metrics, test_vals, colors_test, False),
    )
    for split_name, metrics, values, colors, is_train in series:
        for i, metric in enumerate(metrics):
            pos = first_center + (i * (bar_width + gap))
            bars = ax.barh(
                pos, values[:, i], height=bar_width,
                color=colors[i], label=f"{split_name} {metric.split('_')[1]}",
                edgecolor='white', linewidth=0.5
            )
            add_bar_annotations(bars, ax, is_train=is_train)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(model_names, fontsize=14, fontweight='bold')
    ax.tick_params(axis='y', length=0)

    # Symmetric limits from the largest magnitude on either side, so a
    # negative test value is not clipped; fall back to 1.0 when every value
    # is zero (or NaN) to avoid a degenerate axis.
    max_abs_val = max(np.abs(train_vals).max(), np.abs(test_vals).max())
    if not max_abs_val > 0:
        max_abs_val = 1.0
    x_lim = max_abs_val * 1.15
    ax.set_xlim(-x_lim, x_lim)
    ax.axvline(x=0, color='black', linewidth=1.5, linestyle='-')

    ax.set_xlabel("Metric Value", fontsize=14, fontweight='bold')
    ax.set_title("Model Performance Comparison (Train ← Y-axis → Test)",
                 fontsize=STYLE_PARAMS['title_fontsize'], fontweight='bold', pad=80)

    # Side captions placed just above the topmost model group.
    label_y = y_pos[-1] + 0.6
    ax.text(-x_lim * 0.6, label_y, 'Train Set',
            fontsize=14, fontweight='bold', ha='center', color='#2C3E50')
    ax.text(x_lim * 0.6, label_y, 'Test Set',
            fontsize=14, fontweight='bold', ha='center', color='#2C3E50')

    ax.legend(
        loc='upper right', bbox_to_anchor=(1.0, 1.2),
        frameon=True, fancybox=True, shadow=True, ncol=2, fontsize=12
    )
    # y tick labels were already made bold by set_yticklabels above.
    for label in ax.get_xticklabels():
        label.set_fontweight('bold')

    fig.tight_layout()
    fig.savefig("model_performance_comparison.pdf", format='pdf', bbox_inches='tight', dpi=1200)
    fig.savefig("model_performance_comparison.png", format='png', bbox_inches='tight', dpi=300)
    print("\n💾 图表已保存为：model_performance_comparison.pdf/png")
    # Close this specific figure rather than whichever one is "current".
    plt.close(fig)

—

主程序执行段

按流程执行数据加载、预处理、模型训练评估、可视化；遵循数据处理 - 建模 - 评估 - 可视化流程，依次调用对应函数完成全流程执行。

if __name__ == "__main__":
    # End-to-end run: data -> split/scale -> fit/evaluate -> figure.
    dataset = load_or_generate_data(DATA_FILE_PATH)
    prepared_splits = preprocess_data(dataset)
    metrics_table = train_models(*prepared_splits)
    plot_grouped_horizontal_bars(metrics_table)

私信我“260104”，免费获取完整代码和示例数据。

本文来自网友投稿或网络内容，如有侵犯您的权益请联系我们删除，联系邮箱：wyl860211@qq.com 。

python科研绘图-绘制多模型对比条形图

最新文章

热门文章

随机文章

python科研绘图-绘制多模型对比条形图

干货 | Python爬虫与金融文本分析(实战篇)

Linux核心目录功能速查表

最新文章

热门文章

随机文章