当前位置：首页>python>Python绘制带残差分析的高级回归散点图

Python绘制带残差分析的高级回归散点图

2026-02-27 21:07:38

带残差分析的高级回归散点图

在机器学习回归任务或化学计量学建模（如 PLS, SVR, RF）中，我们通常需要绘制 “预测值 vs 真实值” 的散点图来展示模型性能。

然而，普通的散点图往往信息密度不足：

- 无法直观看到数据在不同区间的分布密度；

- 残差（Residuals）的分布情况往往需要单独画一张图；

- 很难一眼判断模型在训练集和测试集上的表现是否存在统计学上的显著差异。

今天分享一套基于 Python Matplotlib 的高级绘图模板，它能在一张图上同时展示：回归散点 + 边缘分布密度 + 残差分布/箱线图 + 显著性差异检验。

下图为参考文章图

风格一：经典直方图版 —— 关注误差分布形态

这张图在主散点图的基础上，做到了“三个维度的信息增强”：

1. 主图（Main Plot）：展示 Experimental Yield vs Predicted Yield，包含 y = x 对角线、线性拟合线以及 95% 置信区间（灰色阴影），并标注测试集的 $R^2$ 和 RMSE（`r2_test`、`rmse_test` 需在运行绘图代码前计算好）。

2. 边缘图（Marginal Plots）：顶部和右侧分别附带了真实值和预测值的直方图 + 核密度估计（KDE）曲线，让你一眼看清数据是否存在偏态或样本不平衡。

3. 嵌入残差图（Inset Histogram）：在右下角嵌入了残差分布直方图。

- 亮点：用虚线清晰标注了训练集和测试集残差的均值位置。如果两条线距离很近且接近 0，说明模型偏差很小且泛化能力强。

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.stats import gaussian_kde
from scipy.stats import t as student_t

# ------------------------------
# Regression scatter plot (example style: main plot + marginal histograms),
# with an inset histogram of the train/test residuals.
#
# The script reads six names it does not define; they must exist beforehand:
#   y_train, y_test            - values plotted on the x axis
#   y_pred_train, y_pred_test  - values plotted on the y axis
#   r2_test, rmse_test         - numbers printed in the red annotation
# ------------------------------
font_size = 12
train_color = "#7f7f7f"
test_color = "#f28e2b"

fig, ax = plt.subplots(figsize=(5, 5))

# Flatten every input so that (n,) and (n, 1) arrays pair up element-wise
# instead of broadcasting against each other in the masks further down.
y_train_arr = np.asarray(y_train, dtype=float).ravel()
y_test_arr = np.asarray(y_test, dtype=float).ravel()
y_pred_train_arr = np.asarray(y_pred_train, dtype=float).ravel()
y_pred_test_arr = np.asarray(y_pred_test, dtype=float).ravel()

ax.scatter(y_train_arr, y_pred_train_arr, s=25, color=train_color, alpha=0.65,
           edgecolors='none', label='Train data')
ax.scatter(y_test_arr, y_pred_test_arr, s=25, color=test_color, alpha=0.80,
           edgecolors='none', label='Test data')

# Use one padded range for both axes so the y = x diagonal runs corner to corner.
all_true = np.concatenate([y_train_arr, y_test_arr])
all_pred = np.concatenate([y_pred_train_arr, y_pred_test_arr])
vmin = float(min(np.nanmin(all_true), np.nanmin(all_pred)))
vmax = float(max(np.nanmax(all_true), np.nanmax(all_pred)))
pad = (vmax - vmin) * 0.05 if vmax > vmin else 1.0
vmin -= pad
vmax += pad
ax.set_xlim(vmin, vmax)
ax.set_ylim(vmin, vmax)
ax.plot([vmin, vmax], [vmin, vmax], color='k', linestyle='--', linewidth=1.1, alpha=0.85)

# Least-squares line through all finite (true, predicted) pairs, train and test pooled.
x = all_true.copy()
y = all_pred.copy()
mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
x_fit = np.linspace(vmin, vmax, 200)
if x.size >= 2:
    slope, intercept = np.polyfit(x, y, 1)
    y_fit = slope * x_fit + intercept
    y_hat = slope * x + intercept
    denom = max(x.size - 2, 1)
    s_err = np.sqrt(np.sum((y - y_hat) ** 2) / denom)
    x_mean = float(np.mean(x))
    sxx = float(np.sum((x - x_mean) ** 2))
    if x.size > 2 and sxx > 0:
        # 95% band for the fitted mean. The critical value comes from Student's t
        # with n - 2 degrees of freedom; it approaches 1.96 as n grows.
        t_crit = float(student_t.ppf(0.975, x.size - 2))
        ci = t_crit * s_err * np.sqrt(1.0 / x.size + (x_fit - x_mean) ** 2 / sxx)
        ax.fill_between(x_fit, y_fit - ci, y_fit + ci, color="#bdbdbd", alpha=0.25, linewidth=0)
    ax.plot(x_fit, y_fit, color='k', linewidth=1.6, label='Fitted line')

# Marginal axes: x-axis values on top, y-axis values on the right.
divider = make_axes_locatable(ax)
ax_top = divider.append_axes("top", size=1.0, pad=0.10, sharex=ax)
ax_right = divider.append_axes("right", size=1.0, pad=0.10, sharey=ax)

bins = 18
edge_lw = 0.6
edge_color = 'black'
train_true = y_train_arr[np.isfinite(y_train_arr)]
test_true = y_test_arr[np.isfinite(y_test_arr)]
train_pred = y_pred_train_arr[np.isfinite(y_pred_train_arr)]
test_pred = y_pred_test_arr[np.isfinite(y_pred_test_arr)]

ax_top.hist(train_true, bins=bins, range=(vmin, vmax), color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_top.hist(test_true, bins=bins, range=(vmin, vmax), color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(train_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=train_color, alpha=0.35, edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(test_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=test_color, alpha=0.65, edgecolor=edge_color, linewidth=edge_lw)

# KDE curves are scaled by (sample count * bin width) so they sit on the same
# count scale as the histograms drawn above.
x_plot = np.linspace(vmin, vmax, 200)
y_plot = np.linspace(vmin, vmax, 200)
bin_width = (vmax - vmin) / bins if vmax > vmin else 1.0
for data, color in [(train_true, train_color), (test_true, test_color)]:
    # gaussian_kde cannot be built from constant data, so skip the curve then.
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(x_plot)
        ax_top.plot(x_plot, density * data.size * bin_width, color=color, linewidth=2.0, alpha=0.95)
for data, color in [(train_pred, train_color), (test_pred, test_color)]:
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(y_plot)
        ax_right.plot(density * data.size * bin_width, y_plot, color=color, linewidth=2.0, alpha=0.95)

ax_top.axis('off')
ax_right.axis('off')

ax.grid(True, color='#d0d0d0', linewidth=0.8, alpha=0.8)
ax.set_xlabel('Experimental Yield', fontsize=font_size + 4, fontweight='bold')
ax.set_ylabel('Predicted Yield', fontsize=font_size + 4, fontweight='bold')
ax.tick_params(axis='both', labelsize=font_size)
ax.xaxis.set_major_locator(MaxNLocator(6))
ax.yaxis.set_major_locator(MaxNLocator(6))

# Panel letter just outside the top-left corner of the main axes.
ax.text(-0.14, 1.02, '(f)', transform=ax.transAxes, ha='left', va='bottom',
        fontsize=font_size + 2, fontweight='bold')
# Optional model-name label, disabled in the original:
# ax.text(0.5, 0.98, 'SVR', transform=ax.transAxes, ha='center', va='top', fontsize=font_size + 2, fontweight='bold')
ax.text(
    0.05,
    0.88,
    f"$R^2_{{test}}$={r2_test:.2f}\nRMSE$_{{test}}$={rmse_test:.2f}",
    transform=ax.transAxes,
    ha='left',
    va='top',
    fontsize=font_size - 1,
    color='red',
)

# Residuals are predicted minus true, using only pairs where both values are finite.
train_mask = np.isfinite(y_train_arr) & np.isfinite(y_pred_train_arr)
test_mask = np.isfinite(y_test_arr) & np.isfinite(y_pred_test_arr)
train_resid = y_pred_train_arr[train_mask] - y_train_arr[train_mask]
test_resid = y_pred_test_arr[test_mask] - y_test_arr[test_mask]
train_resid_mean = float(np.mean(train_resid)) if train_resid.size > 0 else np.nan
test_resid_mean = float(np.mean(test_resid)) if test_resid.size > 0 else np.nan

ax_res = inset_axes(
    ax,
    width="38%",
    height="32%",
    loc="lower right",
    borderpad=0.0,
    bbox_to_anchor=(-0.1, 0.15, 1.0, 1.0),
    bbox_transform=ax.transAxes,
)

res_all = (
    np.concatenate([train_resid, test_resid])
    if (train_resid.size + test_resid.size) > 0
    else np.asarray([], dtype=float)
)
res_range = None
if res_all.size > 0:
    # Symmetric limits keep the zero line in the middle of the inset.
    rmax = float(np.nanmax(np.abs(res_all)))
    rpad = rmax * 0.10 if rmax > 0 else 1.0
    res_range = (-rmax - rpad, rmax + rpad)
    ax_res.set_xlim(*res_range)

# Both histograms share one range, hence identical bin edges, so their bar
# heights can be compared directly.
res_bins = 16
ax_res.hist(train_resid, bins=res_bins, range=res_range, color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_res.hist(test_resid, bins=res_bins, range=res_range, color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_res.axvline(0, color='k', linestyle='--', linewidth=1.0, alpha=0.85)
if np.isfinite(train_resid_mean):
    ax_res.axvline(train_resid_mean, color=train_color, linestyle='--', linewidth=1.6, alpha=0.95)
if np.isfinite(test_resid_mean):
    ax_res.axvline(test_resid_mean, color=test_color, linestyle='--', linewidth=1.6, alpha=0.95)

# Write each mean next to its dashed line, on the side facing the inset centre,
# with the two labels at different heights.
x_min_res, x_max_res = ax_res.get_xlim()
y_min_res, y_max_res = ax_res.get_ylim()
dx_res = (x_max_res - x_min_res) * 0.02
y_text_train = y_max_res - (y_max_res - y_min_res) * 0.10
y_text_test = y_max_res - (y_max_res - y_min_res) * 0.25
if np.isfinite(train_resid_mean):
    if train_resid_mean > (x_min_res + x_max_res) / 2:
        x_text_train = train_resid_mean - dx_res
        ha_train = 'right'
    else:
        x_text_train = train_resid_mean + dx_res
        ha_train = 'left'
    ax_res.text(
        x_text_train,
        y_text_train,
        f"{train_resid_mean:.2f}",
        ha=ha_train,
        va='top',
        fontsize=font_size - 4,
        color=train_color,
    )
if np.isfinite(test_resid_mean):
    if test_resid_mean > (x_min_res + x_max_res) / 2:
        x_text_test = test_resid_mean - dx_res
        ha_test = 'right'
    else:
        x_text_test = test_resid_mean + dx_res
        ha_test = 'left'
    ax_res.text(
        x_text_test,
        y_text_test,
        f"{test_resid_mean:.2f}",
        ha=ha_test,
        va='top',
        fontsize=font_size - 4,
        color=test_color,
    )

ax_res.grid(True, color='#e0e0e0', linewidth=0.6, alpha=0.7)
ax_res.spines['left'].set_visible(False)
ax_res.spines['top'].set_visible(False)
ax_res.spines['right'].set_visible(False)
ax_res.tick_params(axis='y', left=False, labelleft=False)
ax_res.set_xlabel('Residual', fontsize=font_size - 3, fontweight='bold')
ax_res.tick_params(axis='x', labelsize=font_size - 3)
ax_res.xaxis.set_major_locator(MaxNLocator(5))
ax_res.yaxis.set_major_locator(MaxNLocator(4))

ax.legend(loc='upper right', frameon=True, fontsize=font_size - 1)
plt.tight_layout()
plt.savefig('scatter_plot5.png', dpi=300)
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.stats import gaussian_kde
from scipy.stats import t as student_t

# ------------------------------
# Regression scatter plot (example style: main plot + marginal histograms),
# with an inset histogram of the train/test residuals. Pastel colour variant.
#
# The script reads six names it does not define; they must exist beforehand:
#   y_train, y_test            - values plotted on the x axis
#   y_pred_train, y_pred_test  - values plotted on the y axis
#   r2_test, rmse_test         - numbers printed in the red annotation
# ------------------------------
font_size = 12
train_color = "#edacac"
test_color = "#c1d9eb"

fig, ax = plt.subplots(figsize=(5, 5))

# Flatten every input so that (n,) and (n, 1) arrays pair up element-wise
# instead of broadcasting against each other in the masks further down.
y_train_arr = np.asarray(y_train, dtype=float).ravel()
y_test_arr = np.asarray(y_test, dtype=float).ravel()
y_pred_train_arr = np.asarray(y_pred_train, dtype=float).ravel()
y_pred_test_arr = np.asarray(y_pred_test, dtype=float).ravel()

ax.scatter(y_train_arr, y_pred_train_arr, s=25, color=train_color, alpha=0.65,
           edgecolors='none', label='Train data')
ax.scatter(y_test_arr, y_pred_test_arr, s=25, color=test_color, alpha=0.80,
           edgecolors='none', label='Test data')

# Use one padded range for both axes so the y = x diagonal runs corner to corner.
all_true = np.concatenate([y_train_arr, y_test_arr])
all_pred = np.concatenate([y_pred_train_arr, y_pred_test_arr])
vmin = float(min(np.nanmin(all_true), np.nanmin(all_pred)))
vmax = float(max(np.nanmax(all_true), np.nanmax(all_pred)))
pad = (vmax - vmin) * 0.05 if vmax > vmin else 1.0
vmin -= pad
vmax += pad
ax.set_xlim(vmin, vmax)
ax.set_ylim(vmin, vmax)
ax.plot([vmin, vmax], [vmin, vmax], color='k', linestyle='--', linewidth=1.1, alpha=0.85)

# Least-squares line through all finite (true, predicted) pairs, train and test pooled.
x = all_true.copy()
y = all_pred.copy()
mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
x_fit = np.linspace(vmin, vmax, 200)
if x.size >= 2:
    slope, intercept = np.polyfit(x, y, 1)
    y_fit = slope * x_fit + intercept
    y_hat = slope * x + intercept
    denom = max(x.size - 2, 1)
    s_err = np.sqrt(np.sum((y - y_hat) ** 2) / denom)
    x_mean = float(np.mean(x))
    sxx = float(np.sum((x - x_mean) ** 2))
    if x.size > 2 and sxx > 0:
        # 95% band for the fitted mean. The critical value comes from Student's t
        # with n - 2 degrees of freedom; it approaches 1.96 as n grows.
        t_crit = float(student_t.ppf(0.975, x.size - 2))
        ci = t_crit * s_err * np.sqrt(1.0 / x.size + (x_fit - x_mean) ** 2 / sxx)
        ax.fill_between(x_fit, y_fit - ci, y_fit + ci, color="#bdbdbd", alpha=0.25, linewidth=0)
    ax.plot(x_fit, y_fit, color='k', linewidth=1.6, label='Fitted line')

# Marginal axes: x-axis values on top, y-axis values on the right.
divider = make_axes_locatable(ax)
ax_top = divider.append_axes("top", size=1.0, pad=0.10, sharex=ax)
ax_right = divider.append_axes("right", size=1.0, pad=0.10, sharey=ax)

bins = 18
edge_lw = 0.6
edge_color = 'black'
train_true = y_train_arr[np.isfinite(y_train_arr)]
test_true = y_test_arr[np.isfinite(y_test_arr)]
train_pred = y_pred_train_arr[np.isfinite(y_pred_train_arr)]
test_pred = y_pred_test_arr[np.isfinite(y_pred_test_arr)]

ax_top.hist(train_true, bins=bins, range=(vmin, vmax), color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_top.hist(test_true, bins=bins, range=(vmin, vmax), color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(train_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=train_color, alpha=0.35, edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(test_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=test_color, alpha=0.65, edgecolor=edge_color, linewidth=edge_lw)

# KDE curves are scaled by (sample count * bin width) so they sit on the same
# count scale as the histograms drawn above.
x_plot = np.linspace(vmin, vmax, 200)
y_plot = np.linspace(vmin, vmax, 200)
bin_width = (vmax - vmin) / bins if vmax > vmin else 1.0
for data, color in [(train_true, train_color), (test_true, test_color)]:
    # gaussian_kde cannot be built from constant data, so skip the curve then.
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(x_plot)
        ax_top.plot(x_plot, density * data.size * bin_width, color=color, linewidth=2.0, alpha=0.95)
for data, color in [(train_pred, train_color), (test_pred, test_color)]:
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(y_plot)
        ax_right.plot(density * data.size * bin_width, y_plot, color=color, linewidth=2.0, alpha=0.95)

ax_top.axis('off')
ax_right.axis('off')

ax.grid(True, color='#d0d0d0', linewidth=0.8, alpha=0.8)
ax.set_xlabel('Experimental Yield', fontsize=font_size + 4, fontweight='bold')
ax.set_ylabel('Predicted Yield', fontsize=font_size + 4, fontweight='bold')
ax.tick_params(axis='both', labelsize=font_size)
ax.xaxis.set_major_locator(MaxNLocator(6))
ax.yaxis.set_major_locator(MaxNLocator(6))

# Panel letter just outside the top-left corner of the main axes.
ax.text(-0.14, 1.02, '(f)', transform=ax.transAxes, ha='left', va='bottom',
        fontsize=font_size + 2, fontweight='bold')
# Optional model-name label, disabled in the original:
# ax.text(0.5, 0.98, 'SVR', transform=ax.transAxes, ha='center', va='top', fontsize=font_size + 2, fontweight='bold')
ax.text(
    0.05,
    0.88,
    f"$R^2_{{test}}$={r2_test:.2f}\nRMSE$_{{test}}$={rmse_test:.2f}",
    transform=ax.transAxes,
    ha='left',
    va='top',
    fontsize=font_size - 1,
    color='red',
)

# Residuals are predicted minus true, using only pairs where both values are finite.
train_mask = np.isfinite(y_train_arr) & np.isfinite(y_pred_train_arr)
test_mask = np.isfinite(y_test_arr) & np.isfinite(y_pred_test_arr)
train_resid = y_pred_train_arr[train_mask] - y_train_arr[train_mask]
test_resid = y_pred_test_arr[test_mask] - y_test_arr[test_mask]
train_resid_mean = float(np.mean(train_resid)) if train_resid.size > 0 else np.nan
test_resid_mean = float(np.mean(test_resid)) if test_resid.size > 0 else np.nan

ax_res = inset_axes(
    ax,
    width="38%",
    height="32%",
    loc="lower right",
    borderpad=0.0,
    bbox_to_anchor=(-0.1, 0.15, 1.0, 1.0),
    bbox_transform=ax.transAxes,
)

res_all = (
    np.concatenate([train_resid, test_resid])
    if (train_resid.size + test_resid.size) > 0
    else np.asarray([], dtype=float)
)
res_range = None
if res_all.size > 0:
    # Symmetric limits keep the zero line in the middle of the inset.
    rmax = float(np.nanmax(np.abs(res_all)))
    rpad = rmax * 0.10 if rmax > 0 else 1.0
    res_range = (-rmax - rpad, rmax + rpad)
    ax_res.set_xlim(*res_range)

# Both histograms share one range, hence identical bin edges, so their bar
# heights can be compared directly.
res_bins = 16
ax_res.hist(train_resid, bins=res_bins, range=res_range, color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_res.hist(test_resid, bins=res_bins, range=res_range, color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_res.axvline(0, color='k', linestyle='--', linewidth=1.0, alpha=0.85)
if np.isfinite(train_resid_mean):
    ax_res.axvline(train_resid_mean, color=train_color, linestyle='--', linewidth=1.6, alpha=0.95)
if np.isfinite(test_resid_mean):
    ax_res.axvline(test_resid_mean, color=test_color, linestyle='--', linewidth=1.6, alpha=0.95)

# Write each mean next to its dashed line, on the side facing the inset centre,
# with the two labels at different heights.
x_min_res, x_max_res = ax_res.get_xlim()
y_min_res, y_max_res = ax_res.get_ylim()
dx_res = (x_max_res - x_min_res) * 0.02
y_text_train = y_max_res - (y_max_res - y_min_res) * 0.10
y_text_test = y_max_res - (y_max_res - y_min_res) * 0.25
if np.isfinite(train_resid_mean):
    if train_resid_mean > (x_min_res + x_max_res) / 2:
        x_text_train = train_resid_mean - dx_res
        ha_train = 'right'
    else:
        x_text_train = train_resid_mean + dx_res
        ha_train = 'left'
    ax_res.text(
        x_text_train,
        y_text_train,
        f"{train_resid_mean:.2f}",
        ha=ha_train,
        va='top',
        fontsize=font_size - 4,
        color=train_color,
    )
if np.isfinite(test_resid_mean):
    if test_resid_mean > (x_min_res + x_max_res) / 2:
        x_text_test = test_resid_mean - dx_res
        ha_test = 'right'
    else:
        x_text_test = test_resid_mean + dx_res
        ha_test = 'left'
    ax_res.text(
        x_text_test,
        y_text_test,
        f"{test_resid_mean:.2f}",
        ha=ha_test,
        va='top',
        fontsize=font_size - 4,
        color=test_color,
    )

ax_res.grid(True, color='#e0e0e0', linewidth=0.6, alpha=0.7)
ax_res.spines['left'].set_visible(False)
ax_res.spines['top'].set_visible(False)
ax_res.spines['right'].set_visible(False)
ax_res.tick_params(axis='y', left=False, labelleft=False)
ax_res.set_xlabel('Residual', fontsize=font_size - 3, fontweight='bold')
ax_res.tick_params(axis='x', labelsize=font_size - 3)
ax_res.xaxis.set_major_locator(MaxNLocator(5))
ax_res.yaxis.set_major_locator(MaxNLocator(4))

ax.legend(loc='upper right', frameon=True, fontsize=font_size - 1)
plt.tight_layout()
plt.savefig('scatter_plot4.png', dpi=300)
plt.show()

风格二：统计箱线图版 —— 关注显著性差异

如果你想让图表更具“统计学说服力”，推荐使用第二种风格。它将右下角的直方图替换为了水平箱线图（Boxplot）。

1. 箱线图展示：清晰展示残差的中位数、四分位距以及离群点（Outliers）。

2. 显著性检验（Significance Test）：

- 代码集成了 Mann-Whitney U 检验（双侧）。

- 自动计算训练集与测试集残差分布的 P 值。

- 自动标注：根据 P 值大小，自动在图上画出括号并标记 ns（无显著差异，P ≥ 0.05）、`*`（P < 0.05）、`**`（P < 0.01）、`***`（P < 0.001）等符号。

- 科研意义：如果结果显示 ns（not significant），说明未检测到模型在训练集和测试集上的残差分布存在显著差异，这与模型没有明显过拟合、具有良好泛化能力的结论相符（注意：ns 只表示“未发现显著差异”，并不能严格证明差异不存在）。

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.stats import gaussian_kde, mannwhitneyu
from scipy.stats import t as student_t

# ------------------------------
# Regression scatter plot (example style: main plot + marginal histograms),
# with an inset box plot of the train/test residuals and a two-sided
# Mann-Whitney U significance marker.
#
# The script reads six names it does not define; they must exist beforehand:
#   y_train, y_test            - values plotted on the x axis
#   y_pred_train, y_pred_test  - values plotted on the y axis
#   r2_test, rmse_test         - numbers printed in the red annotation
# ------------------------------
font_size = 12
train_color = "#7f7f7f"
test_color = "#f28e2b"

fig, ax = plt.subplots(figsize=(5, 5))

# Flatten every input so that (n,) and (n, 1) arrays pair up element-wise
# instead of broadcasting against each other in the masks further down.
y_train_arr = np.asarray(y_train, dtype=float).ravel()
y_test_arr = np.asarray(y_test, dtype=float).ravel()
y_pred_train_arr = np.asarray(y_pred_train, dtype=float).ravel()
y_pred_test_arr = np.asarray(y_pred_test, dtype=float).ravel()

ax.scatter(y_train_arr, y_pred_train_arr, s=25, color=train_color, alpha=0.65,
           edgecolors='none', label='Train data')
ax.scatter(y_test_arr, y_pred_test_arr, s=25, color=test_color, alpha=0.80,
           edgecolors='none', label='Test data')

# Use one padded range for both axes so the y = x diagonal runs corner to corner.
all_true = np.concatenate([y_train_arr, y_test_arr])
all_pred = np.concatenate([y_pred_train_arr, y_pred_test_arr])
vmin = float(min(np.nanmin(all_true), np.nanmin(all_pred)))
vmax = float(max(np.nanmax(all_true), np.nanmax(all_pred)))
pad = (vmax - vmin) * 0.05 if vmax > vmin else 1.0
vmin -= pad
vmax += pad
ax.set_xlim(vmin, vmax)
ax.set_ylim(vmin, vmax)
ax.plot([vmin, vmax], [vmin, vmax], color='k', linestyle='--', linewidth=1.1, alpha=0.85)

# Least-squares line through all finite (true, predicted) pairs, train and test pooled.
x = all_true.copy()
y = all_pred.copy()
mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
x_fit = np.linspace(vmin, vmax, 200)
if x.size >= 2:
    slope, intercept = np.polyfit(x, y, 1)
    y_fit = slope * x_fit + intercept
    y_hat = slope * x + intercept
    denom = max(x.size - 2, 1)
    s_err = np.sqrt(np.sum((y - y_hat) ** 2) / denom)
    x_mean = float(np.mean(x))
    sxx = float(np.sum((x - x_mean) ** 2))
    if x.size > 2 and sxx > 0:
        # 95% band for the fitted mean. The critical value comes from Student's t
        # with n - 2 degrees of freedom; it approaches 1.96 as n grows.
        t_crit = float(student_t.ppf(0.975, x.size - 2))
        ci = t_crit * s_err * np.sqrt(1.0 / x.size + (x_fit - x_mean) ** 2 / sxx)
        ax.fill_between(x_fit, y_fit - ci, y_fit + ci, color="#bdbdbd", alpha=0.25, linewidth=0)
    ax.plot(x_fit, y_fit, color='k', linewidth=1.6, label='Fitted line')

# Marginal axes: x-axis values on top, y-axis values on the right.
divider = make_axes_locatable(ax)
ax_top = divider.append_axes("top", size=1.0, pad=0.10, sharex=ax)
ax_right = divider.append_axes("right", size=1.0, pad=0.10, sharey=ax)

bins = 18
edge_lw = 0.6
edge_color = 'black'
train_true = y_train_arr[np.isfinite(y_train_arr)]
test_true = y_test_arr[np.isfinite(y_test_arr)]
train_pred = y_pred_train_arr[np.isfinite(y_pred_train_arr)]
test_pred = y_pred_test_arr[np.isfinite(y_pred_test_arr)]

ax_top.hist(train_true, bins=bins, range=(vmin, vmax), color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_top.hist(test_true, bins=bins, range=(vmin, vmax), color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(train_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=train_color, alpha=0.35, edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(test_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=test_color, alpha=0.65, edgecolor=edge_color, linewidth=edge_lw)

# KDE curves are scaled by (sample count * bin width) so they sit on the same
# count scale as the histograms drawn above.
x_plot = np.linspace(vmin, vmax, 200)
y_plot = np.linspace(vmin, vmax, 200)
bin_width = (vmax - vmin) / bins if vmax > vmin else 1.0
for data, color in [(train_true, train_color), (test_true, test_color)]:
    # gaussian_kde cannot be built from constant data, so skip the curve then.
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(x_plot)
        ax_top.plot(x_plot, density * data.size * bin_width, color=color, linewidth=2.0, alpha=0.95)
for data, color in [(train_pred, train_color), (test_pred, test_color)]:
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(y_plot)
        ax_right.plot(density * data.size * bin_width, y_plot, color=color, linewidth=2.0, alpha=0.95)

ax_top.axis('off')
ax_right.axis('off')

ax.grid(True, color='#d0d0d0', linewidth=0.8, alpha=0.8)
ax.set_xlabel('Experimental Yield', fontsize=font_size + 4, fontweight='bold')
ax.set_ylabel('Predicted Yield', fontsize=font_size + 4, fontweight='bold')
ax.tick_params(axis='both', labelsize=font_size)
ax.xaxis.set_major_locator(MaxNLocator(6))
ax.yaxis.set_major_locator(MaxNLocator(6))

# Panel letter just outside the top-left corner of the main axes.
ax.text(-0.14, 1.02, '(f)', transform=ax.transAxes, ha='left', va='bottom',
        fontsize=font_size + 2, fontweight='bold')
# Optional model-name label, disabled in the original:
# ax.text(0.5, 0.98, 'SVR', transform=ax.transAxes, ha='center', va='top', fontsize=font_size + 2, fontweight='bold')
ax.text(
    0.05,
    0.88,
    f"$R^2_{{test}}$={r2_test:.2f}\nRMSE$_{{test}}$={rmse_test:.2f}",
    transform=ax.transAxes,
    ha='left',
    va='top',
    fontsize=font_size - 1,
    color='red',
)

# Residuals are predicted minus true, using only pairs where both values are finite.
train_mask = np.isfinite(y_train_arr) & np.isfinite(y_pred_train_arr)
test_mask = np.isfinite(y_test_arr) & np.isfinite(y_pred_test_arr)
train_resid = y_pred_train_arr[train_mask] - y_train_arr[train_mask]
test_resid = y_pred_test_arr[test_mask] - y_test_arr[test_mask]

ax_res = inset_axes(
    ax,
    width="38%",
    height="32%",
    loc="lower right",
    borderpad=0.0,
    bbox_to_anchor=(-0.1, 0.15, 1.0, 1.0),
    bbox_transform=ax.transAxes,
)

res_all = (
    np.concatenate([train_resid, test_resid])
    if (train_resid.size + test_resid.size) > 0
    else np.asarray([], dtype=float)
)
if res_all.size > 0:
    # Symmetric limits keep the zero line in the middle of the inset.
    rmax = float(np.nanmax(np.abs(res_all)))
    rpad = rmax * 0.10 if rmax > 0 else 1.0
    ax_res.set_xlim(-rmax - rpad, rmax + rpad)
ax_res.axvline(0, color='k', linestyle='--', linewidth=1.0, alpha=0.85)

train_resid_clean = np.asarray(train_resid[np.isfinite(train_resid)], dtype=float)
test_resid_clean = np.asarray(test_resid[np.isfinite(test_resid)], dtype=float)

# Horizontal boxes: the first dataset (train) sits at y = 1, the second (test) at y = 2.
bp = ax_res.boxplot(
    [train_resid_clean, test_resid_clean],
    vert=False,
    patch_artist=True,
    widths=0.55,
    showfliers=True,
    boxprops=dict(linewidth=edge_lw, color=edge_color),
    medianprops=dict(linewidth=1.2, color='k'),
    whiskerprops=dict(linewidth=edge_lw, color=edge_color),
    capprops=dict(linewidth=edge_lw, color=edge_color),
    flierprops=dict(marker='o', markersize=2.8, markerfacecolor='none',
                    markeredgecolor=edge_color, alpha=0.6),
)
for patch, color, alpha in zip(bp['boxes'], [train_color, test_color], [0.35, 0.65]):
    patch.set_facecolor(color)
    patch.set_alpha(alpha)
ax_res.set_yticks([])

# Two-sided Mann-Whitney U test on the residuals. The label stays "n/a" unless
# each group has at least two values.
p_value = np.nan
sig_text = "n/a"
if train_resid_clean.size >= 2 and test_resid_clean.size >= 2:
    _, p_value = mannwhitneyu(train_resid_clean, test_resid_clean, alternative='two-sided')
    if p_value < 0.001:
        sig_text = "***"
    elif p_value < 0.01:
        sig_text = "**"
    elif p_value < 0.05:
        sig_text = "*"
    else:
        sig_text = "ns"

# Bracket near the right edge joining the two boxes, with the significance
# label written to its left.
x_min_res, x_max_res = ax_res.get_xlim()
x_range_res = x_max_res - x_min_res
x_bracket = x_max_res - x_range_res * 0.06
cap = x_range_res * 0.025
y1, y2 = 1, 2
ax_res.set_ylim(0.4, 2.6)
ax_res.plot([x_bracket, x_bracket], [y1, y2], color='k', linewidth=1.1)
ax_res.plot([x_bracket - cap, x_bracket], [y1, y1], color='k', linewidth=1.1)
ax_res.plot([x_bracket - cap, x_bracket], [y2, y2], color='k', linewidth=1.1)
ax_res.text(x_bracket - cap * 1.15, (y1 + y2) / 2, sig_text, ha='right', va='center',
            fontsize=font_size - 3, color='k')

ax_res.grid(True, axis='x', color='#e0e0e0', linewidth=0.6, alpha=0.7)
ax_res.spines['left'].set_visible(False)
ax_res.spines['top'].set_visible(False)
ax_res.spines['right'].set_visible(False)
ax_res.tick_params(axis='y', left=False, labelleft=False)
ax_res.set_xlabel('Residual', fontsize=font_size - 3, fontweight='bold')
ax_res.tick_params(axis='x', labelsize=font_size - 3)
ax_res.xaxis.set_major_locator(MaxNLocator(5))
ax_res.yaxis.set_major_locator(MaxNLocator(4))

ax.legend(loc='upper right', frameon=True, fontsize=font_size - 1)
plt.tight_layout()
plt.savefig('scatter_plot6.png', dpi=300)
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.stats import gaussian_kde, mannwhitneyu
from scipy.stats import t as student_t

# ------------------------------
# Regression scatter plot (example style: main plot + marginal histograms),
# with an inset box plot of the train/test residuals and a two-sided
# Mann-Whitney U significance marker. Pastel colour variant.
#
# The script reads six names it does not define; they must exist beforehand:
#   y_train, y_test            - values plotted on the x axis
#   y_pred_train, y_pred_test  - values plotted on the y axis
#   r2_test, rmse_test         - numbers printed in the red annotation
# ------------------------------
font_size = 12
train_color = "#e1b0b0"
test_color = "#c8e3eb"

fig, ax = plt.subplots(figsize=(5, 5))

# Flatten every input so that (n,) and (n, 1) arrays pair up element-wise
# instead of broadcasting against each other in the masks further down.
y_train_arr = np.asarray(y_train, dtype=float).ravel()
y_test_arr = np.asarray(y_test, dtype=float).ravel()
y_pred_train_arr = np.asarray(y_pred_train, dtype=float).ravel()
y_pred_test_arr = np.asarray(y_pred_test, dtype=float).ravel()

ax.scatter(y_train_arr, y_pred_train_arr, s=25, color=train_color, alpha=0.65,
           edgecolors='none', label='Train data')
ax.scatter(y_test_arr, y_pred_test_arr, s=25, color=test_color, alpha=0.80,
           edgecolors='none', label='Test data')

# Use one padded range for both axes so the y = x diagonal runs corner to corner.
all_true = np.concatenate([y_train_arr, y_test_arr])
all_pred = np.concatenate([y_pred_train_arr, y_pred_test_arr])
vmin = float(min(np.nanmin(all_true), np.nanmin(all_pred)))
vmax = float(max(np.nanmax(all_true), np.nanmax(all_pred)))
pad = (vmax - vmin) * 0.05 if vmax > vmin else 1.0
vmin -= pad
vmax += pad
ax.set_xlim(vmin, vmax)
ax.set_ylim(vmin, vmax)
ax.plot([vmin, vmax], [vmin, vmax], color='k', linestyle='--', linewidth=1.1, alpha=0.85)

# Least-squares line through all finite (true, predicted) pairs, train and test pooled.
x = all_true.copy()
y = all_pred.copy()
mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
x_fit = np.linspace(vmin, vmax, 200)
if x.size >= 2:
    slope, intercept = np.polyfit(x, y, 1)
    y_fit = slope * x_fit + intercept
    y_hat = slope * x + intercept
    denom = max(x.size - 2, 1)
    s_err = np.sqrt(np.sum((y - y_hat) ** 2) / denom)
    x_mean = float(np.mean(x))
    sxx = float(np.sum((x - x_mean) ** 2))
    if x.size > 2 and sxx > 0:
        # 95% band for the fitted mean. The critical value comes from Student's t
        # with n - 2 degrees of freedom; it approaches 1.96 as n grows.
        t_crit = float(student_t.ppf(0.975, x.size - 2))
        ci = t_crit * s_err * np.sqrt(1.0 / x.size + (x_fit - x_mean) ** 2 / sxx)
        ax.fill_between(x_fit, y_fit - ci, y_fit + ci, color="#bdbdbd", alpha=0.25, linewidth=0)
    ax.plot(x_fit, y_fit, color='k', linewidth=1.6, label='Fitted line')

# Marginal axes: x-axis values on top, y-axis values on the right.
divider = make_axes_locatable(ax)
ax_top = divider.append_axes("top", size=1.0, pad=0.10, sharex=ax)
ax_right = divider.append_axes("right", size=1.0, pad=0.10, sharey=ax)

bins = 18
edge_lw = 0.6
edge_color = 'black'
train_true = y_train_arr[np.isfinite(y_train_arr)]
test_true = y_test_arr[np.isfinite(y_test_arr)]
train_pred = y_pred_train_arr[np.isfinite(y_pred_train_arr)]
test_pred = y_pred_test_arr[np.isfinite(y_pred_test_arr)]

ax_top.hist(train_true, bins=bins, range=(vmin, vmax), color=train_color, alpha=0.35,
            edgecolor=edge_color, linewidth=edge_lw)
ax_top.hist(test_true, bins=bins, range=(vmin, vmax), color=test_color, alpha=0.65,
            edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(train_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=train_color, alpha=0.35, edgecolor=edge_color, linewidth=edge_lw)
ax_right.hist(test_pred, bins=bins, range=(vmin, vmax), orientation='horizontal',
              color=test_color, alpha=0.65, edgecolor=edge_color, linewidth=edge_lw)

# KDE curves are scaled by (sample count * bin width) so they sit on the same
# count scale as the histograms drawn above.
x_plot = np.linspace(vmin, vmax, 200)
y_plot = np.linspace(vmin, vmax, 200)
bin_width = (vmax - vmin) / bins if vmax > vmin else 1.0
for data, color in [(train_true, train_color), (test_true, test_color)]:
    # gaussian_kde cannot be built from constant data, so skip the curve then.
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(x_plot)
        ax_top.plot(x_plot, density * data.size * bin_width, color=color, linewidth=2.0, alpha=0.95)
for data, color in [(train_pred, train_color), (test_pred, test_color)]:
    if data.size > 1 and np.ptp(data) > 0:
        kde = gaussian_kde(np.asarray(data, dtype=float))
        density = kde(y_plot)
        ax_right.plot(density * data.size * bin_width, y_plot, color=color, linewidth=2.0, alpha=0.95)

ax_top.axis('off')
ax_right.axis('off')

ax.grid(True, color='#d0d0d0', linewidth=0.8, alpha=0.8)
ax.set_xlabel('Experimental Yield', fontsize=font_size + 4, fontweight='bold')
ax.set_ylabel('Predicted Yield', fontsize=font_size + 4, fontweight='bold')
ax.tick_params(axis='both', labelsize=font_size)
ax.xaxis.set_major_locator(MaxNLocator(6))
ax.yaxis.set_major_locator(MaxNLocator(6))

# Panel letter just outside the top-left corner of the main axes.
ax.text(-0.14, 1.02, '(f)', transform=ax.transAxes, ha='left', va='bottom',
        fontsize=font_size + 2, fontweight='bold')
# Optional model-name label, disabled in the original:
# ax.text(0.5, 0.98, 'SVR', transform=ax.transAxes, ha='center', va='top', fontsize=font_size + 2, fontweight='bold')
ax.text(
    0.05,
    0.88,
    f"$R^2_{{test}}$={r2_test:.2f}\nRMSE$_{{test}}$={rmse_test:.2f}",
    transform=ax.transAxes,
    ha='left',
    va='top',
    fontsize=font_size - 1,
    color='red',
)

# Residuals are predicted minus true, using only pairs where both values are finite.
train_mask = np.isfinite(y_train_arr) & np.isfinite(y_pred_train_arr)
test_mask = np.isfinite(y_test_arr) & np.isfinite(y_pred_test_arr)
train_resid = y_pred_train_arr[train_mask] - y_train_arr[train_mask]
test_resid = y_pred_test_arr[test_mask] - y_test_arr[test_mask]

ax_res = inset_axes(
    ax,
    width="38%",
    height="32%",
    loc="lower right",
    borderpad=0.0,
    bbox_to_anchor=(-0.1, 0.15, 1.0, 1.0),
    bbox_transform=ax.transAxes,
)

res_all = (
    np.concatenate([train_resid, test_resid])
    if (train_resid.size + test_resid.size) > 0
    else np.asarray([], dtype=float)
)
if res_all.size > 0:
    # Symmetric limits keep the zero line in the middle of the inset.
    rmax = float(np.nanmax(np.abs(res_all)))
    rpad = rmax * 0.10 if rmax > 0 else 1.0
    ax_res.set_xlim(-rmax - rpad, rmax + rpad)
ax_res.axvline(0, color='k', linestyle='--', linewidth=1.0, alpha=0.85)

train_resid_clean = np.asarray(train_resid[np.isfinite(train_resid)], dtype=float)
test_resid_clean = np.asarray(test_resid[np.isfinite(test_resid)], dtype=float)

# Horizontal boxes: the first dataset (train) sits at y = 1, the second (test) at y = 2.
bp = ax_res.boxplot(
    [train_resid_clean, test_resid_clean],
    vert=False,
    patch_artist=True,
    widths=0.55,
    showfliers=True,
    boxprops=dict(linewidth=edge_lw, color=edge_color),
    medianprops=dict(linewidth=1.2, color='k'),
    whiskerprops=dict(linewidth=edge_lw, color=edge_color),
    capprops=dict(linewidth=edge_lw, color=edge_color),
    flierprops=dict(marker='o', markersize=2.8, markerfacecolor='none',
                    markeredgecolor=edge_color, alpha=0.6),
)
for patch, color, alpha in zip(bp['boxes'], [train_color, test_color], [0.35, 0.65]):
    patch.set_facecolor(color)
    patch.set_alpha(alpha)
ax_res.set_yticks([])

# Two-sided Mann-Whitney U test on the residuals. The label stays "n/a" unless
# each group has at least two values.
p_value = np.nan
sig_text = "n/a"
if train_resid_clean.size >= 2 and test_resid_clean.size >= 2:
    _, p_value = mannwhitneyu(train_resid_clean, test_resid_clean, alternative='two-sided')
    if p_value < 0.001:
        sig_text = "***"
    elif p_value < 0.01:
        sig_text = "**"
    elif p_value < 0.05:
        sig_text = "*"
    else:
        sig_text = "ns"

# Bracket near the right edge joining the two boxes, with the significance
# label written to its left.
x_min_res, x_max_res = ax_res.get_xlim()
x_range_res = x_max_res - x_min_res
x_bracket = x_max_res - x_range_res * 0.06
cap = x_range_res * 0.025
y1, y2 = 1, 2
ax_res.set_ylim(0.4, 2.6)
ax_res.plot([x_bracket, x_bracket], [y1, y2], color='k', linewidth=1.1)
ax_res.plot([x_bracket - cap, x_bracket], [y1, y1], color='k', linewidth=1.1)
ax_res.plot([x_bracket - cap, x_bracket], [y2, y2], color='k', linewidth=1.1)
ax_res.text(x_bracket - cap * 1.15, (y1 + y2) / 2, sig_text, ha='right', va='center',
            fontsize=font_size - 3, color='k')

ax_res.grid(True, axis='x', color='#e0e0e0', linewidth=0.6, alpha=0.7)
ax_res.spines['left'].set_visible(False)
ax_res.spines['top'].set_visible(False)
ax_res.spines['right'].set_visible(False)
ax_res.tick_params(axis='y', left=False, labelleft=False)
ax_res.set_xlabel('Residual', fontsize=font_size - 3, fontweight='bold')
ax_res.tick_params(axis='x', labelsize=font_size - 3)
ax_res.xaxis.set_major_locator(MaxNLocator(5))
ax_res.yaxis.set_major_locator(MaxNLocator(4))

ax.legend(loc='upper right', frameon=True, fontsize=font_size - 1)
plt.tight_layout()
plt.savefig('scatter_plot7.png', dpi=300)
plt.show()