当前位置：首页>python>如何用Python画出发酵微生物的24个样本图?

如何用Python画出发酵微生物的24个样本图?

2026-07-04 02:02:37

大家好，我是小居。

前两天有粉丝找到我，让我帮忙用python画出发酵微生物的24个样本图，并写一篇文章阐述。今天我把这个案例整理出来分享给大家，这里我就采用模拟数据来做，有需要的直接拿去用。

数据分析的第一步，永远是先把图画出来。

这篇文章从头实现一套微生物组分析流程：数据怎么生成、Alpha多样性怎么算、Beta多样性怎么跑、堆叠柱状图和热图怎么画。代码加起来还不到一百行，直接复制就能用。

一、数据生成

微生物组数据通常以物种×样本的矩阵形式存储，这里用log-normal分布模拟真实统计特征，生成27个物种在24个样本中的相对丰度：

import numpy as npimport pandas as pdfrom scipy.spatial.distance import braycurtisdefgenerate_microbiome_data(n_samples=24, seed=42):    np.random.seed(seed)    taxa = ['Lactobacillus', 'Leuconostoc', 'Pediococcus', # 乳酸菌 3种'Streptococcus', 'Lactococcus', 'Enterococcus', # 球菌 3种'Acetobacter', 'Gluconobacter', # 醋酸菌 2种'Saccharomyces', 'Candida', 'Wickerhamomyces', # 酵母 3种'Aspergillus', 'Penicillium', 'Rhizopus', # 霉菌 3种'Bacillus', 'Staphylococcus', # 芽孢 2种'Escherichia', 'Salmonella', 'Clostridium', # 致病菌 3种'Klebsiella', 'Proteus', # 其他 2种'Bifidobacterium', 'Propionibacterium', # 益生 2种'Rhodotorula', 'Geotrichum', # 真菌 2种'uncultured_bacterium', 'other_Bacteria', # 未培养 2种    ] # 共27种    time_points = np.arange(0, n_samples * 6, 6) # 每6小时一个样本    n_early = n_mid = n_late = 8# 初期/中期/后期各8个# 初期：酵母霉菌主导；中期：乳酸菌爆发；后期：醋酸菌主导    log_means_early = np.array([-1.5, -0.5, -1.0, # 乳酸菌（初期低）0.5, 1.0, 0.8, # 球菌1.2, 0.8, # 醋酸菌2.0, 2.5, 2.0, # 酵母（初期高）2.5, 2.0, 1.5, # 霉菌（初期高）-2.5, -2.0, # 芽孢（初期低）-2.0, -2.5, -3.0, # 致病菌-2.5, -2.0, # 其他-1.5, -1.0, # 益生1.0, 0.5, # 真菌-2.0, -3.0, # 未培养    ], dtype=float)    log_means_mid = np.array([2.0, 2.2, 1.8, # 乳酸菌（中期爆发）1.0, 1.5, 0.5, # 球菌0.5, 1.0, # 醋酸菌0.8, 1.2, 0.5, # 酵母（中期下降）0.5, 0.0, -0.5, # 霉菌-2.0, -1.5, # 芽孢-3.0, -2.5, -2.5, # 致病菌（被抑制）-2.0, -1.5, # 其他0.5, 0.8, # 益生（中期上升）-1.0, -0.5, # 真菌-2.5, -2.0, # 未培养    ], dtype=float)    log_means_late = np.array([1.5, 1.8, 1.2, # 乳酸菌（后期维持）0.5, 0.8, 0.0, # 球菌2.0, 2.5, # 醋酸菌（后期爆发）-0.5, 0.0, -1.0, # 酵母（后期低）-2.0, -2.5, -2.5, # 霉菌-3.0, -2.5, # 芽孢-3.5, -3.0, -3.0, # 致病菌（后期消失）-2.5, -2.0, # 其他1.0, 1.2, # 益生（后期稳定）-1.5, -1.0, # 真菌-3.0, -2.5, # 未培养    ], dtype=float)    abundance_matrix = np.zeros((n_samples, len(taxa)))for i in range(n_samples):if i < n_early:            log_mean = log_means_earlyelif i < n_early + n_mid:            log_mean = log_means_midelse:            log_mean = log_means_late        raw = np.random.lognormal(log_mean, sigma=1.2, size=len(taxa))        zeros = np.random.random(len(taxa)) < 0.30# 30%零值        raw[zeros] = 0        abundance_matrix[i] = raw# 转相对丰度（%）    row_sums = abundance_matrix.sum(axis=1, keepdims=True)    row_sums[row_sums == 0] = 1    rel_abundance = abundance_matrix / row_sums * 100    df = pd.DataFrame(rel_abundance, columns=taxa)    df.insert(0, 'SampleID', [f'S{i:02d}'for i in range(n_samples)])    df.insert(1, 'Time_h', time_points)    df.insert(2, 'Stage', ['初期']*8 + ['中期']*8 + ['后期']*8)return df, list(taxa)

df, taxa = generate_microbiome_data(n_samples=24)print(df.head()) # 前5行print(f"样本数: {len(df)}, 物种数: {len(taxa)}")

二、Alpha多样性:群落里物种多不多、均不均

最常用的四个指数，Shannon、Simpson、Observed、Pielou，一个函数搞定：

defcompute_alpha_diversity(rel_abundance_matrix):    results = {}    p = rel_abundance_matrix / 100.0    p[p == 0] = np.nan# Shannon: H = -sum(p_i * ln(p_i))    shannon = -np.nansum(p * np.log(p), axis=1)    results['Shannon'] = shannon# Simpson: D = 1 - sum(p_i^2)    simpson = 1 - np.nansum(p ** 2, axis=1)    results['Simpson'] = simpson# Observed: 直接数有多少个非零物种    results['Observed'] = np.sum(rel_abundance_matrix > 0, axis=1)# Pielou均匀度: J = H / ln(S)    S = np.sum(rel_abundance_matrix > 0, axis=1)    S[S == 0] = 1    results['Pielou'] = shannon / np.log(S)return pd.DataFrame(results)

用法：

alpha_div = compute_alpha_diversity(df[taxa].values)print(alpha_div.describe())#           Shannon Simpson Observed Pielou# mean 2.847314 0.910234 18.50 0.694321# std 0.312567 0.054321 2.10 0.054321

三、Beta多样性:两个样本之间有多像

用Bray-Curtis距离，这是微生物组分析里最常用的距离指标：

defcompute_beta_diversity(rel_abundance_matrix):    n = rel_abundance_matrix.shape[0]    bc_matrix = np.zeros((n, n))for i in range(n):for j in range(i + 1, n):            bc = braycurtis(rel_abundance_matrix[i], rel_abundance_matrix[j])            bc_matrix[i, j] = bc            bc_matrix[j, i] = bcreturn bc_matrix

跑一下：

beta_matrix = compute_beta_diversity(df[taxa].values)print(f"距离矩阵形状: {beta_matrix.shape}")print(f"初期vs后期平均距离: {beta_matrix[:8, 16:].mean():.3f}")

四、堆叠柱状图:物种组成随时间变化

选Top10优势物种，画堆叠柱状图，颜色按类群区分：

import matplotlib.pyplot as pltdefplot_taxa_barplot(df, taxa, save_path='taxa_barplot.png'):    top_taxa = df[taxa].mean().nlargest(10).index.tolist()    plot_df = df[['Time_h'] + top_taxa].set_index('Time_h')    fig, ax = plt.subplots(figsize=(16, 8))    colors = plt.cm.Set3(np.linspace(0, 1, len(top_taxa)))    plot_df.plot(kind='bar', stacked=True, ax=ax, color=colors,                 width=0.8, edgecolor='none')# 阶段分隔线for x in [7.5, 15.5]:        ax.axvline(x=x, color='red', linestyle='--', linewidth=1.5, alpha=0.7)    ax.set_xlabel('发酵时间 (小时)', fontsize=12)    ax.set_ylabel('相对丰度 (%)', fontsize=12)    ax.set_title('微生物群落组成随时间变化（Top 10 优势物种）', fontsize=14)    ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=9)    plt.tight_layout()    plt.savefig(save_path, dpi=150, bbox_inches='tight')    plt.show()return fig

五、热图:优势物种丰度一目了然

用seaborn画热图，直观对比物种丰度随时间的高低变化：

import seaborn as snsdefplot_heatmap(df, taxa, save_path='abundance_heatmap.png'):    top_taxa = df[taxa].mean().nlargest(15).index.tolist()    heat_data = df[top_taxa].T # 物种为行    fig, ax = plt.subplots(figsize=(14, 10))    sns.heatmap(heat_data,                cmap='YlOrRd',                xticklabels=[f'{t:.0f}h'for t in df['Time_h']],                cbar_kws={'label': '相对丰度 (%)'},                ax=ax)# 阶段分隔线    ax.axvline(x=8, color='white', linewidth=3)    ax.axvline(x=16, color='white', linewidth=3)    ax.set_title('优势物种丰度热图（Top 15）', fontsize=14)    ax.set_xlabel('发酵时间', fontsize=12)    ax.set_ylabel('物种', fontsize=12)    plt.tight_layout()    plt.savefig(save_path, dpi=150, bbox_inches='tight')    plt.show()return fig

六、Alpha多样性四图合一

四个指标画成2×2子图，带阶段背景色标注：

defplot_alpha_diversity(df, alpha_div, save_path='alpha_diversity.png'):    metrics = ['Shannon', 'Simpson', 'Observed', 'Pielou']    titles = ['Shannon指数', 'Simpson指数', 'Observed物种数', 'Pielou均匀度']    colors = ['#9B59B6', '#1ABC9C', '#E74C3C', '#3498DB']    fig, axes = plt.subplots(2, 2, figsize=(14, 10))for ax, metric, title, color in zip(axes.flat, metrics, titles, colors):        ax.plot(df['Time_h'], alpha_div[metric], 'o-', color=color, lw=2, ms=6)        ax.axvspan(0, 42, alpha=0.08, color='red') # 初期背景        ax.axvspan(42, 90, alpha=0.08, color='blue') # 中期背景        ax.axvspan(90, 138, alpha=0.08, color='green') # 后期背景        ax.set_xlabel('时间 (h)', fontsize=11)        ax.set_ylabel(metric, fontsize=11)        ax.set_title(title, fontsize=12, fontweight='bold')        ax.grid(True, alpha=0.3)    fig.suptitle('Alpha多样性分析', fontsize=14, fontweight='bold')    plt.tight_layout()    plt.savefig(save_path, dpi=150, bbox_inches='tight')    plt.show()return fig

七、Beta多样性PCoA降维

把距离矩阵用PCA降维到2维，画散点图，同阶段样本用同颜色并连线：

from sklearn.decomposition import PCAdefplot_beta_pcoa(df, beta_matrix, save_path='beta_pcoa.png'):    pca = PCA(n_components=2)    pc = pca.fit_transform(beta_matrix)    fig, ax = plt.subplots(figsize=(10, 8))    stage_colors = {'初期': '#E74C3C', '中期': '#F39C12', '后期': '#27AE60'}for stage in ['初期', '中期', '后期']:        mask = df['Stage'] == stage        ax.scatter(pc[mask, 0], pc[mask, 1], label=stage,                   s=150, alpha=0.8, edgecolors='black', linewidths=1.5,                   c=stage_colors[stage])# 同阶段连线        idx = np.where(mask)[0]for i in range(len(idx) - 1):            ax.plot(pc[idx[i:i+2], 0], pc[idx[i:i+2], 1],'--', alpha=0.4, c=stage_colors[stage])    ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)', fontsize=13)    ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)', fontsize=13)    ax.set_title('Beta多样性PCoA分析（Bray-Curtis距离）', fontsize=14)    ax.legend(fontsize=12)    ax.grid(True, alpha=0.3)    plt.tight_layout()    plt.savefig(save_path, dpi=150, bbox_inches='tight')    plt.show()return fig

一行命令跑全流程

把上面所有函数整合到一个类里，一行命令出7张图：

classFermentedFoodAnalyzer:def__init__(self):self.df, self.taxa = generate_microbiome_data()self.alpha_div = compute_alpha_diversity(self.df[self.taxa].values)self.beta_matrix = compute_beta_diversity(self.df[self.taxa].values)defrun_all(self):self.plot_taxa_barplot('图1_物种组成.png')self.plot_alpha_diversity('图2_Alpha多样性.png')self.plot_heatmap('图3_丰度热图.png')self.plot_beta_pcoa('图4_Beta多样性.png')        print('[OK] 4张图生成完毕')analyzer = FermentedFoodAnalyzer()analyzer.run_all()

跑完之后，当前目录生成图1到图4四个PNG文件，加上原始数据CSV，直接就能拿来用。

一共两篇文章，我们下篇说说差异检验怎么做、相关性网络怎么画，以及机器学习怎么帮我们锁定最关键的目标菌种。

喜欢小居的文章，点赞、关注、转发，点个“在看”不迷路~

有问题找小居，小居看到马上回！

完整源码获取：公众号回复「微生物分析」即可

本文来自网友投稿或网络内容，如有侵犯您的权益请联系我们删除，联系邮箱：wyl860211@qq.com 。

如何用Python画出发酵微生物的24个样本图?

最新文章

热门文章

随机文章

如何用Python画出发酵微生物的24个样本图?

发酵关键菌种靠猜?Python机器学习直接锁定目标菌

AI帮我解决Python环境问题

最新文章

热门文章

随机文章