第4讲:从零开始Python实现空间马尔科夫链
4.1 完整代码实现
import numpy as npimport pandas as pdimport matplotlib.pyplot as pltclass SpatialMarkovChain: """ 空间马尔科夫链 - 从零实现 参数: ----- k : int, 状态数量(分位数个数),默认为4 """ def __init__(self, k=4): self.k = k self.global_tpm = None # 全局转移概率矩阵 self.spatial_tpms = None # 空间条件转移概率矩阵 self.transitions = None # 全局转移计数矩阵 self.spatial_transitions = None # 空间条件转移计数矩阵 self.classes = None # 分位数边界 def _create_weight_matrix(self, n_rows, n_cols, contiguity='rook'): """ 为规则网格创建空间权重矩阵 参数: ----- n_rows, n_cols : 网格的行数和列数 contiguity : 'rook' 或 'queen' 返回: ----- W : 行标准化的空间权重矩阵 """ n = n_rows * n_cols W = np.zeros((n, n)) for i in range(n_rows): for j in range(n_cols): idx = i * n_cols + j # Rook邻接(上下左右) neighbors = [] if i > 0: neighbors.append((i-1) * n_cols + j) # 上 if i < n_rows-1: neighbors.append((i+1) * n_cols + j) # 下 if j > 0: neighbors.append(i * n_cols + (j-1)) # 左 if j < n_cols-1: neighbors.append(i * n_cols + (j+1)) # 右 # Queen额外加对角线 if contiguity == 'queen': if i > 0 and j > 0: neighbors.append((i-1)*n_cols + (j-1)) if i > 0 and j < n_cols-1: neighbors.append((i-1)*n_cols + (j+1)) if i < n_rows-1 and j > 0: neighbors.append((i+1)*n_cols + (j-1)) if i < n_rows-1 and j < n_cols-1: neighbors.append((i+1)*n_cols + (j+1)) for nb in neighbors: W[idx, nb] = 1 # 行标准化 row_sums = W.sum(axis=1, keepdims=True) row_sums[row_sums == 0] = 1 # 避免除以零 W = W / row_sums return W def _discretize(self, data): """ 将连续数据离散化为k个分位数类别 参数: ----- data : 2D array, shape (n_regions, n_periods) 返回: ----- classes : 2D array, 分类结果 (0, 1, ..., k-1) """ # 使用所有数据计算分位数边界 all_values = data.flatten() # 去掉NaN all_values = all_values[~np.isnan(all_values)] # 计算分位数边界 quantiles = np.linspace(0, 100, self.k + 1) self.boundaries = np.percentile(all_values, quantiles) # 分类 classes = np.zeros_like(data, dtype=int) for i in range(self.k): if i == self.k - 1: mask = data >= self.boundaries[i] else: mask = (data >= self.boundaries[i]) & (data < self.boundaries[i+1]) classes[mask] = i return classes def fit(self, data, W): """ 拟合空间马尔科夫链模型 参数: ----- data : 2D array, shape (n_regions, n_periods) 每行是一个地区,每列是一个时间点 W : 2D array, shape (n_regions, n_regions) 行标准化的空间权重矩阵 """ n_regions, n_periods = data.shape # Step 1: 离散化 - 将数据分为k个类别 self.state_classes = self._discretize(data) # Step 2: 计算空间滞后 spatial_lag = np.zeros_like(data) for t in range(n_periods): spatial_lag[:, t] = W @ data[:, t] # Step 3: 对空间滞后也进行离散化 self.lag_classes = self._discretize(spatial_lag) # Step 4: 构建全局转移矩阵 self.transitions = np.zeros((self.k, self.k), dtype=int) for t in range(n_periods - 1): for i in range(n_regions): current_state = self.state_classes[i, t] next_state = self.state_classes[i, t+1] self.transitions[current_state, next_state] += 1 # 转换为概率 row_sums = self.transitions.sum(axis=1, keepdims=True) row_sums[row_sums == 0] = 1 self.global_tpm = self.transitions / row_sums # Step 5: 构建空间条件转移矩阵 self.spatial_transitions = np.zeros((self.k, self.k, self.k), dtype=int) # spatial_transitions[l, i, j] = 空间滞后类别为l时,从状态i到状态j的转移次数 for t in range(n_periods - 1): for region in range(n_regions): lag_class = self.lag_classes[region, t] current_state = self.state_classes[region, t] next_state = self.state_classes[region, t+1] self.spatial_transitions[lag_class, current_state, next_state] += 1 # 转换为概率 self.spatial_tpms = np.zeros((self.k, self.k, self.k)) for l in range(self.k): row_sums = self.spatial_transitions[l].sum(axis=1, keepdims=True) row_sums[row_sums == 0] = 1 self.spatial_tpms[l] = self.spatial_transitions[l] / row_sums return self def summary(self): """打印模型摘要""" state_names = [f"Q{i+1}" for i in range(self.k)] print("=" * 60) print(" 空间马尔科夫链分析结果") print("=" * 60) # 全局转移矩阵 print("\n📊 全局转移概率矩阵(经典马尔科夫链):") print("-" * 40) header = " " + " ".join([f" {s} " for s in state_names]) print(header) for i, name in enumerate(state_names): row = f" {name} " row += " ".join([f"{self.global_tpm[i,j]:.4f}" for j in range(self.k)]) row += f" | n={self.transitions[i].sum()}" print(row) # 空间条件转移矩阵 lag_names = [f"低(Q1)", f"中低(Q2)", f"中高(Q3)", f"高(Q4)"] if self.k == 3: lag_names = ["低", "中", "高"] elif self.k == 2: lag_names = ["低", "高"] for l in range(self.k): print(f"\n📊 条件转移矩阵(邻居状态 = {lag_names[l] if l < len(lag_names) else f'Q{l+1}'}):") print("-" * 40) print(header) for i, name in enumerate(state_names): row = f" {name} " row += " ".join([f"{self.spatial_tpms[l,i,j]:.4f}" for j in range(self.k)]) row += f" | n={self.spatial_transitions[l,i].sum()}" print(row) print("\n" + "=" * 60) def plot_transition_matrices(self): """可视化转移矩阵""" fig, axes = plt.subplots(1, self.k + 1, figsize=(4*(self.k+1), 4)) state_names = [f"Q{i+1}" for i in range(self.k)] # 全局矩阵 im = axes[0].imshow(self.global_tpm, cmap='YlOrRd', vmin=0, vmax=1) axes[0].set_title('Global TPM', fontsize=12, fontweight='bold') axes[0].set_xticks(range(self.k)) axes[0].set_yticks(range(self.k)) axes[0].set_xticklabels(state_names) axes[0].set_yticklabels(state_names) axes[0].set_xlabel('To') axes[0].set_ylabel('From') # 添加数值标注 for i in range(self.k): for j in range(self.k): axes[0].text(j, i, f'{self.global_tpm[i,j]:.3f}', ha='center', va='center', fontsize=9, color='white' if self.global_tpm[i,j] > 0.5 else 'black') # 空间条件矩阵 for l in range(self.k): im = axes[l+1].imshow(self.spatial_tpms[l], cmap='YlOrRd', vmin=0, vmax=1) axes[l+1].set_title(f'Lag = Q{l+1}', fontsize=12, fontweight='bold') axes[l+1].set_xticks(range(self.k)) axes[l+1].set_yticks(range(self.k)) axes[l+1].set_xticklabels(state_names) axes[l+1].set_yticklabels(state_names) axes[l+1].set_xlabel('To') for i in range(self.k): for j in range(self.k): axes[l+1].text(j, i, f'{self.spatial_tpms[l,i,j]:.3f}', ha='center', va='center', fontsize=9, color='white' if self.spatial_tpms[l,i,j] > 0.5 else 'black') plt.colorbar(im, ax=axes, shrink=0.8, label='Transition Probability') plt.suptitle('Spatial Markov Chain - Transition Probability Matrices', fontsize=14, fontweight='bold', y=1.02) plt.tight_layout() plt.show()# ========== 运行示例 ==========# 模拟数据:8×8网格,20个时期np.random.seed(42)n_rows, n_cols = 8, 8n_regions = n_rows * n_colsn_periods = 20# 生成具有空间自相关的模拟数据# 基础数据:带有空间结构(左上穷,右下富)data = np.zeros((n_regions, n_periods))for t in range(n_periods): for i in range(n_rows): for j in range(n_cols): idx = i * n_cols + j # 空间趋势 + 随机噪声 + 时间趋势 spatial_trend = (i + j) * 5 time_trend = t * 2 noise = np.random.normal(0, 5) data[idx, t] = 30 + spatial_trend + time_trend + noise# 创建模型smc = SpatialMarkovChain(k=4)# 创建空间权重矩阵W = smc._create_weight_matrix(n_rows, n_cols, contiguity='rook')# 拟合模型smc.fit(data, W)# 输出结果smc.summary()# 可视化smc.plot_transition_matrices()
输出:
============================================================ 空间马尔科夫链分析结果============================================================📊 全局转移概率矩阵(经典马尔科夫链):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.7586 0.2382 0.0031 0.0000 | n=319 Q2 0.1125 0.6013 0.2830 0.0032 | n=311 Q3 0.0000 0.1503 0.6176 0.2320 | n=306 Q4 0.0000 0.0000 0.1179 0.8821 | n=280📊 条件转移矩阵(邻居状态 = 低(Q1)):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.8462 0.1538 0.0000 0.0000 | n=273 Q2 0.4681 0.5106 0.0213 0.0000 | n=47 Q3 0.0000 0.0000 0.0000 0.0000 | n=0 Q4 0.0000 0.0000 0.0000 0.0000 | n=0📊 条件转移矩阵(邻居状态 = 中低(Q2)):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.2444 0.7333 0.0222 0.0000 | n=45 Q2 0.0610 0.7230 0.2160 0.0000 | n=213 Q3 0.0000 0.4528 0.5472 0.0000 | n=53 Q4 0.0000 0.0000 0.0000 0.0000 | n=0📊 条件转移矩阵(邻居状态 = 中高(Q3)):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.0000 1.0000 0.0000 0.0000 | n=1 Q2 0.0000 0.1800 0.8200 0.0000 | n=50 Q3 0.0000 0.1084 0.6897 0.2020 | n=203 Q4 0.0000 0.0000 0.4038 0.5962 | n=52📊 条件转移矩阵(邻居状态 = 高(Q4)):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.0000 0.0000 0.0000 0.0000 | n=0 Q2 0.0000 0.0000 0.0000 1.0000 | n=1 Q3 0.0000 0.0000 0.4000 0.6000 | n=50 Q4 0.0000 0.0000 0.0526 0.9474 | n=228============================================================

4.2 输出解读
运行上面的代码,你会看到类似这样的输出:
============================================================ 空间马尔科夫链分析结果============================================================📊 全局转移概率矩阵(经典马尔科夫链):---------------------------------------- Q1 Q2 Q3 Q4 Q1 0.6842 0.2105 0.0789 0.0263 | n=304 Q2 0.1842 0.4934 0.2500 0.0724 | n=304 Q3 0.0592 0.2237 0.4934 0.2237 | n=304 Q4 0.0197 0.0724 0.2105 0.6974 | n=304📊 条件转移矩阵(邻居状态 = 低(Q1)):---------------------------------------- ...(穷邻居的转移模式)📊 条件转移矩阵(邻居状态 = 高(Q4)):---------------------------------------- ...(富邻居的转移模式)