简介
EOF(经验正交函数)和REOF(旋转经验正交函数)是气象与海洋数据分析中常用的时空降维技术。EOF侧重于提取最大的方差贡献,其特征向量呈全局分布,而REOF是对EOF的载荷向量进行旋转(常用Varimax法),使区域性特征更清晰、物理意义更明确,并解决EOF的区域重叠问题。 本文介绍如何使用Python实现REOF分析,并以CMAP降水数据为例尝试复现一篇文章的图1a(https://doi.org/10.1007/s00376-022-2079-1)。注意!!代码主要使用了AI的计算过程,仅供参考!
代码
import numpy as npimport xarray as xrimport matplotlib.pyplot as pltimport cartopy.crs as ccrsimport cartopy.feature as cfeature# from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatterfrom sklearn.preprocessing import StandardScalerimport warningsimport osimport cmapsfrom cartopy.io.shapereader import Reader as shpreaderwarnings.filterwarnings('ignore')class REOF:""" REOF (旋转经验正交函数) 基于EOF + VARIMAX旋转 """ def __init__(self, n_components=None, rotate=True, max_iter=1000, tol=1e-6): self.n_components = n_components self.rotate = rotate self.max_iter = max_iter self.tol = tol def fit(self, data): self.original_data = data.copy() self.original_shape = data.shape# 标准化 self.scaler = StandardScaler() data_std = self.scaler.fit_transform(data.T).T# PCA from sklearn.decomposition import PCA self.pca = PCA(n_components=self.n_components) scores = self.pca.fit_transform(data_std) self.eofs = self.pca.components_.T self.explained_variance = self.pca.explained_variance_ self.explained_variance_ratio = self.pca.explained_variance_ratio_if self.rotate and self.n_components and self.n_components > 1: self._varimax_rotation(scores)else: self.rotated_eofs = self.eofs self.rotated_pcs = scores self.rotation_matrix = Nonereturn self def _varimax_rotation(self, scores): loadings = self.eofs * np.sqrt(self.explained_variance) n_spatial, n_modes = loadings.shape h2 = np.sum(loadings ** 2, axis=1, keepdims=True) h2_inv_sqrt = 1.0 / np.sqrt(h2 + 1e-10) loadings_norm = loadings * h2_inv_sqrt rotation_matrix = np.eye(n_modes)for iteration in range(self.max_iter): rotated_loadings_norm = loadings_norm @ rotation_matrix u = rotated_loadings_norm ** 3 - (1 / n_modes) * rotated_loadings_norm * np.sum( rotated_loadings_norm ** 2, axis=1, keepdims=True) B = loadings_norm.T @ u U, s, Vt = np.linalg.svd(B) new_rotation = U @ Vtif np.sum(np.abs(new_rotation - rotation_matrix)) < self.tol:break rotation_matrix = new_rotation self.rotation_matrix = rotation_matrix rotated_loadings = loadings @ rotation_matrix self.rotated_eofs = rotated_loadings / np.sqrt(self.explained_variance) self.rotated_pcs = scores @ rotation_matrix.T total_variance = np.sum(self.explained_variance) rotated_variance = np.sum(rotated_loadings ** 2, axis=0) self.rotated_explained_variance = rotated_variance self.rotated_explained_variance_ratio = rotated_variance / total_variance def get_eofs(self, rotated=True):return self.rotated_eofs if rotated else self.eofs def get_pcs(self, rotated=True):if rotated:return self.rotated_pcselse:return self.pca.transform(self.scaler.transform(self.original_data.T).T)def read_cmap_data(file_path, start_year=1981, end_year=2020):""" 读取CMAP降水数据,提取中国东部区域,计算6-7月平均 """ ds = xr.open_dataset(file_path)# 查找降水变量if'precip'in ds.variables: precip = ds['precip']elif'pre'in ds.variables: precip = ds['pre']else:for var in ds.variables:if'precip'in var.lower() or 'pre'in var.lower(): precip = ds[var]break# 选择中国东部区域 china_east_lat = slice( 52, 18) china_east_lon = slice(95, 135) precip_east = precip.sel(lat=china_east_lat, lon=china_east_lon)# 选择6-7月数据 summer_data = precip_east.sel(time=precip_east.time.dt.month.isin([6, 7]))# 计算6-7月平均 summer_mean = summer_data.groupby('time.year').mean('time')# 选择指定年份范围 summer_mean = summer_mean.sel(year=slice(start_year, end_year))return summer_meandef prepare_data_for_reof(da):""" 准备数据用于REOF分析 """# 转换为 (时间, 空间点) 的二维数组 ntime, nlat, nlon = da.shape data_flat = da.values.reshape(ntime, -1)# 去除含有NaN的格点 mask = ~np.isnan(data_flat).any(axis=0) data_clean = data_flat[:, mask]# 保存原始网格信息用于绘图 lon2d, lat2d = np.meshgrid(da.lon.values, da.lat.values) lon_flat = lon2d.flatten()[mask] lat_flat = lat2d.flatten()[mask]# 进行标准化 scaler = StandardScaler() data_std = scaler.fit_transform(data_clean.T).Treturn data_std, lon_flat, lat_flat, da.lat.values, da.lon.values, maskdef plot_reof_mode1(reofs, var_ratio, lat, lon, mask, mode=0,):""" 绘制REOF第一模态 """# 创建完整的网格(包括无效格点) nlat, nlon = len(lat), len(lon) full_grid = np.full((nlat, nlon), np.nan) full_grid.flat[mask] = reofs[:, mode] fig = plt.figure(figsize=(12, 10)) ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())# 绘制REOF模态 im = ax.pcolormesh(lon, lat, full_grid, cmap=cmaps.precip_diff_12lev, transform=ccrs.PlateCarree(), shading='auto', vmin=-0.2, vmax=0.2)# 添加海岸线 ax.add_feature(cfeature.COASTLINE, linewidth=0.8)# 添加长江流域框 yrb_lat_range = (27, 31) yrb_lon_range = (107, 120)# 绘制长江流域框 ax.plot([yrb_lon_range[0], yrb_lon_range[1], yrb_lon_range[1], yrb_lon_range[0], yrb_lon_range[0]], [yrb_lat_range[0], yrb_lat_range[0], yrb_lat_range[1], yrb_lat_range[1], yrb_lat_range[0]], color='black', linewidth=3.5, transform=ccrs.PlateCarree()) shp_path = r'cnmap.shp' ct = shpreader(shp_path).geometries() ax.add_geometries(ct, ccrs.PlateCarree(), facecolor='none', edgecolor='grey', linewidth=2, zorder=6) shp_path2 = r'river.shp' ct2 = shpreader(shp_path2).geometries() ax.add_geometries(ct2, ccrs.PlateCarree(), facecolor='none', edgecolor='b', linewidth=4, zorder=7)# 设置地图范围 ax.set_extent([100, 125, 20, 50])# 添加经纬度网格线 gl = ax.gridlines(draw_labels=True, linewidth=0.5, color='gray', alpha=0.5, linestyle='--') gl.top_labels = False gl.right_labels = False# 添加colorbar cbar = plt.colorbar(im, ax=ax, orientation='vertical', pad=0.05, aspect=40, shrink=0.8)# 添加标题 ax.set_title(f'REOF1 {var_ratio[0]:.1f}% ',fontsize=20) plt.tight_layout() plt.savefig('reof.png', dpi=600, bbox_inches='tight') plt.show()return figdef main():""" 主函数 """# 设置保存路径 save_dir = "D:\reof"# 读取CMAP数据 data_path = "precip.mon.mean.nc" precip_da = read_cmap_data(data_path, start_year=1981, end_year=2020) data_clean, lon_flat, lat_flat, lat, lon, mask = prepare_data_for_reof(precip_da)# 3. 执行REOF分析(旋转前10个模态) n_modes_to_rotate = 10 reof_analyzer = REOF(n_components=n_modes_to_rotate, rotate=True) reof_analyzer.fit(data_clean) reofs = reof_analyzer.get_eofs(rotated=True) rpcs = reof_analyzer.get_pcs(rotated=True) reof_var_ratio = reof_analyzer.rotated_explained_variance_ratio * 100return precip_da, reof_analyzer, reof_var_ratioif __name__ == "__main__": precip_da, reof_analyzer, reof_var_ratio = main()
结果
结果显示,夏季中国东部地区REOF第一模态解释方差为9.6%(原文7.5%),其以长江流域显著的降水为主,说明长江流域是东亚夏季风的主要雨带!