🌍 在遥感地学分析的实际应用中,探究环境因子对生态系统指标(如蒸散发、植被指数等)的贡献量时,往往面临多因子共线性干扰、像元尺度计算量大以及物理意义难以定量表达等挑战。
✅ 本文介绍一套基于 Python 的完整岭回归(Ridge Regression)地学分析工作流:首先对多源遥感变量(如降水、气温、植被指数等)进行像元尺度的归一化处理,消除量级差异;接着利用岭回归系数结合时间序列趋势分析,定量分解各因子的相对贡献率与绝对贡献量;最后实现结果的批量空间可视化输出。
👇 该方法实现了从数据预处理、模型参数寻优到贡献量制图的全流程自动化,代码结构清晰,能够为解释区域环境变化驱动机制提供科学支撑。
完整可复制代码Ridge_Regression_Code.zip
点击关注上方“翔的学术日记”,选择加"星标"置顶重磅干货,第一时间送达


贡献分量 (n_c):计算为回归系数 a 与因子归一化趋势 X_s_trend 的乘积。它代表了单个因子对因变量变化倾向的拉动力度。
相对贡献 (n_rc):反映该因子在所有影响因子中的贡献占比。
绝对贡献 (n_ac):将归一化空间中的贡献还原回具有物理单位的实际趋势中


# -*- coding: utf-8 -*-# Packagesimport osimport reimport numpy as npimport rasterioimport pandas as pdfrom scipy import statsclass RidgeTrend(object):"""Ridge Regression for Remote Sensing with Absolute and Relative Contribution analysis.This class performs pixel-wise regression and exports results to GeoTIFF format."""def __init__(self, data_dir, nodata=-99999):"""Initialize the RidgeTrend tool.:param data_dir: Directory containing subfolders for each variable.:param nodata: Value used for masking and output NoData."""self.data_dir = data_dirself.nodata = nodataself.profile = Nonedef _load_var_stack(self, var_path, pattern=r'\d{4}'):"""Internal helper to load temporal TIF stacks into a 3D numpy array (T, H, W)."""files = sorted([f for f in os.listdir(var_path) if f.endswith(".tif")],key=lambda x: int(re.findall(r"\d+", re.search(pattern, x).group())[0]))stack = []for f in files:with rasterio.open(os.path.join(var_path, f)) as src:arr = src.read(1).astype(float)if src.nodata is not None:arr[arr == src.nodata] = np.nanstack.append(arr)if self.profile is None:# Save profile from the first file to use for output GeoTIFFsself.profile = src.profile.copy()return np.stack(stack)def _calculate_trend(self, data_1d):"""Calculates the linear slope (trend) of a 1D time series."""y = data_1d.flatten()x = np.arange(len(y))mask = ~np.isnan(y)if np.sum(mask) < 2: return 0slope, _, _, _, _ = stats.linregress(x[mask], y[mask])return slopedef _normalize(self, data_1d):"""Min-Max Normalization: Xm = (x - min) / (max - min)."""d_min, d_max = np.nanmin(data_1d), np.nanmax(data_1d)if d_max == d_min: return np.zeros_like(data_1d)return (data_1d - d_min) / (d_max - d_min)def RR(self, y_var_name, pattern=r'\d{4}', k_best=2):"""Main Ridge Regression function.:param y_var_name: Folder name of the dependent variable (e.g., 'ET').:param k_best: Regularization parameter (Lambda).:return: Dictionary containing 'summary' (DataFrame) and 'maps' (2D Arrays)."""......def save_results_to_tif(self, results, output_dir):"""Exports all result maps to GeoTIFF and summary to CSV."""if not os.path.exists(output_dir):os.makedirs(output_dir)# Update metadata profile for outputout_profile = self.profile.copy()out_profile.update({"driver": "GTiff","dtype": 'float32',"count": 1,"nodata": self.nodata})# Save each mapfor name, data in results['maps'].items():out_path = os.path.join(output_dir, f"{name}.tif")out_data = data.astype(np.float32)out_data[np.isnan(out_data)] = self.nodatawith rasterio.open(out_path, "w", **out_profile) as dst:dst.write(out_data, 1)# Save summary tableresults['summary'].to_csv(os.path.join(output_dir, "summary.csv"), index=False)print(f"Results successfully saved to {output_dir}")
import numpy as npimport matplotlib.pyplot as pltimport geosis.rdr as rdr
data_dir = r'E:\公众号\岭回归\data\vars'
# 创建数据集:data_dir 是所有变量文件夹的上一级文件夹inst = rdr.RidgeTrend(data_dir)# y_var_name 指的是因变脸名称,pattern 指的是每个变量中# 变量名称和时间名称的关系(例如‘\d{4}’代表变量名+年份# k_best 是岭回归正则化参数(需通过交叉验证等方法确定最优值)RidgeRe = inst.RR(y_var_name = 'SOT', pattern=r'\d{4}', k_best=2)
RidgeRe.get('summary')
maps = RidgeRe.get('maps')# 可视化im = plt.imshow(maps['r2'], cmap='RdYlGn')plt.colorbar(im)

inst.save_results_to_tif(RidgeRe, output_dir=r'E:\公众号\岭回归\Result')
请在微信客户端打开