当前位置：首页>python>对比QGIS与Python GPU栅格计算

对比QGIS与Python GPU栅格计算

2026-06-28 21:46:24

前几天做了一下土地覆被二级分类转一级分类，操作就是把二级类的像元值的十位数提取出来，作为一级类的数值，这个可以在QGIS中完成，但是转换后的数据数据量极大，在ChatGPT的帮助下，我做了个Python GPU的计算，计算结果非常理想，计算速度快，数据量很小。

QGIS重分类

QGIS的重分类是我经常使用的工具，可以完成土地覆被重分类工作，但是经实验发现，QGIS转换速度很慢，转换后的文件数据量巨大，全国30米土地覆被一幅高大20GB，转换效果差。

Python设置代码如下：

processing.run("native:reclassifybytable", {'INPUT_RASTER':'G:/Geodata/CASLUCC/1980s.tif','RASTER_BAND':1,'TABLE':['10','12','1','20','24','2','30','33','3','40','46','4','50','53','5','60','67','6','90','99','9'],'NO_DATA':-9999,'RANGE_BOUNDARIES':0,'NODATA_FOR_MISSING':False,'DATA_TYPE':0,'OUTPUT':'I:/LUCC_CAS/L1_1980s.tif'})

Python CPU栅格计算

我用的服务器是至强E5双路CPU，256GB内存，运行下面的程序，居然显示内存不足了，ChatGPT表示并行计算建议将分块设置的小一点，下面代码默认是1024，我估计256可能会好一些吧，计算速度极慢，等了好半天，不想等了，突然想起GPU也是可以计算的嘛，干嘛不试试GPU计算，于是就有了后面第三部分的代码。

import osimport numpy as npimport rasteriofrom concurrent.futures import ProcessPoolExecutorfrom tqdm import tqdm  # 导入tqdm库# Define input and output directoriesinput_dir = r'G:\Geodata\CASLUCC'output_dir = r'I:\LUCC_CAS'# Create output directory if it doesn't existos.makedirs(output_dir, exist_ok=True)# Get list of all .tif files in the input directorytif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]# Function to process each raster filedef process_raster(input_path, output_path):    with rasterio.open(input_path) as src:# Read metadata        meta = src.meta.copy()# Initialize the processed data array        processed_data = np.zeros((src.height, src.width), dtype=np.uint8)# Define block size (you can adjust this based on available memory)        block_size = 1024  # 建议改256，减小块减小内存使用# Calculate total number of blocks        num_blocks_x = (src.width + block_size - 1) // block_size  # Horizontal blocks        num_blocks_y = (src.height + block_size - 1) // block_size  # Vertical blocks# Process the raster in blocks to reduce memory usagefor i in tqdm(range(0, src.height, block_size), desc=f'Processing {os.path.basename(input_path)}', total=num_blocks_y):for j in range(0, src.width, block_size):                window = rasterio.windows.Window(j, i, block_size, block_size)                data = src.read(1, window=window)# Create a mask for valid data (values between 11 and 99)                mask = (data >= 11) & (data <= 99)# Process data (compute tens digit)                processed_data[i:i+block_size, j:j+block_size] = np.where(mask, data // 10, 0)# Update metadata for output        meta.update(dtype=rasterio.uint8, compress='LZW', tiled=True, bigtiff='YES')# Write the processed data to the output file with high compression        with rasterio.open(output_path, 'w', **meta) as dst:            dst.write(processed_data, 1)# Use ProcessPoolExecutor to process each .tif file in paralleldef parallel_processing():    with ProcessPoolExecutor() as executor:# Map the process_raster function to each file in the list, and track progress with tqdm        futures = [            executor.submit(process_raster, os.path.join(input_dir, tif_file), os.path.join(output_dir, tif_file))for tif_file in tif_files        ]# Use tqdm to track the overall progress of the futuresfor future in tqdm(futures, desc="Overall progress", total=len(futures)):            future.result()  # This will raise exceptions if there were any errors during processing# Run the parallel processingif __name__ == "__main__":    parallel_processing()print("All files processed successfully.")

Python GPU栅格计算

这个效果最好，推荐使用，我是用的RTX3060显卡，12GB显存，计算速度非常快，一幅全国30米的影像2分半就可以算完，效率很高，而且处理结果数据量很小，只要400MB，非常理想。

要说唯一的不足，就是这个GPU计算的环境配置非常麻烦。两点要注意：

CUDA环境不要安装太高版本的，11.3版本比较好
使用conda forge来安装cupy,参考参考文献1

Conda安装cupy代码：

conda install -c conda-forge cupy

GPU栅格计算代码：

分块处理，block大小为1024
输出数据使用LZW压缩，INT8，NODATA值设为0

# -*- coding: utf-8 -*-"""Created on Mon Dec 30 22:06:12 2024@author: Xu Yang"""import osimport cupy as cpimport rasteriofrom tqdm import tqdmimport numpy as np# Define input and output directories定义输入和输出路径input_dir = r'G:\Geodata\CASLUCC'output_dir = r'I:\LUCC_CAS'# Create output directory if it doesn't existos.makedirs(output_dir, exist_ok=True)# Get list of all .tif files in the input directory获取所有的tif文件tif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]tif_files = tif_files[6:10]# Function to process each raster file using GPU (with memory-efficient approach)def process_raster(input_path, output_path):    with rasterio.open(input_path) as src:# Read metadata        meta = src.meta.copy()# Initialize the processed data array (on CPU)        processed_data = np.zeros((src.height, src.width), dtype=np.uint8)# Define block size (you can adjust this based on available memory)        block_size = 1024  # You can modify this to a smaller block size if needed# Calculate total number of blocks        num_blocks_x = (src.width + block_size - 1) // block_size  # Horizontal blocks        num_blocks_y = (src.height + block_size - 1) // block_size  # Vertical blocks# Process the raster in blocks to reduce memory usagefor i in tqdm(range(0, src.height, block_size), desc=f'Processing {os.path.basename(input_path)}', total=num_blocks_y):for j in range(0, src.width, block_size):                window = rasterio.windows.Window(j, i, block_size, block_size)                data = src.read(1, window=window)# Transfer data to GPU using CuPy                data_gpu = cp.asarray(data)  # Transfer data to GPU using CuPy# Create a mask for valid data (values between 11 and 99)                mask = (data_gpu >= 11) & (data_gpu <= 99)# Compute the tens digit (L1 level) on the GPU                processed_data_gpu = cp.zeros_like(data_gpu, dtype=cp.uint8)                processed_data_gpu[mask] = data_gpu[mask] // 10# Transfer the result back to CPU (host memory) for writing                processed_data[i:i+block_size, j:j+block_size] = cp.asnumpy(processed_data_gpu)  # Transfer to NumPy array# Update metadata for output        meta.update(dtype=rasterio.uint8, nodata = 0, compress='LZW', tiled=True, bigtiff='YES')# Write the processed data to the output file with high compression        with rasterio.open(output_path, 'w', **meta) as dst:            dst.write(processed_data, 1)# Use tqdm to show progress during parallel processing显示进度条def parallel_processing():for tif_file in tqdm(tif_files, desc="Processing files"):        input_path = os.path.join(input_dir, tif_file)        output_path = os.path.join(output_dir, tif_file)        process_raster(input_path, output_path)# Run the parallel processing并行处理if __name__ == "__main__":    parallel_processing()print("All files processed successfully.")