"""Download the NetCDF files listed in an ESGF wget script and verify them.

The script reads an ESGF-generated wget shell script, extracts every
``'<file>.nc' '<url>' '<checksum type>' '<checksum value>'`` entry, and
downloads each file into ``save_dir`` with retries.  A file is only moved to
its final name after it has passed the NetCDF signature check and, when the
checksum type is supported, the checksum comparison.
"""

import hashlib
import os
import re
from pathlib import Path
from time import sleep

import requests

# ===== Set this to your own working directory =====
# Resolved to an absolute path so that the paths derived from it stay valid
# after the os.chdir(work_dir) call in main(), even if a relative path is
# configured here.
work_dir = Path("/path/to/your/cmip6_download_folder").resolve()

# ===== Set this to the path of your ESGF wget script =====
sh_file = work_dir / "CNRM-CM6-1.historical.r1i1p1f2.Amon.tauu.gr.sh"

# ===== Directory the downloaded files are saved to =====
save_dir = work_dir
# ===============================

chunk_size = 1024 * 1024  # bytes requested per streamed chunk
max_retry = 10  # download attempts per file
retry_delay = 15  # seconds to wait between two attempts
progress_step = 10 * 1024 * 1024  # print progress roughly every 10 MB

# Captures: file name, download URL, checksum type, checksum value.
pattern = r"'([^']+\.nc)'\s+'(https?://[^']+)'\s+'([^']+)'\s+'([^']+)'"

# Checksum types that can be verified with hashlib (compared lower-cased).
_SUPPORTED_CHECKSUMS = ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")


def _file_digest(filename, algorithm):
    """Return the hex digest of the file at *filename* using *algorithm*.

    Raises:
        ValueError: if hashlib does not provide *algorithm*.
    """
    digest = hashlib.new(algorithm)
    with open(filename, "rb") as f:
        # Read in 1 MiB blocks so large files are never held in memory.
        for block in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(block)
    return digest.hexdigest()


def sha256sum(filename):
    """Return the hexadecimal SHA256 digest of the local file *filename*."""
    return _file_digest(filename, "sha256")


def is_netcdf_file(filename):
    """Return True if *filename* starts with a NetCDF signature.

    The first bytes must be ``CDF`` or ``\\x89HDF``.
    """
    with open(filename, "rb") as f:
        head = f.read(8)
    return head.startswith((b"CDF", b"\x89HDF"))


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


def _download(url, dest):
    """Stream *url* into the file *dest* and return the number of bytes written.

    Raises:
        RuntimeError: on a non-200 HTTP status, or when the number of bytes
            received differs from the Content-Length header.
    """
    # NOTE(security): verify=False disables TLS certificate verification.
    # It is kept because the original script relies on it; the checksum from
    # the wget script is the only integrity check on the downloaded data.
    with requests.get(
        url,
        stream=True,
        timeout=(30, 300),
        verify=False,
        allow_redirects=True,
    ) as r:
        expected = r.headers.get("Content-Length")
        print("HTTP status:", r.status_code)
        print("Content-Length:", expected)
        if r.status_code != 200:
            raise RuntimeError(f"HTTP status {r.status_code}")

        total = 0
        next_report = progress_step
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if not chunk:
                    continue
                f.write(chunk)
                total += len(chunk)
                # Report once per progress_step, however small the chunks are.
                if total >= next_report:
                    print(f"Downloaded: {total/1024**2:.1f} MB")
                    next_report = (total // progress_step + 1) * progress_step

        # Content-Length counts encoded bytes, so the comparison is only
        # meaningful when the body was not content-encoded.
        if (
            expected
            and expected.isdigit()
            and not r.headers.get("Content-Encoding")
            and total != int(expected)
        ):
            raise RuntimeError(
                f"Incomplete download: got {total} of {expected} bytes"
            )
    return total


def _verify_checksum(path, chk_type, chk_value):
    """Compare the digest of *path* with *chk_value*.

    Verification is skipped, with a message, when *chk_type* is not a
    supported hashlib algorithm.

    Raises:
        RuntimeError: if the computed digest differs from *chk_value*.
    """
    algorithm = chk_type.lower()
    if algorithm not in _SUPPORTED_CHECKSUMS:
        print(f"Unsupported checksum type {chk_type!r}, skipping verification")
        return

    label = chk_type.upper()
    print(f"Checking {label}...")
    try:
        local_hash = _file_digest(path, algorithm)
    except ValueError:
        # hashlib.new() rejected the algorithm on this Python build.
        print(f"Checksum type {chk_type!r} unavailable, skipping verification")
        return

    if local_hash.lower() != chk_value.lower():
        print(f"{label} FAILED")
        print("local :", local_hash)
        print("remote:", chk_value)
        raise RuntimeError("Checksum mismatch")
    print(f"{label} OK")


def _fetch_file(fname, url, chk_type, chk_value):
    """Download one file with retries; return True on success, else False."""
    out = save_dir / fname
    tmp = save_dir / (fname + ".tmp")
    print("\n======================================")
    print(fname)
    print(url)

    # Remove old files so a previous bad download cannot affect this run.
    if out.exists():
        print("Removing old file")
        out.unlink()
    _remove_if_exists(tmp)

    for attempt in range(1, max_retry + 1):
        try:
            print(f"\nAttempt {attempt}")
            total = _download(url, tmp)
            print(f"Downloaded size: {total/1024**2:.2f} MB")

            # Reject error pages and other non-NetCDF payloads.
            if not is_netcdf_file(tmp):
                print("Not NetCDF file, removing")
                raise RuntimeError("Invalid file")

            # Verify BEFORE renaming, so the final file name only ever
            # refers to a file that passed all checks.
            _verify_checksum(tmp, chk_type, chk_value)
            tmp.replace(out)
            return True
        except Exception as e:  # retry boundary: report and try again
            print("Error:", e)
            _remove_if_exists(tmp)
            if attempt < max_retry:
                sleep(retry_delay)

    print("FAILED after all retries")
    return False


def main():
    """Parse the ESGF wget script and download every file it lists."""
    os.chdir(work_dir)
    print(os.getcwd())

    # verify=False would otherwise emit an InsecureRequestWarning per request.
    requests.packages.urllib3.disable_warnings()

    save_dir.mkdir(parents=True, exist_ok=True)

    # Read the ESGF wget script.
    text = sh_file.read_text(encoding="utf-8", errors="ignore")

    # Extract file name, download URL, checksum type and checksum value.
    files = re.findall(pattern, text)
    print(f"Found {len(files)} file(s)")

    failed = [
        fname
        for fname, url, chk_type, chk_value in files
        if not _fetch_file(fname, url, chk_type, chk_value)
    ]
    if failed:
        print(f"\n{len(failed)} file(s) could not be downloaded:")
        for fname in failed:
            print(" ", fname)


if __name__ == "__main__":
    main()