下面是一个简单的测试脚本,用于验证 PyTorch 是否正确安装并能够使用 GPU:
#!/usr/bin/env python3
"""
PyTorch信息获取与测试脚本
获取PyTorch、CUDA、GPU等详细信息,并进行简单的功能测试
"""
import sys
import platform
import json
import time
import numpy as np
from datetime import datetime
import subprocess
import os
def safe_getattr(obj, attr_name, default=None):
    """Safely read *attr_name* from *obj*, returning *default* on failure.

    getattr() already accepts a default for missing attributes, but some
    property-style attributes raise on access (AttributeError/TypeError);
    catch those too so callers never see an exception.

    Args:
        obj: any object.
        attr_name: attribute name to read.
        default: value returned when the attribute is missing or raises.
    """
    try:
        return getattr(obj, attr_name, default)
    except (AttributeError, TypeError):
        return default
def get_system_info():
    """Return a dict of OS and Python interpreter details.

    All values come from the standard-library ``platform`` module; keys:
    platform, system, release, version, machine, processor,
    python_version, python_implementation, python_compiler.
    """
    info = {
        "platform": platform.platform(),
        "system": platform.system(),
        "release": platform.release(),
        "version": platform.version(),
        "machine": platform.machine(),
        "processor": platform.processor(),
        "python_version": platform.python_version(),
        "python_implementation": platform.python_implementation(),
        "python_compiler": platform.python_compiler(),
    }
    return info
def get_nvidia_info():
    """Probe for NVIDIA driver and CUDA toolkit information.

    Best-effort: every probe (nvidia-smi, filesystem paths) is wrapped so a
    missing driver/toolkit just leaves the corresponding field as None.

    Returns a dict with keys driver_version, cuda_version,
    nvidia_smi_output, and (when a toolkit directory is found)
    cuda_installed_path.
    """
    info = {
        "driver_version": None,
        "cuda_version": None,
        "nvidia_smi_output": None
    }
    try:
        # Capture raw nvidia-smi output for diagnostics.
        result = subprocess.run(['nvidia-smi'],
                                capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            info["nvidia_smi_output"] = result.stdout[:1000]  # first 1000 chars only
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        pass
    try:
        # Query just the driver version in machine-readable form.
        result = subprocess.run(['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'],
                                capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            info["driver_version"] = result.stdout.strip()
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        pass
    # Look for a locally installed CUDA toolkit in the usual locations.
    cuda_paths = [
        r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA",  # Windows
        "/usr/local/cuda",  # Linux/macOS
        "/opt/cuda"  # Alternative Linux
    ]
    for path in cuda_paths:
        if os.path.exists(path):
            info["cuda_installed_path"] = path
            # version.txt exists in older toolkits (pre-CUDA 11);
            # newer installs may not carry it, leaving cuda_version None.
            version_file = os.path.join(path, "version.txt")
            if os.path.exists(version_file):
                with open(version_file, 'r') as f:
                    info["cuda_version"] = f.read().strip()
            break
    return info
def get_torch_info():
    """Collect PyTorch build, CUDA and per-GPU device information.

    Device properties are read through the module-level ``safe_getattr``
    helper because property names vary across PyTorch versions; attributes
    that are absent simply come back as None (or a documented fallback).

    Returns a dict with torch/CUDA/cuDNN versions, CUDA availability,
    device count, a per-device info list, and a torch_config sub-dict.
    """
    import torch
    info = {
        "torch_version": torch.__version__,
        "torch_cuda_version": torch.version.cuda if hasattr(torch.version, 'cuda') else None,
        "torch_cudnn_version": torch.backends.cudnn.version() if hasattr(torch.backends.cudnn, 'is_available') and torch.backends.cudnn.is_available() else None,
        "cuda_available": torch.cuda.is_available(),
        "cuda_device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
        "devices": []
    }
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            device_props = torch.cuda.get_device_properties(i)
            capability = torch.cuda.get_device_capability(i)
            # Read properties defensively; names differ across PyTorch versions.
            device_info = {
                "id": i,
                "name": torch.cuda.get_device_name(i),
                "compute_capability": f"{capability[0]}.{capability[1]}",
                "total_memory_gb": device_props.total_memory / (1024**3),
                "multi_processor_count": safe_getattr(device_props, 'multi_processor_count'),
                # Alternate attribute names across versions:
                "max_threads_per_block": safe_getattr(device_props, 'max_threads_per_block',
                                                      safe_getattr(device_props, 'max_threads_per_block_dim', None)),
                "max_threads_dim": safe_getattr(device_props, 'max_threads_dim',
                                                safe_getattr(device_props, 'max_threads_per_block_dim', None)),
                "max_grid_size": safe_getattr(device_props, 'max_grid_size',
                                              safe_getattr(device_props, 'max_grid_dim', None)),
                "shared_memory_per_block_kb": safe_getattr(device_props, 'shared_memory_per_block', 0) / 1024,
                "warp_size": safe_getattr(device_props, 'warp_size', 32),
                "memory_clock_rate_mhz": None,  # filled below if any known attr matches
                "memory_bus_width": safe_getattr(device_props, 'memory_bus_width'),
            }
            # The clock-rate attribute name also varies; take the first hit.
            for attr in ['memory_clock_rate', 'memoryClockRate', 'clock_rate']:
                value = safe_getattr(device_props, attr)
                if value:
                    device_info["memory_clock_rate_mhz"] = value / 1000
                    break
            # Dump every public property for debugging purposes.
            device_info["all_attributes"] = {}
            for attr in dir(device_props):
                if not attr.startswith('_'):
                    try:
                        device_info["all_attributes"][attr] = getattr(device_props, attr)
                    except Exception:
                        pass
            info["devices"].append(device_info)
    # Build/configuration details; these APIs are absent in some versions,
    # so each one degrades to a placeholder string instead of raising.
    info["torch_config"] = {}
    try:
        info["torch_config"]["debug"] = torch.is_debug()
    except Exception:
        info["torch_config"]["debug"] = "Unknown"
    try:
        info["torch_config"]["parallel_info"] = torch.__config__.parallel_info()
    except Exception:
        info["torch_config"]["parallel_info"] = "Not available"
    try:
        info["torch_config"]["show_config"] = torch.__config__.show()
    except Exception:
        info["torch_config"]["show_config"] = "Not available"
    return info
def test_basic_torch_operations():
    """Run smoke tests of core PyTorch functionality.

    Covers: basic tensor arithmetic, GPU tensor ops (skipped when CUDA is
    unavailable), autograd on a scalar polynomial, and a tiny feed-forward
    network with one optimizer step.

    Returns a dict keyed by test name; each entry carries a "success" flag
    plus either the computed results or an "error" string.
    """
    import torch
    results = {
        "tensor_operations": {},
        "gpu_operations": {},
        "autograd_test": {},
        "neural_network_test": {}
    }
    # 1. Basic tensor operations
    print("测试基本张量操作...")
    try:
        x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
        y = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)
        add_result = x + y
        mul_result = x * y
        matmul_result = torch.matmul(x, y)
        results["tensor_operations"] = {
            "addition": add_result.tolist(),
            "multiplication": mul_result.tolist(),
            "matrix_multiplication": matmul_result.tolist(),
            "success": True
        }
        print("✓ 基本张量操作测试通过")
    except Exception as e:
        results["tensor_operations"] = {"success": False, "error": str(e)}
        print(f"✗ 基本张量操作测试失败: {e}")
    # 2. GPU operations (only when CUDA is available)
    if torch.cuda.is_available():
        print("测试GPU操作...")
        try:
            device = torch.device('cuda:0')
            # Build the inputs here instead of reusing x/y from the block
            # above, so this test does not NameError when that test failed.
            x_gpu = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, device=device)
            y_gpu = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32, device=device)
            gpu_result = torch.matmul(x_gpu, y_gpu)
            # Synchronize so the result is ready before timing starts.
            torch.cuda.synchronize()
            # Time 100 matmuls with CUDA events (GPU-side timing).
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)
            start_event.record()
            for _ in range(100):
                _ = torch.matmul(x_gpu, y_gpu)
            end_event.record()
            torch.cuda.synchronize()
            elapsed_time = start_event.elapsed_time(end_event)
            results["gpu_operations"] = {
                "gpu_matrix_multiplication": gpu_result.cpu().tolist(),
                "gpu_computation_time_ms": elapsed_time,
                "success": True
            }
            print(f"✓ GPU操作测试通过 (100次矩阵乘法耗时: {elapsed_time:.2f}ms)")
        except Exception as e:
            results["gpu_operations"] = {"success": False, "error": str(e)}
            print(f"✗ GPU操作测试失败: {e}")
    else:
        results["gpu_operations"] = {"success": False, "error": "CUDA不可用"}
        print("⚠ GPU操作测试跳过 (CUDA不可用)")
    # 3. Autograd
    print("测试自动微分...")
    try:
        x = torch.tensor([2.0], requires_grad=True)
        y = x ** 3 + 2 * x ** 2 + 3 * x + 4
        y.backward()
        gradient = x.grad.item()
        # Hand-computed derivative: d/dx = 3x² + 4x + 3, evaluated at x = 2.
        expected_gradient = 3 * 2**2 + 4 * 2 + 3
        results["autograd_test"] = {
            "computed_gradient": gradient,
            "expected_gradient": expected_gradient,
            "gradient_match": abs(gradient - expected_gradient) < 1e-6,
            "success": True
        }
        print(f"✓ 自动微分测试通过 (梯度: {gradient:.6f})")
    except Exception as e:
        results["autograd_test"] = {"success": False, "error": str(e)}
        print(f"✗ 自动微分测试失败: {e}")
    # 4. Neural-network module
    print("测试神经网络模块...")
    try:
        # A minimal 10 -> 5 -> 1 feed-forward network.
        class SimpleNet(torch.nn.Module):
            def __init__(self):
                super(SimpleNet, self).__init__()
                self.fc1 = torch.nn.Linear(10, 5)
                self.relu = torch.nn.ReLU()
                self.fc2 = torch.nn.Linear(5, 1)
                self.sigmoid = torch.nn.Sigmoid()

            def forward(self, x):
                x = self.fc1(x)
                x = self.relu(x)
                x = self.fc2(x)
                x = self.sigmoid(x)
                return x

        model = SimpleNet()
        test_input = torch.randn(1, 10)
        output = model(test_input)
        # One full training step: loss, backward, optimizer update.
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
        criterion = torch.nn.BCELoss()
        target = torch.tensor([[0.5]])
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        results["neural_network_test"] = {
            "model_output": output.item(),
            "loss_value": loss.item(),
            "parameters_count": sum(p.numel() for p in model.parameters()),
            "success": True
        }
        print(f"✓ 神经网络测试通过 (输出: {output.item():.6f}, 损失: {loss.item():.6f})")
    except Exception as e:
        results["neural_network_test"] = {"success": False, "error": str(e)}
        print(f"✗ 神经网络测试失败: {e}")
    return results
def test_performance(matrix_size=2000, conv_shape=(4, 64, 128, 128)):
    """Benchmark CPU (and, when CUDA is available, GPU) throughput.

    Args:
        matrix_size: side length of the square matrices for the CPU matmul
            benchmark (default 2000, the original hard-coded size).
        conv_shape: (batch, channels, height, width) of the CPU convolution
            input (default matches the original hard-coded shape; the
            convolution keeps channel count, as the original did).

    Returns a dict with "cpu_performance" and, when CUDA is available,
    "gpu_performance" sub-dicts; failures are reported via an "error" key.
    """
    import torch
    results = {}
    # ---- CPU benchmarks -------------------------------------------------
    print("测试CPU性能...")
    cpu_tests = {}
    try:
        # Plain matrix multiplication.
        size = matrix_size
        a_cpu = torch.randn(size, size)
        b_cpu = torch.randn(size, size)
        start_time = time.time()
        c_cpu = torch.matmul(a_cpu, b_cpu)
        cpu_time = time.time() - start_time
        cpu_tests["matrix_multiplication"] = {
            "matrix_size": size,
            "computation_time_s": cpu_time,
            # max() guards against division by zero when the matmul finishes
            # below time.time() resolution (tiny sizes).
            "flops_estimate": (2 * size**3) / max(cpu_time, 1e-9) / 1e9
        }
        print(f"✓ CPU矩阵乘法: {cpu_time:.3f}s (约 {cpu_tests['matrix_multiplication']['flops_estimate']:.2f} GFLOPS)")
        # 2D convolution.
        batch_size, channels, height, width = conv_shape
        kernel_size = 3
        out_channels = channels  # original shape used 64 in == 64 out
        input_cpu = torch.randn(batch_size, channels, height, width)
        kernel_cpu = torch.randn(out_channels, channels, kernel_size, kernel_size)
        start_time = time.time()
        output_cpu = torch.nn.functional.conv2d(input_cpu, kernel_cpu, padding=1)
        conv_time = time.time() - start_time
        cpu_tests["convolution"] = {
            "input_shape": [batch_size, channels, height, width],
            "computation_time_s": conv_time,
            "operations": batch_size * channels * out_channels * height * width * kernel_size * kernel_size
        }
        print(f"✓ CPU卷积运算: {conv_time:.3f}s")
        results["cpu_performance"] = cpu_tests
    except Exception as e:
        results["cpu_performance"] = {"error": str(e)}
        print(f"✗ CPU性能测试失败: {e}")
    # ---- GPU benchmarks (only when CUDA is available) -------------------
    if torch.cuda.is_available():
        print("\n增强GPU性能测试...")
        gpu_tests = {}
        try:
            device = torch.device('cuda:0')
            # Test 1: large matrix multiplication (4K x 4K).
            print("测试1: 大规模矩阵乘法")
            size = 4096
            a_gpu = torch.randn(size, size, device=device)
            b_gpu = torch.randn(size, size, device=device)
            # Warm-up iterations so kernels/caches are primed before timing.
            for _ in range(5):
                _ = torch.matmul(a_gpu, b_gpu)
            torch.cuda.synchronize()
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)
            start_event.record()
            for _ in range(10):
                _ = torch.matmul(a_gpu, b_gpu)
            end_event.record()
            torch.cuda.synchronize()
            gpu_time = start_event.elapsed_time(end_event) / 1000  # ms -> s
            gpu_tests["large_matrix_multiplication"] = {
                "matrix_size": size,
                "computation_time_s": gpu_time,
                "flops_estimate": (2 * size**3 * 10) / max(gpu_time, 1e-9) / 1e9,
                "speedup_vs_cpu": cpu_tests.get("matrix_multiplication", {}).get("computation_time_s", 0) / max(gpu_time / 10, 1e-9) if cpu_tests else None
            }
            print(f" ✓ 4K矩阵乘法: {gpu_time:.3f}s (约 {gpu_tests['large_matrix_multiplication']['flops_estimate']:.2f} GFLOPS)")
            # Test 2: large convolution.
            print("测试2: 大规模卷积运算")
            batch_size, channels, height, width = 16, 128, 256, 256
            kernel_size = 3
            input_gpu = torch.randn(batch_size, channels, height, width, device=device)
            kernel_gpu = torch.randn(128, channels, kernel_size, kernel_size, device=device)
            for _ in range(5):  # warm-up
                _ = torch.nn.functional.conv2d(input_gpu, kernel_gpu, padding=1)
            torch.cuda.synchronize()
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)
            start_event.record()
            for _ in range(10):
                _ = torch.nn.functional.conv2d(input_gpu, kernel_gpu, padding=1)
            end_event.record()
            torch.cuda.synchronize()
            conv_gpu_time = start_event.elapsed_time(end_event) / 1000
            gpu_tests["large_convolution"] = {
                "input_shape": [batch_size, channels, height, width],
                "computation_time_s": conv_gpu_time,
                "operations": batch_size * channels * 128 * height * width * kernel_size * kernel_size * 10,
                "speedup_vs_cpu": cpu_tests.get("convolution", {}).get("computation_time_s", 0) / max(conv_gpu_time / 10, 1e-9) if cpu_tests else None
            }
            print(f" ✓ 大规模卷积: {conv_gpu_time:.3f}s")
            # Test 3: element-wise tensor operations.
            print("测试3: 张量逐元素运算")
            tensor_size = [1024, 1024, 16]
            x_gpu = torch.randn(*tensor_size, device=device)
            y_gpu = torch.randn(*tensor_size, device=device)
            torch.cuda.synchronize()
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)
            start_event.record()
            for _ in range(100):
                _ = x_gpu * y_gpu + torch.sin(x_gpu) - torch.exp(y_gpu)
            end_event.record()
            torch.cuda.synchronize()
            elementwise_time = start_event.elapsed_time(end_event) / 1000
            gpu_tests["elementwise_operations"] = {
                "tensor_size": tensor_size,
                "computation_time_s": elementwise_time,
                # 4 ops per element (*, +, sin, exp) x 100 iterations.
                "operations_per_second": 100 * 4 * torch.prod(torch.tensor(tensor_size)).item() / max(elementwise_time, 1e-9) / 1e9
            }
            print(f" ✓ 逐元素运算: {elementwise_time:.3f}s")
            # Test 4: memory bandwidth.
            print("测试4: 内存带宽测试")
            memory_size = 1024 * 1024 * 1024  # 1 GB
            data_gpu = torch.randn(memory_size // 4, device=device)  # float32 = 4 bytes
            torch.cuda.synchronize()
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)
            start_event.record()
            for _ in range(10):
                _ = data_gpu * 2.0  # simple memory-bound op
            end_event.record()
            torch.cuda.synchronize()
            memory_time = start_event.elapsed_time(end_event) / 1000
            gpu_tests["memory_bandwidth"] = {
                "memory_size_gb": memory_size / (1024**3),
                "computation_time_s": memory_time,
                "bandwidth_gbs": (memory_size * 10) / max(memory_time, 1e-9) / (1024**3)
            }
            print(f" ✓ 内存带宽: {memory_time:.3f}s (约 {gpu_tests['memory_bandwidth']['bandwidth_gbs']:.1f} GB/s)")
            # Test 5: every GPU individually (multi-GPU hosts only).
            if torch.cuda.device_count() > 1:
                print("测试5: 多GPU并行测试")
                multi_gpu_results = {}
                for i in range(torch.cuda.device_count()):
                    device_i = torch.device(f'cuda:{i}')
                    size = 2048
                    a_multi = torch.randn(size, size, device=device_i)
                    b_multi = torch.randn(size, size, device=device_i)
                    torch.cuda.synchronize(device_i)
                    start_event = torch.cuda.Event(enable_timing=True)
                    end_event = torch.cuda.Event(enable_timing=True)
                    start_event.record()
                    for _ in range(5):
                        _ = torch.matmul(a_multi, b_multi)
                    end_event.record()
                    torch.cuda.synchronize(device_i)
                    multi_time = start_event.elapsed_time(end_event) / 1000
                    multi_gpu_results[f"gpu_{i}"] = {
                        "matrix_size": size,
                        "computation_time_s": multi_time,
                        "flops_estimate": (2 * size**3 * 5) / max(multi_time, 1e-9) / 1e9
                    }
                    print(f" GPU {i}: {multi_time:.3f}s")
                gpu_tests["multi_gpu"] = multi_gpu_results
            results["gpu_performance"] = gpu_tests
            # Overall summary; `or 0` guards the None speedups produced when
            # the CPU benchmarks failed (sum() would raise on None).
            if gpu_tests and cpu_tests:
                avg_gpu_speedup = sum([
                    gpu_tests["large_matrix_multiplication"].get("speedup_vs_cpu") or 0,
                    gpu_tests["large_convolution"].get("speedup_vs_cpu") or 0
                ]) / 2
                print(f"\n✓ GPU平均加速比: {avg_gpu_speedup:.1f}x")
        except Exception as e:
            results["gpu_performance"] = {"error": str(e)}
            print(f"✗ GPU性能测试失败: {e}")
    return results
def print_summary(info_dict):
    """Print a human-readable summary of the collected environment info.

    Expects the dict assembled by main(): keys "system_info",
    "nvidia_info", "torch_info", "test_results" and optionally
    "performance_test".
    """
    print("\n" + "=" * 80)
    print("PyTorch环境摘要")
    print("=" * 80)
    # System
    sys_info = info_dict["system_info"]
    print(f"系统: {sys_info['platform']}")
    print(f"Python: {sys_info['python_version']} ({sys_info['python_implementation']})")
    # NVIDIA
    nvidia_info = info_dict["nvidia_info"]
    if nvidia_info["driver_version"]:
        print(f"NVIDIA驱动: {nvidia_info['driver_version']}")
    if nvidia_info["cuda_version"]:
        print(f"系统CUDA: {nvidia_info['cuda_version']}")
    # PyTorch
    torch_info = info_dict["torch_info"]
    print(f"PyTorch版本: {torch_info['torch_version']}")
    print(f"PyTorch CUDA版本: {torch_info['torch_cuda_version']}")
    print(f"CUDA可用: {torch_info['cuda_available']}")
    if torch_info["cuda_available"]:
        print(f"GPU数量: {torch_info['cuda_device_count']}")
        for i, device in enumerate(torch_info["devices"]):
            print(f"  GPU {i}: {device['name']}")
            print(f"    计算能力: {device['compute_capability']}")
            print(f"    显存: {device['total_memory_gb']:.2f} GB")
    # Test results
    print("\n测试结果摘要:")
    test_results = info_dict["test_results"]
    for test_name, result in test_results.items():
        if isinstance(result, dict) and "success" in result:
            status = "✓ 通过" if result["success"] else "✗ 失败"
            print(f"  {test_name.replace('_', ' ').title()}: {status}")
    # Performance summary.  test_performance() nests its timings one level
    # deeper than the old code assumed (cpu_performance ->
    # matrix_multiplication -> computation_time_s), so read the nested
    # dicts; the previous top-level checks never matched and this section
    # was dead code.
    if "performance_test" in info_dict:
        perf = info_dict["performance_test"]
        cpu_perf = perf.get("cpu_performance", {})
        cpu_mm = cpu_perf.get("matrix_multiplication", {}) if isinstance(cpu_perf, dict) else {}
        if "computation_time_s" in cpu_mm:
            print(f"\nCPU性能: {cpu_mm['computation_time_s']:.3f}s")
        gpu_perf = perf.get("gpu_performance", {})
        gpu_mm = gpu_perf.get("large_matrix_multiplication", {}) if isinstance(gpu_perf, dict) else {}
        if "computation_time_s" in gpu_mm:
            print(f"GPU性能: {gpu_mm['computation_time_s']:.3f}s")
            if gpu_mm.get("speedup_vs_cpu"):
                print(f"GPU加速比: {gpu_mm['speedup_vs_cpu']:.1f}x")
def main():
    """Entry point: collect environment info, run tests, print a summary."""
    print("正在收集PyTorch环境信息...")
    print("-" * 80)
    # Torch-independent information first.
    all_info = {
        "timestamp": datetime.now().isoformat(),
        "script_version": "1.0.0",
        "system_info": get_system_info(),
        "nvidia_info": get_nvidia_info(),
    }
    # Only run the PyTorch-specific steps when torch can be imported.
    try:
        import torch
        all_info["torch_info"] = get_torch_info()
        print("\n运行PyTorch功能测试...")
        print("-" * 80)
        all_info["test_results"] = test_basic_torch_operations()
        print("\n运行性能测试...")
        print("-" * 80)
        all_info["performance_test"] = test_performance()
    except ImportError:
        print("错误: PyTorch未安装!")
        print("请使用以下命令安装PyTorch:")
        print(" pip install torch torchvision torchaudio")
        all_info["torch_info"] = {"error": "PyTorch not installed"}
        all_info["test_results"] = {"error": "PyTorch not installed"}
        all_info["performance_test"] = {"error": "PyTorch not installed"}
    # Print the human-readable summary of everything collected above.
    print_summary(all_info)
# Guard the entry point so importing this module does not run the
# full benchmark suite as a side effect.
if __name__ == "__main__":
    main()
关注我,一起探索具身智能的无限可能。