1. NumPy简介
什么是NumPy?
NumPy(Numerical Python)是Python中用于科学计算的基础库,提供高性能的多维数组对象(ndarray)以及处理这些数组的工具。它是几乎所有Python科学计算库的基础,如Pandas、SciPy、Matplotlib等。
NumPy的核心特性
# numpy_features.py
# Print a short overview of NumPy's core features.
import numpy as np

# Feature name -> one-line description, kept in presentation order.
numpy_features = {
    "ndarray": "多维数组对象,存储同类型数据",
    "广播": "不同形状数组之间的算术运算",
    "向量化操作": "对整个数组进行操作,无需循环",
    "线性代数": "矩阵运算、特征值、SVD等",
    "傅里叶变换": "信号处理功能",
    "随机数生成": "各种概率分布的随机数",
    "与C/C++集成": "高性能数值计算",
}

print("NumPy核心特性:")
# One bullet line per feature, emitted with a single write.
print("\n".join(f" • {name}: {text}" for name, text in numpy_features.items()))
NumPy安装
# 基础安装pip install numpy# 使用condaconda install numpy# 安装特定版本pip install numpy==1.24.0# 开发版本pip install numpy --upgrade
2. NumPy数组基础
创建NumPy数组
# array_creation.py
# Tour of the common ways to construct NumPy arrays: from Python lists,
# factory functions, numeric sequences, random draws, and explicit dtypes.
import numpy as np

print("=== NumPy数组创建 ===")

# 1. Build an array from a Python list
print("\n1. 从Python列表创建:")
list_data = [1, 2, 3, 4, 5]
arr1 = np.array(list_data)
for line in (
    f" 列表: {list_data}",
    f" NumPy数组: {arr1}",
    f" 类型: {type(arr1)}",
    f" 数组类型: {arr1.dtype}",
):
    print(line)

# 2. Build a multi-dimensional array from nested lists
print("\n2. 创建多维数组:")
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(f" 2D数组:\n{arr2d}")
print(f" 形状: {arr2d.shape}")

# 3. Factory functions for special arrays
print("\n3. 特殊数组创建函数:")
zeros = np.zeros((3, 4))     # all zeros
ones = np.ones((2, 3))       # all ones
eye = np.eye(3)              # identity matrix
diag = np.diag([1, 2, 3])    # diagonal matrix
for title, matrix in (
    (" 全零数组(3x4)", zeros),
    (" 全一数组(2x3)", ones),
    (" 单位矩阵(3x3)", eye),
    (" 对角矩阵", diag),
):
    print(f"{title}:\n{matrix}")

# 4. Numeric sequences
print("\n4. 序列数组:")
arr_range = np.arange(10)             # 0..9, like range()
arr_range_step = np.arange(0, 10, 2)  # 0..9 with step 2
arr_linspace = np.linspace(0, 1, 5)   # 5 evenly spaced points in [0, 1]
for title, sequence in (
    (" arange(10)", arr_range),
    (" arange(0, 10, 2)", arr_range_step),
    (" linspace(0, 1, 5)", arr_linspace),
):
    print(f"{title}: {sequence}")

# 5. Random arrays
print("\n5. 随机数组:")
np.random.seed(42)  # fixed seed so the output is reproducible
# The three draws stay in this order because they share the global RNG state.
rand_uniform = np.random.rand(3, 3)                  # uniform on [0, 1)
rand_normal = np.random.randn(3, 3)                  # standard normal
rand_int = np.random.randint(0, 10, size=(3, 3))     # integers in [0, 10)
for title, sample in (
    (" 均匀分布(3x3)", rand_uniform),
    (" 标准正态分布(3x3)", rand_normal),
    (" 随机整数[0,10)(3x3)", rand_int),
):
    print(f"{title}:\n{sample}")

# 6. Empty array (contents are uninitialized memory)
print("\n6. 空数组:")
empty_arr = np.empty((2, 3))
print(f" 空数组(2x3):\n{empty_arr}")

# 7. Arrays with an explicit dtype
print("\n7. 指定数据类型的数组:")
arr_float = np.array([1, 2, 3], dtype=np.float64)
arr_int32 = np.array([1, 2, 3], dtype=np.int32)
arr_complex = np.array([1, 2, 3], dtype=np.complex128)
for title, typed in (
    (" 浮点数组", arr_float),
    (" 32位整数数组", arr_int32),
    (" 复数数组", arr_complex),
):
    print(f"{title}: {typed}, 类型: {typed.dtype}")
数组属性
# array_properties.py
# Inspect the attributes of an ndarray and show dtype conversion,
# reshaping, flattening, transposition and memory order.
import numpy as np

# Sample 3x4 array used throughout
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

print("=== NumPy数组属性 ===\n")
print(f"数组:\n{arr}\n")

# Basic attributes: (label, value, trailing unit)
print("1. 基本属性:")
basic_rows = (
    ("ndim (维度)", arr.ndim, ""),
    ("shape (形状)", arr.shape, ""),
    ("size (元素总数)", arr.size, ""),
    ("dtype (数据类型)", arr.dtype, ""),
    ("itemsize (每个元素字节数)", arr.itemsize, " 字节"),
    ("nbytes (总字节数)", arr.nbytes, " 字节"),
)
for label, attribute, unit in basic_rows:
    print(f" {label}: {attribute}{unit}")
print(" flags (内存信息):")

# Flag name -> description; only flags this NumPy build exposes are printed.
flags_info = {
    'C_CONTIGUOUS': 'C顺序连续',
    'F_CONTIGUOUS': 'Fortran顺序连续',
    'OWNDATA': '拥有数据',
    'WRITEABLE': '可写',
    'ALIGNED': '内存对齐',
    'WRITEBACKIFCOPY': '如果需要则写回',
    'UPDATEIFCOPY': '如果副本则更新',
}
for flag, description in flags_info.items():
    attr_name = flag.lower()
    if not hasattr(arr.flags, attr_name):
        continue
    print(f" {description}: {getattr(arr.flags, attr_name)}")

# dtype conversion
print("\n2. 数据类型转换:")
arr_float = arr.astype(np.float32)
print(f" 转换为float32:\n{arr_float}")
print(f" 数据类型: {arr_float.dtype}")

# Reshape (data is unchanged)
print("\n3. 形状改变:")
reshaped = arr.reshape(4, 3)
print(f" 重塑为(4,3):\n{reshaped}")

# Flatten
print("\n4. 展平数组:")
flattened = arr.flatten()  # always a copy
raveled = arr.ravel()      # may be a view
print(f" flatten() (拷贝): {flattened}")
print(f" ravel() (视图): {raveled}")

# Transpose
print("\n5. 转置:")
transposed = arr.T
print(f" 转置:\n{transposed}")
print(f" 形状: {transposed.shape}")

# Memory order
print("\n6. 内存布局:")
c_ordered = np.ones((3, 3), order='C')
f_ordered = np.ones((3, 3), order='F')
print(f" C顺序 (行优先): {c_ordered.flags['C_CONTIGUOUS']}")
print(f" F顺序 (列优先): {f_ordered.flags['F_CONTIGUOUS']}")
3. 数组索引和切片
基本索引和切片
# array_indexing.py
# Demonstrates basic indexing, slicing, boolean masks, fancy indexing and
# the difference between views and copies.
import numpy as np

print("=== NumPy数组索引和切片 ===\n")

# Sample 1-D array
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(f"一维数组: {arr}\n")

print("1. 一维数组索引和切片:")
# Plain integer indexing
for position in (0, -1, 5):
    print(f" arr[{position}] = {arr[position]}")

# Slices follow [start:stop:step]
print("\n 切片操作:")
slice_demos = (
    ("2:5", arr[2:5]),      # indices 2..4
    (":5", arr[:5]),        # start..4
    ("5:", arr[5:]),        # 5..end
    ("::2", arr[::2]),      # every other element
    ("1::2", arr[1::2]),    # every other element, starting at 1
    ("::-1", arr[::-1]),    # reversed
)
for expression, selected in slice_demos:
    print(f" arr[{expression}] = {selected}")

print("\n2. 多维数组索引和切片:")
arr2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(f" 2D数组:\n{arr2d}\n")

# (row, column) element access: first cell, row 1 / col 2, last cell
for row, col in ((0, 0), (1, 2), (-1, -1)):
    print(f" arr2d[{row}, {col}] = {arr2d[row, col]}")

# Row and column slices
print("\n 行和列切片:")
first_row = arr2d[0, :]
second_col = arr2d[:, 1]
top_two_rows = arr2d[:2, :]
last_two_cols = arr2d[:, -2:]
print(f" 第一行: {first_row}")
print(f" 第二列: {second_col}")
print(f" 前两行: \n{top_two_rows}")
print(f" 后两列: \n{last_two_cols}")

# Sub-matrix
print("\n 子矩阵:")
print(f" 中间2x2子矩阵: \n{arr2d[0:2, 1:3]}")

print("\n3. 三维数组索引:")
arr3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
print(f" 3D数组形状: {arr3d.shape}")
print(f" arr3d[0, 1, 0] = {arr3d[0, 1, 0]}")
print(f" 第一个矩阵: \n{arr3d[0]}")
print(f" 所有矩阵的第一行: \n{arr3d[:, 0, :]}")

print("\n4. 布尔索引:")
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Boolean mask
mask = arr > 5
print(f" 数组: {arr}")
print(f" 掩码(arr > 5): {mask}")
print(f" arr[mask] = {arr[mask]}")
# Conditions can be used as the index directly
evens = arr[arr % 2 == 0]
between = arr[(arr > 3) & (arr < 8)]
print(f" arr[arr % 2 == 0] = {evens}")
print(f" arr[(arr > 3) & (arr < 8)] = {between}")

print("\n5. 花式索引 (Fancy Indexing):")
arr = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
# Index with a list of integers
indices = [0, 2, 4, 6]
print(f" 数组: {arr}")
print(f" 索引列表: {indices}")
print(f" arr[indices] = {arr[indices]}")
# Index with a NumPy integer array
idx_arr = np.array([1, 3, 5])
print(f" arr[idx_arr] = {arr[idx_arr]}")

print("\n6. 多维花式索引:")
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
# Rows and columns are paired element-wise
rows = [0, 2, 3]
cols = [1, 2, 0]
print(f" 2D数组:\n{arr2d}")
print(f" 行索引: {rows}")
print(f" 列索引: {cols}")
print(f" arr2d[rows, cols] = {arr2d[rows, cols]}")

print("\n7. 视图 vs 副本:")
arr = np.array([1, 2, 3, 4, 5])
print(f" 原始数组: {arr}")

# A slice is a view that shares data with the original
arr_view = arr[1:4]
arr_view[0] = 99
print(f" 切片视图修改后: {arr}")

# Start again from a fresh array
arr = np.array([1, 2, 3, 4, 5])

# copy() detaches the data
arr_copy = arr[1:4].copy()
arr_copy[0] = 99
print(f" 副本修改后: {arr} (原始数组不变)")
4. 数组操作
数组运算
# array_operations.py
# Element-wise arithmetic, comparisons, matrix products, broadcasting,
# universal functions and aggregations.
import numpy as np

print("=== NumPy数组运算 ===\n")

# Sample operands
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
print(f"数组a: {a}")
print(f"数组b: {b}\n")

print("1. 算术运算:")
arithmetic = (
    ("a + b", a + b),
    ("a - b", a - b),
    ("a * b", a * b),
    ("a / b", a / b),
    ("a ** 2", a ** 2),
    ("a % 3", a % 3),
)
for expression, outcome in arithmetic:
    print(f" {expression} = {outcome}")

print("\n2. 比较运算:")
comparisons = (
    ("a > 2", a > 2),
    ("a == b", a == b),
    ("a != b", a != b),
)
for expression, outcome in comparisons:
    print(f" {expression} = {outcome}")

print("\n3. 矩阵运算:")
m1 = np.array([[1, 2], [3, 4]])
m2 = np.array([[5, 6], [7, 8]])
print(f" 矩阵m1:\n{m1}")
print(f" 矩阵m2:\n{m2}")

# Matrix product versus element-wise product
dot_product = np.dot(m1, m2)
at_product = m1 @ m2
elementwise = m1 * m2
print(f"\n 矩阵乘法(np.dot):\n{dot_product}")
print(f" 矩阵乘法(@运算符):\n{at_product}")
print(f" 逐元素乘法:\n{elementwise}")

# Transpose
print("\n 转置:")
print(f" m1.T = \n{m1.T}")

print("\n4. 广播机制:")
print(" 广播允许不同形状的数组进行运算")

# Array with scalar
print(f" a + 10 = {a + 10}")
print(f" a * 2 = {a * 2}")

# Arrays of different shapes
arr1 = np.array([1, 2, 3])
arr2 = np.array([[10], [20], [30]])
print(f"\n 数组1(形状{arr1.shape}): {arr1}")
print(f" 数组2(形状{arr2.shape}):\n{arr2}")
print(f" 广播加法:\n{arr1 + arr2}")

# More broadcasting examples
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
row_vector = np.array([10, 20, 30])
col_vector = np.array([[10], [20], [30]])
print(f"\n 矩阵(3x3):\n{matrix}")
print(f" 行向量: {row_vector}")
print(f" 矩阵 + 行向量:\n{matrix + row_vector}")
print(f"\n 列向量:\n{col_vector}")
print(f" 矩阵 + 列向量:\n{matrix + col_vector}")

print("\n5. 通用函数(ufunc):")
print(" 通用函数是对数组元素进行元素级运算的函数")
arr = np.array([0, np.pi/2, np.pi, 3*np.pi/2])
print(f"\n 数组: {arr}")

# Math functions applied element-wise
for func_name, ufunc in (("sin", np.sin), ("cos", np.cos), ("exp", np.exp)):
    print(f" {func_name}(arr) = {ufunc(arr)}")
print(f" log(arr + 1) = {np.log(arr + 1)}")
print(f" sqrt(arr) = {np.sqrt(arr)}")

# Aggregations
print("\n6. 聚合运算:")
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
print(f" 数组: {arr}")
for func_name in ("sum", "mean", "std", "var", "min", "max"):
    print(f" {func_name}() = {getattr(np, func_name)(arr)}")
print(f" argmin() = {np.argmin(arr)} (最小值的索引)")
print(f" argmax() = {np.argmax(arr)} (最大值的索引)")
print(f" cumsum() = {np.cumsum(arr)} (累积和)")
print(f" cumprod() = {np.cumprod(arr)} (累积积)")

print("\n7. 多维数组聚合:")
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(f" 2D数组:\n{arr2d}")
total = np.sum(arr2d)
column_sums = np.sum(arr2d, axis=0)
row_sums = np.sum(arr2d, axis=1)
column_means = np.mean(arr2d, axis=0)
row_maxima = np.max(arr2d, axis=1)
print(f" 整体求和: {total}")
print(f" 按列求和(axis=0): {column_sums}")
print(f" 按行求和(axis=1): {row_sums}")
print(f" 每列平均值: {column_means}")
print(f" 每行最大值: {row_maxima}")
数组操作函数
# array_manipulation.py
# Shape changes, transposition, joining, splitting, element insertion and
# removal, sorting, searching and set operations on NumPy arrays.
import numpy as np

print("=== NumPy数组操作函数 ===\n")

print("1. 形状操作:")
arr = np.arange(12)
print(f" 原始数组: {arr}")

# reshape keeps the element count
reshaped = arr.reshape(3, 4)
print(f" 重塑为3x4:\n{reshaped}")

# np.resize may change the element count; data is repeated or truncated
resized = np.resize(arr, (3, 5))
print(f" 调整为3x5:\n{resized}")

# flatten versus ravel
print(f"\n flatten() (总是返回拷贝): {arr.flatten()}")
print(f" ravel() (可能返回视图): {arr.ravel()}")

print("\n2. 转置操作:")
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(f" 原始数组:\n{arr2d}")
print(f" 转置:\n{arr2d.T}")

print(" 换轴(3D数组):")
arr3d = np.arange(24).reshape(2, 3, 4)
print(f" 3D数组形状: {arr3d.shape}")
print(f" 交换轴0和1: {np.transpose(arr3d, (1, 0, 2)).shape}")
print(f" 交换轴0和2: {np.transpose(arr3d, (2, 1, 0)).shape}")

print("\n3. 连接数组:")
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(f" 数组a: {a}")
print(f" 数组b: {b}")

# Horizontal joins
print(f" 水平连接(concatenate): {np.concatenate([a, b])}")
print(f" 水平连接(hstack): {np.hstack([a, b])}")

# Vertical join; the operands are first made 2-D row vectors
a2d = a.reshape(1, -1)
b2d = b.reshape(1, -1)
print(f" 垂直连接(vstack):\n{np.vstack([a2d, b2d])}")

# Stack 1-D arrays as columns
print(f" 列堆叠(column_stack):\n{np.column_stack([a, b])}")

print("\n4. 分割数组:")
arr = np.arange(12).reshape(3, 4)
print(f" 数组:\n{arr}")

# Split along columns
split_h = np.hsplit(arr, 2)
print(" 水平分割为2部分:")
for number, part in enumerate(split_h, start=1):
    print(f" 部分{number}:\n{part}")

# Split along rows
split_v = np.vsplit(arr, 3)
print(" 垂直分割为3部分:")
for number, part in enumerate(split_v, start=1):
    print(f" 部分{number}:\n{part}")

print("\n5. 添加和删除元素:")
arr = np.array([1, 2, 3, 4, 5])

# Append at the end
appended = np.append(arr, [6, 7])
print(f" 添加元素: {appended}")

# Insert before index 2
inserted = np.insert(arr, 2, [99, 100])
print(f" 插入元素: {inserted}")

# Delete the elements at indices 1 and 3
deleted = np.delete(arr, [1, 3])
print(f" 删除元素: {deleted}")

print("\n6. 排序:")
arr = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5])
print(f" 原始数组: {arr}")

# Sorted copy
sorted_arr = np.sort(arr)
print(f" 排序后: {sorted_arr}")

# Indices that would sort the array
sort_indices = np.argsort(arr)
print(f" 排序索引: {sort_indices}")
print(f" 使用索引获取排序数组: {arr[sort_indices]}")

# In-place sort on a copy so `arr` stays untouched
arr_copy = arr.copy()
arr_copy.sort()
print(f" 原地排序: {arr_copy}")

print("\n7. 搜索:")
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Indices of the elements matching a condition
indices = np.where(arr > 5)
print(f" 大于5的索引: {indices}")
print(f" 大于5的元素: {arr[indices]}")

# Positions of the extrema
print(f" 最大值索引: {np.argmax(arr)}")
print(f" 最小值索引: {np.argmin(arr)}")

# Indices of the non-zero elements
arr_with_zeros = np.array([0, 1, 0, 3, 0, 5])
nonzero_indices = np.nonzero(arr_with_zeros)
print(f" 非零元素索引: {nonzero_indices}")
print(f" 非零元素: {arr_with_zeros[nonzero_indices]}")

print("\n8. 集合操作:")
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([4, 5, 6, 7, 8])
print(f" 数组1: {arr1}")
print(f" 数组2: {arr2}")
print(f" 并集: {np.union1d(arr1, arr2)}")
print(f" 交集: {np.intersect1d(arr1, arr2)}")
print(f" 差集(arr1 - arr2): {np.setdiff1d(arr1, arr2)}")
print(f" 对称差集: {np.setxor1d(arr1, arr2)}")
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
membership = np.isin(arr1, arr2)
print(f" 是否在arr2中: {membership}")
5. 线性代数
# linear_algebra.py
# Matrix products, inversion, linear solves, eigen-decomposition,
# determinant, trace, QR/SVD factorizations and norms with numpy.linalg.
import numpy as np

print("=== NumPy线性代数 ===\n")

print("1. 矩阵运算:")
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print(f" 矩阵A:\n{A}")
print(f" 矩阵B:\n{B}")

# Matrix product, spelled two ways
print("\n 矩阵乘法:")
print(f" np.dot(A, B):\n{np.dot(A, B)}")
print(f" A @ B:\n{A @ B}")

# Transpose
print("\n 转置:")
print(f" A.T:\n{A.T}")

# Inverse; only the inversion itself can raise LinAlgError
print("\n 矩阵求逆:")
try:
    A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
    print(" 矩阵不可逆")
else:
    print(f" A的逆矩阵:\n{A_inv}")
    print(f" A @ A_inv (验证):\n{A @ A_inv}")

print("\n2. 线性方程组求解:")
# Solve A x = b
A = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
print(f" 系数矩阵A:\n{A}")
print(f" 常数向量b: {b}")
x = np.linalg.solve(A, b)
print(f" 解x: {x}")
print(f" 验证A @ x: {A @ x}")

print("\n3. 特征值和特征向量:")
A = np.array([[4, -2], [1, 1]])
eigenvalues, eigenvectors = np.linalg.eig(A)
print(f" 矩阵A:\n{A}")
print(f" 特征值: {eigenvalues}")
print(f" 特征向量:\n{eigenvectors}")

# Check A v = lambda v; eigenvectors are the columns, hence the transpose
for number, (lam, vec) in enumerate(zip(eigenvalues, eigenvectors.T), start=1):
    left_side = A @ vec
    right_side = lam * vec
    print(f"\n 验证特征值{number}:")
    print(f" A @ v: {left_side}")
    print(f" λ * v: {right_side}")
    print(f" 是否相等: {np.allclose(left_side, right_side)}")

print("\n4. 行列式:")
A = np.array([[1, 2], [3, 4]])
det_A = np.linalg.det(A)
print(f" 矩阵A:\n{A}")
print(f" 行列式det(A): {det_A}")

print("\n5. 矩阵的迹:")
trace_A = np.trace(A)
print(f" 矩阵A的迹: {trace_A}")

print("\n6. 矩阵分解:")
# QR factorization
A = np.array([[12, -51, 4], [6, 167, -68], [-4, 24, -41]])
print(f" 矩阵A:\n{A}")
Q, R = np.linalg.qr(A)
print("\n QR分解:")
print(f" Q矩阵:\n{Q}")
print(f" R矩阵:\n{R}")
print(f" 验证Q @ R:\n{Q @ R}")

# Singular value decomposition
print("\n SVD分解:")
U, S, Vt = np.linalg.svd(A)
print(f" U矩阵:\n{U}")
print(f" 奇异值: {S}")
print(f" V转置矩阵:\n{Vt}")

# Rebuild the rectangular diagonal matrix from the singular values
Sigma = np.zeros(A.shape)
singular_count = len(S)
Sigma[:singular_count, :singular_count] = np.diag(S)
print(f" Σ矩阵:\n{Sigma}")
print(f" 验证U @ Σ @ Vt:\n{U @ Sigma @ Vt}")

print("\n7. 范数:")
x = np.array([1, 2, 3, 4])
print(f" 向量x: {x}")
norm_l1 = np.linalg.norm(x, ord=1)
norm_l2 = np.linalg.norm(x, ord=2)
norm_inf = np.linalg.norm(x, ord=np.inf)
print(f" L1范数: {norm_l1}")
print(f" L2范数: {norm_l2}")
print(f" 无穷范数: {norm_inf}")

A = np.array([[1, 2], [3, 4]])
print(f"\n 矩阵A:\n{A}")
print(f" 弗罗贝尼乌斯范数: {np.linalg.norm(A, 'fro')}")
6. 随机数生成
# random_numbers.py
# Random number generation with NumPy: basic draws, sampling, permutations,
# common probability distributions, seeding and generator objects.
#
# matplotlib is only needed by plot_distributions(), so it is imported there
# and the rest of the script runs without it installed.
import numpy as np

print("=== NumPy随机数生成 ===\n")

# Fixed seed so the output is reproducible
np.random.seed(42)

print("1. 基本随机数:")
# Uniform on [0, 1)
uniform_random = np.random.rand(5)
print(f" 均匀分布[0,1): {uniform_random}")

# Standard normal
normal_random = np.random.randn(5)
print(f" 标准正态分布: {normal_random}")

# Integers in [0, 10)
random_int = np.random.randint(0, 10, size=5)
print(f" 随机整数[0,10): {random_int}")

print("\n2. 随机抽样:")
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Sampling without replacement
choices = np.random.choice(arr, size=5, replace=False)
print(f" 从数组随机选择(无放回): {choices}")

# Weighted sampling (uniform weights here, one per element)
weights = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
weighted_choices = np.random.choice(arr, size=5, p=weights)
print(f" 带权重随机选择: {weighted_choices}")

print("\n3. 随机排列:")
# shuffle works in place, so operate on a copy
arr_copy = arr.copy()
np.random.shuffle(arr_copy)
print(f" 打乱数组: {arr_copy}")

# permutation returns a shuffled copy
permutation = np.random.permutation(arr)
print(f" 随机排列: {permutation}")

print("\n4. 各种概率分布:")
# Normal distribution with explicit mean and standard deviation
normal = np.random.normal(loc=0, scale=1, size=1000)
print(" 正态分布(均值=0, 标准差=1): 样本数=1000")

# Uniform distribution on an explicit range
uniform = np.random.uniform(low=0, high=10, size=1000)
print(" 均匀分布[0,10): 样本数=1000")

# Binomial distribution
binomial = np.random.binomial(n=10, p=0.5, size=1000)
print(" 二项分布(n=10, p=0.5): 样本数=1000")

# Poisson distribution
poisson = np.random.poisson(lam=5, size=1000)
print(" 泊松分布(λ=5): 样本数=1000")

# Exponential distribution
exponential = np.random.exponential(scale=1.0, size=1000)
print(" 指数分布(scale=1.0): 样本数=1000")


# Optional visualization
def plot_distributions():
    """Plot histograms of the five samples drawn above.

    Reads the module-level arrays ``normal``, ``uniform``, ``binomial``,
    ``poisson`` and ``exponential`` and shows them in a 2x3 grid whose last
    cell is removed.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    # Imported lazily: plotting is optional and matplotlib may be absent.
    import matplotlib.pyplot as plt

    # NOTE: the CJK titles need a font with Chinese glyphs configured in
    # matplotlib; otherwise they may render as empty boxes.
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # Normal
    axes[0, 0].hist(normal, bins=30, alpha=0.7)
    axes[0, 0].set_title('正态分布 N(0,1)')

    # Uniform
    axes[0, 1].hist(uniform, bins=30, alpha=0.7)
    axes[0, 1].set_title('均匀分布 U(0,10)')

    # Binomial (integer bins)
    axes[0, 2].hist(binomial, bins=range(0, 12), alpha=0.7, align='left', rwidth=0.8)
    axes[0, 2].set_title('二项分布 B(10,0.5)')

    # Poisson (integer bins)
    axes[1, 0].hist(poisson, bins=range(0, 15), alpha=0.7, align='left', rwidth=0.8)
    axes[1, 0].set_title('泊松分布 P(5)')

    # Exponential
    axes[1, 1].hist(exponential, bins=30, alpha=0.7)
    axes[1, 1].set_title('指数分布 Exp(1)')

    # Drop the unused sixth subplot
    fig.delaxes(axes[1, 2])

    plt.tight_layout()
    plt.show()


# Uncomment the call below to show the plots.
# plot_distributions()

print("\n5. 随机种子:")
# The same seed reproduces the same sequence
np.random.seed(123)
rand1 = np.random.rand(3)
print(f" 种子123的随机数: {rand1}")

np.random.seed(123)
rand2 = np.random.rand(3)
print(f" 相同种子123的随机数: {rand2}")
print(f" 是否相同: {np.array_equal(rand1, rand2)}")

print("\n6. 随机状态对象:")
# Legacy generator object with its own state
rng = np.random.RandomState(42)
random_state_numbers = rng.rand(3)
print(f" 随机状态对象的随机数: {random_state_numbers}")

# Generator returned by default_rng
default_rng = np.random.default_rng(42)
modern_random = default_rng.random(3)
print(f" 现代随机生成器的随机数: {modern_random}")
7. 文件输入输出
# file_io.py
# Saving and loading NumPy arrays: binary .npy/.npz files, delimited text,
# structured arrays, memory-mapped files and text with missing values.
# Every file is written to the current working directory and removed again.
import numpy as np
import os

print("=== NumPy文件输入输出 ===\n")

# Sample data
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])
arr3 = np.arange(12).reshape(3, 4)

print("1. 二进制文件(.npy, .npz):")
# Save a single array to a .npy file
np.save('array1.npy', arr1)
print(" 保存数组到 array1.npy")

# Load the .npy file
loaded_arr1 = np.load('array1.npy')
print(f" 从 array1.npy 加载: {loaded_arr1}")

# Save several arrays into one .npz archive
np.savez('arrays.npz', arr1=arr1, arr2=arr2, arr3=arr3)
print("\n 保存多个数组到 arrays.npz")

# np.load returns an archive object holding an open file handle for .npz;
# the with-block closes it so the file can be deleted later.
with np.load('arrays.npz') as loaded_npz:
    print(" 从 arrays.npz 加载:")
    print(f" arr1: {loaded_npz['arr1']}")
    print(f" arr2:\n{loaded_npz['arr2']}")
    print(f" arr3:\n{loaded_npz['arr3']}")

print("\n2. 文本文件:")
# Save as comma-separated text
np.savetxt('array.txt', arr3, fmt='%d', delimiter=',')
print(" 保存数组到 array.txt (CSV格式)")

# Load from text
loaded_txt = np.loadtxt('array.txt', delimiter=',')
print(f" 从 array.txt 加载:\n{loaded_txt}")

# CSV with a header line
header = "Col1,Col2,Col3,Col4"
data = arr3
np.savetxt('array_with_header.csv', data, delimiter=',', header=header, fmt='%d')
print("\n 保存带标题的CSV文件")

# Skip the header line when loading
loaded_csv = np.loadtxt('array_with_header.csv', delimiter=',', skiprows=1)
print(f" 加载CSV文件(跳过标题):\n{loaded_csv}")

print("\n3. 结构化数组:")
# Structured array with named, typed fields
dtype = [('name', 'U10'), ('age', 'i4'), ('score', 'f4')]
students = np.array(
    [('Alice', 20, 85.5), ('Bob', 21, 92.0), ('Charlie', 19, 78.5)],
    dtype=dtype,
)
print(f" 结构化数组:\n{students}")
print(f" 按字段访问: {students['name']}")

# Save the structured array
np.save('students.npy', students)

# The fields are plain string/int/float types, so no pickling is involved
# and allow_pickle can stay at its safe default.
loaded_students = np.load('students.npy')
print(f"\n 加载的结构化数组:\n{loaded_students}")

print("\n4. 内存映射文件:")
# Large array
large_array = np.random.randn(1000, 1000)

# Write it out, then map it instead of reading it fully
np.save('large_array.npy', large_array)
mmap_array = np.load('large_array.npy', mmap_mode='r')
print(f" 内存映射数组形状: {mmap_array.shape}")
print(f" 内存映射类型: {type(mmap_array)}")

# Only the requested window is read; copy it so it no longer refers to the map
subset = np.array(mmap_array[:10, :10])
print(f" 访问前10x10子集:\n{subset}")

# Drop the last reference so the mapping is released before the file is
# deleted (a file that is still mapped cannot be removed on some platforms).
del mmap_array

print("\n5. 清理生成的文件:")
# Remove everything created above
files_to_remove = [
    'array1.npy',
    'arrays.npz',
    'array.txt',
    'array_with_header.csv',
    'students.npy',
    'large_array.npy',
]
for path in files_to_remove:
    if os.path.exists(path):
        os.remove(path)
        print(f" 已删除: {path}")

print("\n6. 其他格式:")
# genfromtxt can fill in missing values; the middle row has an empty field
data_with_missing = """1,2,3,4
5,,7,8
9,10,11,12"""
try:
    with open('data_with_missing.csv', 'w', encoding='utf-8') as f:
        f.write(data_with_missing)

    loaded_with_missing = np.genfromtxt(
        'data_with_missing.csv', delimiter=',', filling_values=-999
    )
    print(f" 处理缺失值:\n{loaded_with_missing}")
finally:
    # Clean up even if parsing failed
    if os.path.exists('data_with_missing.csv'):
        os.remove('data_with_missing.csv')
8. 性能优化和矢量化
# performance.py
# Timing comparisons that motivate NumPy idioms: vectorization,
# broadcasting, memory order, in-place updates, views and preallocation.
#
# Timings use time.perf_counter(), a high-resolution clock meant for
# measuring short durations. Ratios go through _ratio() so that a measured
# duration of zero cannot raise ZeroDivisionError.
import numpy as np
import time


def _ratio(slow, fast):
    """Return slow / fast, or infinity when *fast* was measured as zero."""
    return slow / fast if fast > 0 else float("inf")


print("=== NumPy性能优化 ===\n")

print("1. 矢量化 vs 循环:")
# Large array
size = 1000000
arr = np.random.rand(size)
print(f" 数组大小: {size:,}")

# Pure-Python loop; deliberately naive because it is the baseline
print("\n Python循环:")
start = time.perf_counter()
result_python = []
for x in arr:
    result_python.append(x * 2 + 1)
python_time = time.perf_counter() - start
print(f" 时间: {python_time:.4f}秒")

# Vectorized NumPy expression
print("\n NumPy矢量化:")
start = time.perf_counter()
result_numpy = arr * 2 + 1
numpy_time = time.perf_counter() - start
print(f" 时间: {numpy_time:.4f}秒")

speedup = _ratio(python_time, numpy_time)
print(f"\n NumPy比Python循环快 {speedup:.1f} 倍")

print("\n2. 广播性能:")
matrix = np.random.rand(1000, 1000)
vector = np.random.rand(1000)
print(f" 矩阵形状: {matrix.shape}")
print(f" 向量形状: {vector.shape}")

# Row-by-row loop
start = time.perf_counter()
result_loop = np.empty_like(matrix)
for i in range(matrix.shape[0]):
    result_loop[i] = matrix[i] + vector
loop_time = time.perf_counter() - start
print(f" 循环加法时间: {loop_time:.4f}秒")

# Broadcasting
start = time.perf_counter()
result_broadcast = matrix + vector.reshape(1, -1)
broadcast_time = time.perf_counter() - start
print(f" 广播加法时间: {broadcast_time:.4f}秒")
print(f"\n 广播比循环快 {_ratio(loop_time, broadcast_time):.1f} 倍")

print("\n3. 内存布局优化:")
# Compare C (row-major) and Fortran (column-major) order
size = 1000
arr_c = np.ones((size, size), order='C')
arr_f = np.ones((size, size), order='F')
print(f" 数组大小: {size}x{size}")

# Sum along each row (the original text expects C order to win here)
start = time.perf_counter()
row_sum_c = np.sum(arr_c, axis=1)
c_row_time = time.perf_counter() - start

start = time.perf_counter()
row_sum_f = np.sum(arr_f, axis=1)
f_row_time = time.perf_counter() - start
print(f" 按行求和 - C顺序: {c_row_time:.4f}秒, F顺序: {f_row_time:.4f}秒")
print(f" C顺序快 {_ratio(f_row_time, c_row_time):.1f} 倍")

# Sum along each column (the original text expects F order to win here)
start = time.perf_counter()
col_sum_c = np.sum(arr_c, axis=0)
c_col_time = time.perf_counter() - start

start = time.perf_counter()
col_sum_f = np.sum(arr_f, axis=0)
f_col_time = time.perf_counter() - start
print(f" 按列求和 - C顺序: {c_col_time:.4f}秒, F顺序: {f_col_time:.4f}秒")
print(f" F顺序快 {_ratio(c_col_time, f_col_time):.1f} 倍")

print("\n4. 就地操作:")
# In-place updates versus allocating a new array
arr = np.random.rand(1000000)

# New array
start = time.perf_counter()
new_arr = arr * 2 + 1
new_time = time.perf_counter() - start
print(f" 创建新数组时间: {new_time:.4f}秒")

# In place
start = time.perf_counter()
arr *= 2
arr += 1
inplace_time = time.perf_counter() - start
print(f" 就地操作时间: {inplace_time:.4f}秒")
print(f" 就地操作快 {_ratio(new_time, inplace_time):.1f} 倍")

print("\n5. 视图 vs 副本:")
arr = np.random.rand(1000000)

# View: no data is copied
start = time.perf_counter()
view = arr[::2]
view_sum = np.sum(view)
view_time = time.perf_counter() - start
print(f" 视图操作时间: {view_time:.4f}秒")

# Copy: data is duplicated first
start = time.perf_counter()
copy = arr[::2].copy()
copy_sum = np.sum(copy)
copy_time = time.perf_counter() - start
print(f" 副本操作时间: {copy_time:.4f}秒")
print(f" 视图比副本快 {_ratio(copy_time, view_time):.1f} 倍")

print("\n6. 预分配数组:")
# np.append copies the whole array on every call, so the loop below is
# quadratic in `size`. The size is kept small on purpose: at one million
# elements the demo would effectively never finish.
size = 10000

# Growing the array one element at a time
start = time.perf_counter()
result = np.array([])
for i in range(size):
    result = np.append(result, i)
dynamic_time = time.perf_counter() - start
print(f" 动态追加时间: {dynamic_time:.4f}秒")

# Preallocating and filling by index
start = time.perf_counter()
result_preallocated = np.empty(size)
for i in range(size):
    result_preallocated[i] = i
preallocated_time = time.perf_counter() - start
print(f" 预分配时间: {preallocated_time:.4f}秒")
print(f" 预分配快 {_ratio(dynamic_time, preallocated_time):.1f} 倍")
总结
NumPy是Python科学计算的基石,今天的学习涵盖了以下核心内容:
关键收获
NumPy数组基础:理解了ndarray对象的创建、属性和基本操作
数组索引和切片:掌握了多种数组访问和修改数据的方法
数组运算:学会了矢量化操作、广播机制和通用函数
数组操作:熟悉了数组的形状改变、连接、分割等操作
线性代数:掌握了矩阵运算、特征值、线性方程组求解等
随机数生成:了解了各种概率分布的随机数生成方法
文件I/O:学会了保存和加载NumPy数组数据
性能优化:理解了NumPy的矢量化优势和相关性能技巧
NumPy核心优势
NumPy的核心优势在于:高性能的矢量化运算、灵活的广播机制、丰富的线性代数与随机数工具,并且它是Pandas、SciPy、Matplotlib等科学计算库的共同基础。
最佳实践
避免Python循环:尽可能使用矢量化操作
使用广播:合理利用广播机制进行数组运算
注意内存布局:根据访问模式选择C顺序或F顺序
预分配数组:避免动态追加数组
使用视图而非副本:减少不必要的数据复制
常见错误和解决方案
# common_mistakes.py
# Common NumPy pitfalls and how to handle them: float comparison,
# incompatible broadcasting, views versus copies, fixed-width integer
# overflow and oversized allocations.
import numpy as np

print("=== NumPy常见错误和解决方案 ===\n")

print("1. 浮点数精度问题:")
# Comparing floats
a = 0.1 + 0.2
b = 0.3
print(f" 直接比较: {a == b}")
print(f" 使用np.isclose: {np.isclose(a, b)}")
print(f" 使用容差比较: {abs(a - b) < 1e-10}")

print("\n2. 广播错误:")
# Shapes (3,) and (2,) cannot be broadcast together
a = np.array([1, 2, 3])
b = np.array([1, 2])
try:
    result = a + b  # raises ValueError
except ValueError as e:
    print(f" 广播错误: {e}")

print("\n3. 视图和副本混淆:")
a = np.array([1, 2, 3, 4, 5])
b = a[2:4]  # view: writes go through to `a`
b[0] = 99
print(f" 修改视图后原数组: {a}")

a = np.array([1, 2, 3, 4, 5])
b = a[2:4].copy()  # copy: `a` is left alone
b[0] = 99
print(f" 修改副本后原数组: {a}")

print("\n4. 整数溢出:")
# Python integers are arbitrary precision; NumPy integers are fixed width
large_int = 2**31 - 1
print(f" Python整数: {large_int}")
arr = np.array([large_int], dtype=np.int32)
print(f" int32数组: {arr}")
print(f" 加1后: {arr + 1}")  # may overflow

print("\n5. 内存错误:")
# Request far more memory than is available. np.empty is used instead of
# np.ones because it does not write to the buffer: if the operating system
# grants the request lazily, nothing is touched and the process is not put
# at risk while the demo waits for MemoryError.
try:
    huge_array = np.empty((10000, 10000, 10000))  # may exceed available memory
except MemoryError as e:
    print(f" 内存错误: {e}")
    print(" 解决方案: 使用内存映射或分块处理")
else:
    # The request was granted; release it straight away.
    del huge_array
明天我们将学习Pandas基础,这是数据分析的核心库,建立在NumPy之上,提供了更高级的数据结构和数据操作功能。