Python 系统教程
Python 标准库速查手册
Python 标准库速查手册
所属阶段:附录
建议学习时间:随用随查
重要程度:⭐⭐⭐⭐⭐
一、文本处理
| 模块 | 用途简介 | 最常用函数/方法 |
|------|---------|----------------|
| string | 字符串常量与模板 | string.ascii_letters、string.digits、string.Template |
| re | 正则表达式 | re.match/search/findall/sub/compile/split |
| textwrap | 文本折行与缩进处理 | textwrap.wrap/fill/indent/dedent/shorten |
| unicodedata | Unicode 字符数据库 | unicodedata.name/category/normalize |
| difflib | 序列差异比较 | difflib.SequenceMatcher/unified_diff/ndiff |
| fnmatch | Unix Shell 风格通配符 | fnmatch.fnmatch/filter |
| glob | 文件路径通配符 | glob.glob/iglob |
1.1 re 正则表达式常用示例
python
import re
text = "联系电话:138-1234-5678,邮箱:user@example.com"

# search: return the first match (None when nothing matches)
phone = re.search(r"\d{3}-\d{4}-\d{4}", text)
if phone:
    print(phone.group())  # 138-1234-5678

# findall: return every match as a list
emails = re.findall(r"[\w.+-]+@[\w-]+\.[\w.]+", text)
print(emails)  # ['user@example.com']

# sub: replace matches; the WHOLE string comes back with only the matched part rewritten
masked = re.sub(r"(\d{3})-\d{4}", r"\1-****", text)
print(masked)  # 联系电话:138-****-5678,邮箱:user@example.com

# compile: precompile a pattern that will be reused
# (no re.IGNORECASE here: the flag has no effect on a digits-only pattern)
pattern = re.compile(r"\d+")
numbers = pattern.findall("abc 123 def 456")  # ['123', '456']

# Named groups
match = re.search(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})", "2024-03-15")
if match:
    print(match.group("year"))  # 2024
    print(match.groupdict())  # {'year': '2024', 'month': '03', 'day': '15'}
1.2 textwrap 文本处理
python
import textwrap
long_text = "Python 是一种高级编程语言,以其简洁的语法和强大的库生态系统而闻名于世。"
# 按宽度折行
lines = textwrap.wrap(long_text, width=20)
print("\n".join(lines))
# 缩进
indented = textwrap.indent(long_text, prefix=" ")
# 去除公共缩进(适合处理多行字符串)
# The literal carries a common 4-space indent so that dedent() has something to strip
code = """
    def hello():
        print("world")
"""
clean = textwrap.dedent(code).strip()
二、数据类型
| 模块 | 用途简介 | 最常用函数/类 |
|------|---------|--------------|
| datetime | 日期和时间 | datetime/date/time/timedelta/timezone |
| calendar | 日历相关操作 | calendar.monthrange/isleap/weekday |
| collections | 高性能容器数据类型 | Counter/OrderedDict/defaultdict/namedtuple/deque/ChainMap |
| heapq | 堆队列(优先队列) | heapq.heappush/heappop/nlargest/nsmallest/heapify |
| array | 高效数值数组 | array.array(比列表节省内存) |
| bisect | 有序列表的二分算法 | bisect.bisect_left/bisect_right/insort |
| enum | 枚举类型 | Enum/IntEnum/Flag/auto |
| dataclasses | 数据类装饰器 | @dataclass/field/asdict/astuple |
2.1 datetime 常用操作
python
from datetime import datetime, date, timedelta, timezone
# 当前时间
now = datetime.now() # 本地时间
utc_now = datetime.now(timezone.utc) # UTC 时间
# 格式化输出
print(now.strftime("%Y-%m-%d %H:%M:%S")) # 2024-03-15 10:30:45
print(now.strftime("%Y年%m月%d日")) # 2024年03月15日
# 解析字符串
dt = datetime.strptime("2024-03-15 10:30:45", "%Y-%m-%d %H:%M:%S")
dt2 = datetime.fromisoformat("2024-03-15T10:30:45") # ISO 格式(Python 3.7+)
# Date arithmetic (timedelta)
tomorrow = now + timedelta(days=1)
last_week = now - timedelta(weeks=1)
# Use the current year (not a hard-coded one) and compare whole dates so the
# countdown is never negative: it reaches 0 on December 31 itself.
diff = date(now.year, 12, 31) - now.date()
print(f"距年末还有 {diff.days} 天")
# 时间戳转换
timestamp = now.timestamp() # 转为 Unix 时间戳
dt_from_ts = datetime.fromtimestamp(timestamp) # 从时间戳转换
# 日期属性
print(now.year, now.month, now.day)
print(now.weekday()) # 0=周一,6=周日
print(now.isoweekday()) # 1=周一,7=周日
2.2 collections 高级容器
python
from collections import Counter, defaultdict, deque, namedtuple, OrderedDict, ChainMap
# Counter:计数器(词频统计神器)
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
counter = Counter(words)
print(counter) # Counter({'apple': 3, 'banana': 2, 'cherry': 1})
print(counter.most_common(2)) # [('apple', 3), ('banana', 2)]
counter.update(["apple", "date"]) # 追加计数
# defaultdict:带默认值的字典
word_positions = defaultdict(list)
for i, word in enumerate(words):
word_positions[word].append(i)
print(dict(word_positions)) # {'apple': [0, 2, 5], 'banana': [1, 4], ...}
# deque:双端队列(O(1) 左右增删,比 list.insert(0) 快)
dq = deque([1, 2, 3], maxlen=5) # maxlen 限制最大长度(超出自动丢弃旧元素)
dq.appendleft(0) # 左端添加
dq.popleft() # 左端弹出
dq.rotate(1) # 向右旋转1位
# namedtuple:命名元组(轻量级类)
Point = namedtuple("Point", ["x", "y", "z"])
p = Point(1, 2, 3)
print(p.x, p.y) # 按名称访问
print(p._asdict()) # 转字典
# ChainMap:多字典合并(优先级搜索)
defaults = {"color": "blue", "size": "medium"}
user_config = {"color": "red"}
config = ChainMap(user_config, defaults)
print(config["color"]) # "red"(user_config 优先)
print(config["size"]) # "medium"(从 defaults 取)
2.3 heapq 优先队列
python
import heapq
# 最小堆
nums = [5, 2, 8, 1, 9, 3]
heapq.heapify(nums) # 原地转换为堆,O(n)
heapq.heappush(nums, 0) # 插入,O(log n)
smallest = heapq.heappop(nums) # 弹出最小值,O(log n)
# 取最大/最小的 N 个元素(比 sorted() 更高效)
data = [5, 2, 8, 1, 9, 3, 7, 4, 6]
print(heapq.nlargest(3, data)) # [9, 8, 7]
print(heapq.nsmallest(3, data)) # [1, 2, 3]
# 按对象属性排序(最小堆,用元组的第一个元素比较)
tasks = [(1, "低优先级"), (3, "紧急任务"), (2, "普通任务")]
heapq.heapify(tasks)
while tasks:
priority, task = heapq.heappop(tasks)
print(f"优先级 {priority}:{task}")
2.4 enum 枚举
python
from enum import Enum, IntEnum, Flag, auto
class Color(Enum):
    """Plain enumeration of three colors with explicit integer values."""

    RED = 1
    GREEN = 2
    BLUE = 3
class Permission(Flag):
    """Bit-flag enumeration whose members can be combined with ``|``."""

    READ = auto()  # auto() assigns 1
    WRITE = auto()  # auto() assigns 2
    EXECUTE = auto()  # auto() assigns 4
# 使用枚举
color = Color.RED
print(color.name) # "RED"
print(color.value) # 1
print(Color["BLUE"]) # Color.BLUE(从字符串访问)
print(Color(2)) # Color.GREEN(从值访问)
# 组合权限(Flag 枚举)
perm = Permission.READ | Permission.WRITE
print(Permission.READ in perm) # True
print(perm) # Permission.READ|WRITE
三、数学与数值
| 模块 | 用途简介 | 最常用函数 |
|------|---------|-----------|
| math | 常用数学函数 | ceil/floor/sqrt/log/log2/log10/sin/cos/pi/e/inf/isnan |
| cmath | 复数数学函数 | cmath.sqrt/polar/rect/phase |
| decimal | 高精度十进制计算 | Decimal/getcontext/ROUND_HALF_UP |
| fractions | 有理数 | Fraction(1, 3) 精确表示 1/3 |
| random | 随机数生成 | random/randint/choice/choices/sample/shuffle/seed/uniform |
| statistics | 统计函数 | mean/median/mode/stdev/variance/quantiles/correlation |
3.1 math 常用函数
python
import math
# Basic operations
print(math.ceil(3.2))  # 4 (round up)
print(math.floor(3.9))  # 3 (round down)
print(math.sqrt(16))  # 4.0
print(math.pow(2, 10))  # 1024.0
# math.log(x, base) is evaluated as log(x) / log(base) and picks up rounding
# error; prefer the dedicated log10 / log2 functions when the base is 10 or 2.
print(math.log(1000, 10))  # approximately 3.0 (2.9999999999999996 on CPython)
print(math.log10(1000))  # 3.0 (base-10 logarithm)
print(math.log2(256))  # 8.0
print(math.factorial(5))  # 120
# 三角函数(参数为弧度)
print(math.sin(math.pi / 2)) # 1.0
print(math.cos(0)) # 1.0
print(math.degrees(math.pi)) # 180.0(弧度转角度)
print(math.radians(180)) # 3.14...(角度转弧度)
# 常量
print(math.pi) # 3.14159...
print(math.e) # 2.71828...
print(math.inf) # 无穷大
print(math.isnan(float("nan"))) # True
print(math.isinf(math.inf)) # True
# 最大公约数和最小公倍数
print(math.gcd(12, 8)) # 4
print(math.lcm(4, 6)) # 12(Python 3.9+)
3.2 decimal 高精度计算
python
from decimal import Decimal, getcontext, ROUND_HALF_UP
# 解决浮点精度问题
print(0.1 + 0.2) # 0.30000000000000004(浮点误差)
print(Decimal("0.1") + Decimal("0.2")) # 0.3(精确)
# 设置全局精度(默认28位有效数字)
getcontext().prec = 50
# 金融计算(四舍五入到2位小数)
price = Decimal("19.99")
quantity = Decimal("3")
total = price * quantity
total_rounded = total.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
print(total_rounded) # 59.97
3.3 random 随机数
python
import random
# 设置随机种子(使结果可复现,测试时常用)
random.seed(42)
# 基本随机
print(random.random()) # [0.0, 1.0) 之间的浮点数
print(random.uniform(1.5, 5.5)) # [a, b] 之间的浮点数
print(random.randint(1, 100)) # [1, 100] 之间的整数
# 序列操作
fruits = ["苹果", "香蕉", "橙子", "葡萄", "西瓜"]
print(random.choice(fruits)) # 随机选一个
print(random.choices(fruits, k=3)) # 有放回抽样 3 个
print(random.sample(fruits, k=3)) # 无放回抽样 3 个(不重复)
random.shuffle(fruits) # 原地随机打乱
print(fruits)
# 按权重抽样
items = ["A", "B", "C"]
weights = [0.7, 0.2, 0.1] # A 出现概率 70%
print(random.choices(items, weights=weights, k=10))
四、文件与路径
| 模块 | 用途简介 | 最常用函数/类 |
|------|---------|--------------|
| os | 操作系统接口 | os.getcwd/chdir/makedirs/listdir/remove/rename/environ/getenv |
| os.path | 路径操作(旧风格) | join/split/basename/dirname/exists/isfile/isdir/abspath/getsize |
| pathlib | 面向对象路径(推荐)| Path/glob/rglob/mkdir/unlink/read_text/write_text/stat |
| shutil | 高级文件操作 | copy/copy2/move/rmtree/make_archive/unpack_archive/disk_usage |
| tempfile | 临时文件和目录 | TemporaryFile/NamedTemporaryFile/TemporaryDirectory/mkstemp |
| glob | 路径通配符 | glob.glob/iglob |
| fnmatch | 文件名匹配 | fnmatch/filter |
4.1 pathlib 面向对象路径(推荐方式)
python
from pathlib import Path
# 创建路径对象(跨平台,自动处理 / 和 \ 的问题)
p = Path("/home/user/documents")
current = Path(".") # 当前目录
home = Path.home() # 用户家目录
# 路径拼接(使用 / 运算符)
config_file = home / ".config" / "app" / "config.yaml"
print(config_file)
# 路径属性
print(config_file.name) # "config.yaml"(文件名)
print(config_file.stem) # "config"(无后缀的文件名)
print(config_file.suffix) # ".yaml"(后缀)
print(config_file.parent) # 父目录
print(config_file.parts) # 路径组成部分元组
# 路径判断
print(config_file.exists()) # 是否存在
print(config_file.is_file()) # 是否是文件
print(config_file.is_dir()) # 是否是目录
# File operations
config_file.parent.mkdir(parents=True, exist_ok=True)  # create the directory, parents included
config_file.write_text("key: value\n", encoding="utf-8")  # write the file
content = config_file.read_text(encoding="utf-8")  # read it back

# File metadata -- has to be read while the file still exists
stat = config_file.stat()
print(f"大小:{stat.st_size} 字节")
print(f"修改时间:{stat.st_mtime}")

# Walking directories
for file in Path(".").glob("**/*.py"):  # recursively find every .py file
    print(file)
for item in Path(".").iterdir():  # entries directly inside the current directory
    print(f"{'目录' if item.is_dir() else '文件'}: {item.name}")

# Delete the file last: calling stat() after unlink() raises FileNotFoundError
config_file.unlink()
4.2 shutil 高级文件操作
python
import shutil
from pathlib import Path
# 复制文件
shutil.copy("source.txt", "dest.txt") # 复制文件(不含元数据)
shutil.copy2("source.txt", "dest.txt") # 复制文件(含元数据)
shutil.copytree("src_dir/", "dest_dir/") # 复制整个目录树
# 移动文件/目录
shutil.move("old_path", "new_path")
# 删除整个目录
shutil.rmtree("dir_to_delete/")
# 打包/解包
shutil.make_archive("backup", "zip", "source_dir/") # 打包为 ZIP
shutil.make_archive("backup", "tar", "source_dir/") # 打包为 TAR
shutil.unpack_archive("backup.zip", "extract_dir/") # 解包
# 磁盘使用情况
usage = shutil.disk_usage("/")
print(f"总空间:{usage.total / 1e9:.1f} GB")
print(f"已用:{usage.used / 1e9:.1f} GB")
print(f"可用:{usage.free / 1e9:.1f} GB")
4.3 tempfile 临时文件
python
import tempfile
from pathlib import Path  # needed for the Path(tmpdir) join below

# Temporary file (removed automatically when the with block exits)
with tempfile.NamedTemporaryFile(
    mode="w", suffix=".txt", delete=True, encoding="utf-8"
) as f:
    f.write("临时内容")
    print(f.name)  # path of the temporary file

# Temporary directory (the whole tree is removed on exit)
with tempfile.TemporaryDirectory() as tmpdir:
    print(tmpdir)
    # Work inside the temporary directory ...
    temp_file = Path(tmpdir) / "test.txt"
    temp_file.write_text("测试内容", encoding="utf-8")
# After the with block the entire temporary directory has been deleted
五、数据格式
| 模块 | 用途简介 | 最常用函数 |
|------|---------|-----------|
| json | JSON 序列化 | json.dumps/loads/dump/load |
| csv | CSV 文件读写 | csv.reader/writer/DictReader/DictWriter |
| configparser | INI 格式配置文件 | ConfigParser.read/get/set/sections/options |
| pickle | Python 对象序列化 | pickle.dumps/loads/dump/load |
| struct | C 结构体二进制打包 | struct.pack/unpack/calcsize |
| base64 | Base64 编解码 | base64.b64encode/b64decode/urlsafe_b64encode |
| hashlib | 加密哈希函数 | hashlib.md5/sha256/sha512/new/update/digest/hexdigest |
5.1 json 常用操作
python
import json
from datetime import datetime
data = {
"name": "张三",
"age": 28,
"skills": ["Python", "FastAPI"],
"address": {"city": "北京", "district": "朝阳区"},
}
# 序列化(Python 对象 → JSON 字符串)
json_str = json.dumps(data, ensure_ascii=False, indent=2) # ensure_ascii=False 保留中文
print(json_str)
# 反序列化(JSON 字符串 → Python 对象)
data2 = json.loads(json_str)
# 写入文件
with open("data.json", "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
# 从文件读取
with open("data.json", "r", encoding="utf-8") as f:
data3 = json.load(f)
# 处理 datetime(不可直接序列化,需自定义编码器)
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` objects as ISO 8601 strings."""

    def default(self, obj):
        """Return ``obj.isoformat()`` for a ``datetime``; defer anything else to the base class.

        The base-class ``default`` raises ``TypeError`` for types it cannot
        serialize, so unsupported objects still fail loudly.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)
data_with_date = {"created_at": datetime.now()}
print(json.dumps(data_with_date, cls=DateTimeEncoder))
5.2 csv 读写
python
import csv
# 写入 CSV
rows = [
["姓名", "年龄", "城市"],
["张三", 25, "北京"],
["李四", 30, "上海"],
]
with open("output.csv", "w", newline="", encoding="utf-8-sig") as f:
writer = csv.writer(f)
writer.writerows(rows)
# 读取 CSV
with open("output.csv", "r", encoding="utf-8-sig") as f:
reader = csv.reader(f)
for row in reader:
print(row)
# DictWriter/DictReader(推荐,用列名映射字段)
fieldnames = ["姓名", "年龄", "城市"]
with open("output.csv", "w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerow({"姓名": "王五", "年龄": 35, "城市": "广州"})
with open("output.csv", "r", encoding="utf-8-sig") as f:
reader = csv.DictReader(f)
for row in reader:
print(row["姓名"], row["年龄"]) # 按列名访问
5.3 hashlib 哈希计算
python
import hashlib
# 计算文件的 MD5(校验文件完整性)
def file_md5(filepath: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``filepath``.

    The file is opened in binary mode and read in 64 KiB chunks, so a large
    file is never held in memory all at once. Intended for integrity
    checks (detecting accidental corruption).

    Args:
        filepath: Path of the file to hash.

    Returns:
        The 32-character lowercase hex digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as f:
        # read() returns b"" at end of file, which ends the loop
        while chunk := f.read(65536):
            digest.update(chunk)
    return digest.hexdigest()
# SHA-256(更安全,密码存储用 bcrypt 而非这个)
text = "Hello, World!"
sha256 = hashlib.sha256(text.encode("utf-8")).hexdigest()
print(sha256)
# HMAC(消息认证码)
import hmac
key = b"secret-key"
message = b"important data"
mac = hmac.new(key, message, hashlib.sha256).hexdigest()
六、网络通信
| 模块 | 用途简介 | 最常用函数/类 |
|------|---------|--------------|
| socket | 底层网络通信 | socket.socket/bind/listen/accept/connect/send/recv |
| http.client | HTTP 客户端(底层) | HTTPConnection/HTTPSConnection/request/getresponse |
| urllib.request | URL 请求 | urllib.request.urlopen/urlretrieve/Request |
| urllib.parse | URL 解析与编码 | urlparse/urlencode/quote/unquote/urljoin |
| email | 邮件构建 | email.mime.multipart.MIMEMultipart、email.mime.text.MIMEText |
| smtplib | 发送邮件 | smtplib.SMTP/SMTP_SSL/sendmail/login |
6.1 urllib 常用操作
python
from urllib.parse import urlparse, urlencode, quote, unquote, urljoin
from urllib.request import urlopen, Request
import json
# URL 解析
url = "https://api.example.com/users?page=1&size=20#section"
parsed = urlparse(url)
print(parsed.scheme) # https
print(parsed.netloc) # api.example.com
print(parsed.path) # /users
print(parsed.query) # page=1&size=20
# URL 编码
params = {"name": "张三", "city": "北京", "age": 25}
encoded = urlencode(params)
print(encoded) # name=%E5%BC%A0%E4%B8%89&city=%E5%8C%97%E4%BA%AC&age=25
# URL 拼接
base = "https://api.example.com/v1/"
path = "users/123/orders"
print(urljoin(base, path)) # https://api.example.com/v1/users/123/orders
# 发送 HTTP 请求(简单场景,复杂场景用 requests 或 httpx)
request = Request(
"https://httpbin.org/get",
headers={"User-Agent": "Python/3.11", "Accept": "application/json"},
)
with urlopen(request, timeout=10) as response:
data = json.loads(response.read().decode("utf-8"))
print(data["headers"]["User-Agent"])
6.2 smtplib 发送邮件
python
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
def send_email(
    smtp_host: str,
    smtp_port: int,
    username: str,
    password: str,
    to: str,
    subject: str,
    body: str,
    attachment_path: str = None,
):
    """Send an HTML e-mail over an SSL SMTP connection, optionally with one attachment.

    Args:
        smtp_host: Host name of the SMTP server.
        smtp_port: Port of the server's SSL endpoint.
        username: Login name; also used as the ``From`` address.
        password: Password used to log in as ``username``.
        to: Recipient address.
        subject: Subject line.
        body: HTML body of the message.
        attachment_path: Path of a file to attach; ``None`` (or empty) sends
            the message without an attachment.

    Raises:
        OSError: If the attachment cannot be read or the connection fails.
        smtplib.SMTPException: If the server rejects the login or the message.
    """
    import os

    # "alternative" means "several renderings of the same content"; a body
    # plus an attached file has to live in a "mixed" container, otherwise
    # mail clients may hide either the body or the attachment.
    msg = MIMEMultipart("mixed" if attachment_path else "alternative")
    msg["From"] = username
    msg["To"] = to
    msg["Subject"] = subject

    # HTML body
    msg.attach(MIMEText(body, "html", "utf-8"))

    # Optional attachment
    if attachment_path:
        with open(attachment_path, "rb") as f:
            payload = f.read()
        part = MIMEBase("application", "octet-stream")
        part.set_payload(payload)
        encoders.encode_base64(part)
        # Passing filename= lets the email package quote/encode the name
        # (including non-ASCII names) instead of hand-building the header.
        part.add_header(
            "Content-Disposition",
            "attachment",
            filename=os.path.basename(attachment_path),
        )
        msg.attach(part)

    # Deliver over an SSL-encrypted connection
    with smtplib.SMTP_SSL(smtp_host, smtp_port) as server:
        server.login(username, password)
        server.sendmail(username, to, msg.as_string())
    print(f"邮件已发送到 {to}")
七、并发编程
| 模块 | 用途简介 | 最常用类/函数 |
|------|---------|--------------|
| threading | 线程(共享内存,受 GIL 限制) | Thread/Lock/RLock/Event/Condition/Semaphore/Timer |
| multiprocessing | 进程(独立内存,绕过 GIL) | Process/Pool/Queue/Pipe/Manager/Value/Array |
| concurrent.futures | 高层并发接口 | ThreadPoolExecutor/ProcessPoolExecutor/Future/as_completed/wait |
| asyncio | 异步 I/O 框架 | run/create_task/gather/wait/sleep/Queue/Lock/Event/Semaphore |
| queue | 线程安全队列 | Queue/LifoQueue/PriorityQueue/SimpleQueue |
| threading | 线程本地存储 | threading.local() |
7.1 concurrent.futures 并发执行
python
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
import time
def io_task(url: str) -> dict:
    """Simulate an I/O-bound task (a network request) for ``url``.

    Sleeps for 0.1 s in place of a real request, then returns a dict holding
    the ``url`` and a fixed ``status`` of 200.
    """
    time.sleep(0.1)  # stand-in for network latency
    return {"url": url, "status": 200}
def cpu_task(n: int) -> int:
    """Simulate a CPU-bound task: return the sum of the squares of 0 .. n-1."""
    return sum(i * i for i in range(n))
# The __main__ guard is required for ProcessPoolExecutor: with the "spawn"
# start method (the default on Windows and macOS) every worker re-imports this
# module, and unguarded pool creation would run again inside each worker.
if __name__ == "__main__":
    urls = [f"https://example.com/{i}" for i in range(10)]

    # I/O-bound -> thread pool
    with ThreadPoolExecutor(max_workers=5) as executor:
        # Submit every task up front
        futures = [executor.submit(io_task, url) for url in urls]
        # Handle results in completion order (not submission order)
        for future in as_completed(futures):
            result = future.result()
            print(f"完成:{result['url']}")

    # map(): tasks are submitted in order and results come back in that same order
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(io_task, urls))

    # CPU-bound -> process pool (sidesteps the GIL)
    numbers = [10**6, 10**7, 10**6, 10**7]
    with ProcessPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(cpu_task, numbers))
    print(results)
7.2 asyncio 异步编程
python
import asyncio
async def fetch(url: str) -> dict:
    """Simulate an asynchronous HTTP request for ``url``.

    Awaits a 0.1 s sleep in place of real network I/O, then returns a dict
    with the ``url`` and a placeholder ``data`` value.
    """
    await asyncio.sleep(0.1)  # suspends this coroutine so the event loop can run others
    return {"url": url, "data": "..."}
async def main():
    """Demonstrate ``gather``, ``wait_for`` timeouts and background tasks."""
    urls = [f"https://api.example.com/{i}" for i in range(10)]

    # Run all requests concurrently
    results = await asyncio.gather(*(fetch(url) for url in urls))
    print(f"完成 {len(results)} 个请求")

    # Timeout control: fetch() takes 0.1 s, so a 0.05 s limit always expires
    try:
        await asyncio.wait_for(fetch("slow.example.com"), timeout=0.05)
    except asyncio.TimeoutError:
        print("请求超时")

    # Start a task that runs in the background
    task = asyncio.create_task(fetch("background.example.com"))
    # ... do other work here ...
    await task  # wait for the background task to finish
asyncio.run(main())
# asyncio.Queue:异步生产者-消费者
async def producer(queue: asyncio.Queue, items: list, consumer_count: int = 1):
    """Put every item on ``queue``, then one ``None`` sentinel per consumer.

    Each consumer exits after receiving a single sentinel, so exactly
    ``consumer_count`` sentinels are needed for all of them to stop. With
    fewer sentinels than consumers, the remaining consumers wait forever.

    Args:
        queue: Queue shared with the consumers.
        items: Items to enqueue, in order.
        consumer_count: Number of consumers reading from ``queue``.
    """
    for item in items:
        await queue.put(item)
    for _ in range(consumer_count):
        await queue.put(None)  # end-of-stream signal for one consumer


async def consumer(queue: asyncio.Queue, name: str):
    """Process items from ``queue`` until a ``None`` sentinel arrives.

    Args:
        queue: Queue to read from.
        name: Label printed with every processed item.
    """
    while True:
        item = await queue.get()
        if item is None:
            # Account for the sentinel too, so a queue.join() would not hang
            queue.task_done()
            break
        await asyncio.sleep(0.01)  # simulated processing
        queue.task_done()
        print(f"[{name}] 处理:{item}")


async def pipeline():
    """Run one producer and two consumers over a bounded queue until all items are handled."""
    queue = asyncio.Queue(maxsize=10)
    items = list(range(20))
    consumer_names = ["消费者1", "消费者2"]
    await asyncio.gather(
        producer(queue, items, consumer_count=len(consumer_names)),
        *(consumer(queue, name) for name in consumer_names),
    )
asyncio.run(pipeline())
八、调试与测试
| 模块 | 用途简介 | 最常用函数/类 |
|------|---------|--------------|
| pdb | Python 调试器 | pdb.set_trace()/breakpoint()/step/next/continue/print/quit |
| traceback | 异常堆栈信息 | traceback.format_exc/print_exc/extract_tb |
| logging | 日志记录框架 | getLogger/DEBUG/INFO/WARNING/ERROR/CRITICAL/FileHandler/Formatter |
| warnings | 警告机制 | warnings.warn/filterwarnings/simplefilter |
| unittest | 单元测试框架 | TestCase/assertEqual/assertTrue/assertRaises/setUp/tearDown/mock |
| doctest | 文档字符串测试 | doctest.testmod/run_docstring_examples |
| cProfile | 性能分析 | cProfile.run/Profile.enable/disable/print_stats |
| timeit | 代码片段计时 | timeit.timeit/repeat |
8.1 logging 日志最佳实践
python
import logging
import sys
from pathlib import Path
def setup_logger(
    name: str,
    level: str = "INFO",
    log_file: str = None,
) -> logging.Logger:
    """Configure and return the logger called ``name``.

    Records are written to stdout and, when ``log_file`` is given, also to
    that file (its parent directory is created if missing). Calling this
    function again for the same name replaces the handlers installed earlier
    instead of stacking new ones, so a record is never emitted twice.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        level: Level name such as ``"INFO"`` or ``"debug"`` (case-insensitive).
        log_file: Path of an additional log file; ``None`` means console only.

    Returns:
        The configured ``logging.Logger``.

    Raises:
        AttributeError: If ``level`` is not an attribute of the ``logging`` module.
    """
    logger = logging.getLogger(name)
    logger.setLevel(getattr(logging, level.upper()))

    # getLogger() returns the same object for the same name, so without this
    # cleanup every extra call would add another handler and duplicate output.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # One format shared by every handler
    formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler (optional)
    if log_file:
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger
# 使用
logger = setup_logger("myapp", level="DEBUG", log_file="logs/app.log")
logger.debug("调试信息")
logger.info("普通信息")
logger.warning("警告:磁盘使用率达到 80%")
logger.error("错误:数据库连接失败")
logger.critical("严重:服务即将崩溃")
# 带结构化数据的日志
logger.info("用户登录", extra={"user_id": 123, "ip": "127.0.0.1"})
# 捕获异常并记录完整堆栈
try:
result = 1 / 0
except Exception:
logger.exception("计算出错") # 自动附加 traceback
8.2 timeit 性能测试
python
import timeit
# 比较两种方法的性能
def method1():
    """Build the list of strings "0" .. "999" with a list comprehension."""
    return [str(i) for i in range(1000)]
def method2():
    """Build the list of strings "0" .. "999" with an explicit loop and ``append``.

    Deliberately NOT a comprehension: this is the baseline that the
    comprehension-based ``method1`` is timed against.
    """
    result = []
    for i in range(1000):
        result.append(str(i))
    return result
# 执行 1000 次,返回总时间(秒)
t1 = timeit.timeit(method1, number=1000)
t2 = timeit.timeit(method2, number=1000)
print(f"列表推导式:{t1:.4f}s")
print(f"for 循环:{t2:.4f}s")
print(f"列表推导式快 {t2/t1:.2f} 倍")
# 简单字符串测试
result = timeit.timeit("'-'.join(str(n) for n in range(100))", number=100000)
print(f"耗时:{result:.4f}s")
总结
| 类别 | 推荐模块 | 不推荐(改用) |
|------|---------|----------------|
| 路径操作 | pathlib.Path | os.path(旧风格)|
| HTTP 请求 | httpx / requests | urllib.request(复杂请求)|
| 数据校验 | pydantic | 手写校验逻辑 |
| 并发(I/O) | asyncio / ThreadPoolExecutor | 裸 threading |
| 并发(CPU) | ProcessPoolExecutor | 裸 multiprocessing |
| 日志 | logging(配置好的)| print |
| 时间 | datetime | time.time()(仅限时间戳)|
| 随机 | random(非安全)/ secrets(安全) | 自己写伪随机 |
回到目录:README