🐍 Python Day41:requests 库 — 优雅地发送 HTTP 请求
🕐 预计用时:2-3 小时 | 🎯 目标:掌握 GET/POST、参数/Headers、JSON 响应、Session
📖 今日目录
1. 为什么用 requests?
requests 是 Python 最流行的 HTTP 库——一行代码发请求,比 http.client 简洁 10 倍。
# 安装: pip install requests
import requests
# A GET request is a single call
url = "https://httpbin.org/get"
r = requests.get(url)
print(r.status_code)  # 200
print(r.text)  # response body
# For comparison: http.client needs about 10 lines to do the same thing
2. GET 请求
import requests
# Basic GET request
url = "https://httpbin.org/get"
r = requests.get(url)
# What the response object exposes
print(r.status_code)  # 200
print(r.reason)  # OK
print(r.headers)  # response headers (dict-like)
print(r.text)  # body as text (str)
print(r.content)  # body as raw bytes
print(r.encoding)  # encoding, e.g. utf-8
print(r.url)  # URL that was requested
print(r.elapsed)  # how long the request took
# Fetch a web page
r = requests.get("https://example.com")
html = r.text
print(html[:200])  # first 200 characters of the HTML
# Fetch JSON data
r = requests.get("https://httpbin.org/json")
data = r.json()  # parsed directly into a Python dict
print(data)
3. 传递参数
import requests
# Option 1: pass a dict through params= (recommended)
query = dict(page=1, size=20, keyword="Python")
r = requests.get("https://httpbin.org/get", params=query)
print(r.url)
# https://httpbin.org/get?page=1&size=20&keyword=Python
# Option 2: build the query string by hand (not recommended)
r = requests.get("https://httpbin.org/get?page=1&size=20")
# More complex parameters
query = {
    "filters[]": ["python", "java"],  # a list repeats the same parameter name
    "sort": "-created_at",
    "page": 1,
}
r = requests.get("https://httpbin.org/get", params=query)
print(r.url)
# https://httpbin.org/get?filters%5B%5D=python&filters%5B%5D=java&sort=-created_at&page=1
4. 自定义 Headers
import requests
# Send custom request headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept": "application/json",
    "Authorization": "Bearer my-token-xxx",
    "X-Custom-Header": "custom-value",
}
r = requests.get("https://httpbin.org/headers", headers=headers)
# httpbin echoes back the headers it received
echoed = r.json()["headers"]
print(echoed["Authorization"])  # Bearer my-token-xxx
⚠️ 为什么要设置 User-Agent?
很多网站会检查 User-Agent,如果是 Python 默认的(如 python-requests/2.x),会被拒绝。
设置成浏览器的 User-Agent 可以避免被屏蔽。
5. POST 请求
import requests
# POST form data
form = {"username": "admin", "password": "123456"}
r = requests.post("https://httpbin.org/post", data=form)
print(r.json()["form"])  # {'username': 'admin', 'password': '123456'}
# POST a JSON body
payload = {"name": "张三", "age": 25, "skills": ["Python", "Java"]}
r = requests.post("https://httpbin.org/post", json=payload)
print(r.json()["json"])  # {'name': '张三', 'age': 25, 'skills': ['Python', 'Java']}
# data= sends a form-encoded body (application/x-www-form-urlencoded)
# json= sends a JSON body (application/json)
# POST with custom headers
headers = {"Authorization": "Bearer xxx", "Content-Type": "application/json"}
r = requests.post("https://httpbin.org/post", json={"key": "value"}, headers=headers)
# PUT / DELETE / PATCH are called the same way
r = requests.put("https://httpbin.org/put", json={"name": "新名字"})
r = requests.delete("https://httpbin.org/delete")
r = requests.patch("https://httpbin.org/patch", json={"age": 26})
6. JSON 响应处理
import requests
r = requests.get("https://httpbin.org/json")
# .json() parses the JSON body into Python objects
data = r.json()
print(type(data))  # <class 'dict'>
print(data["slideshow"]["title"])
# Check by hand whether the request succeeded
if r.status_code == 200:
    result = r.json()
    print("成功:", result)
else:
    print(f"失败: {r.status_code}")
# r.raise_for_status() raises an exception when the status code is 4xx or 5xx
try:
    r = requests.get("https://httpbin.org/status/404")
    r.raise_for_status()  # raises HTTPError
except requests.exceptions.HTTPError as e:
    print(f"HTTP 错误: {e}")  # 404 Client Error
💡 推荐模式:
r.raise_for_status() 配合 try/except,比手动检查 status_code 更 Pythonic。
7. Session — 保持登录状态
import requests
# Plain requests are independent of each other: cookies are not kept
r1 = requests.get("https://httpbin.org/cookies/set/name/zhangsan")
r2 = requests.get("https://httpbin.org/cookies")
print(r2.json())  # {'cookies': {}} — the cookie is gone
# A Session stores cookies and sends them back automatically
session = requests.Session()
r1 = session.get("https://httpbin.org/cookies/set/name/zhangsan")
r2 = session.get("https://httpbin.org/cookies")
print(r2.json())  # {'cookies': {'name': 'zhangsan'}} — the cookie was kept
session.close()
# A Session takes care of:
# 1. Storing and sending cookies
# 2. Connection pooling (better performance)
# 3. Following redirects
# Use a with statement to close the session automatically
with requests.Session() as s:
    s.headers.update({"User-Agent": "MyApp/1.0"})
    # Every request made through the session shares these headers
    r1 = s.get("https://httpbin.org/get")
    r2 = s.post("https://httpbin.org/post", json={"key": "value"})
8. 文件上传
import requests
# Upload one file; the with-block closes the handle once the request is done
with open("report.csv", "rb") as f:
    files = {"file": f}
    r = requests.post("https://httpbin.org/post", files=files)
print(r.json()["files"])
# Set the file name and content type explicitly
with open("report.csv", "rb") as f:
    files = {"file": ("report.csv", f, "text/csv")}
    r = requests.post("https://httpbin.org/post", files=files)
# Upload several files under the same form field name
with open("a.txt", "rb") as fa, open("b.txt", "rb") as fb:
    files = [
        ("files", ("a.txt", fa)),
        ("files", ("b.txt", fb)),
    ]
    r = requests.post("https://httpbin.org/post", files=files)
9. 超时与异常处理
import requests
# Always set a timeout (in seconds)
try:
    r = requests.get("https://httpbin.org/delay/5", timeout=3)
except requests.exceptions.Timeout:
    print("⏰ 请求超时")
# Separate connect and read timeouts: 3 seconds to connect, 5 seconds to read
r = requests.get("https://httpbin.org/get", timeout=(3, 5))
# Common exceptions, most specific first
try:
    r = requests.get("https://invalid-domain.com", timeout=5)
    # Without this call an HTTPError is never raised and its handler below is dead code
    r.raise_for_status()
except requests.exceptions.ConnectionError:
    print("❌ 连接错误(DNS 解析失败、服务器拒绝等)")
except requests.exceptions.Timeout:
    print("⏰ 超时")
except requests.exceptions.HTTPError as e:
    print(f"❌ HTTP 错误: {e}")
except requests.exceptions.RequestException as e:
    print(f"❌ 请求错误: {e}")  # base class of all requests exceptions
⚠️ 生产环境必须设置 timeout!不设 timeout,如果服务器不响应,你的程序会永远卡住。
10. 代理设置
import requests
# Route requests through a proxy
proxy_url = "http://proxy.example.com:8080"
proxies = {"http": proxy_url, "https": proxy_url}
r = requests.get("https://httpbin.org/ip", proxies=proxies)
# Proxy that requires authentication
proxies = {"http": "http://user:password@proxy.example.com:8080"}
# Alternative: environment variables (recommended)
# export HTTP_PROXY=http://proxy.example.com:8080
# export HTTPS_PROXY=http://proxy.example.com:8080
# requests reads these environment variables automatically
11. 实战:API 客户端
import requests
class APIClient:
    """Small JSON API client built on one shared ``requests.Session``.

    All requests go through the same session, so the default headers set in
    ``__init__`` (and the optional bearer token) are sent with every call.
    The client can be used as a context manager; leaving the ``with`` block
    closes the session.
    """

    def __init__(self, base_url, token=None):
        """Create the session and install the default headers.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            token: Optional bearer token, sent as ``Authorization: Bearer <token>``.
        """
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "MyApp/1.0",
            "Accept": "application/json",
        })
        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"

    def _request(self, method, path, **kwargs):
        """Send one request and return its decoded JSON body.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            path: Path appended to ``base_url``; a missing leading slash is added.
            **kwargs: Passed through to ``Session.request``. ``timeout``
                defaults to 10 seconds when the caller does not set it.

        Returns:
            The parsed JSON body, or ``None`` when the response body is empty.

        Raises:
            requests.exceptions.HTTPError: If the status code is 4xx or 5xx.
        """
        # Avoid gluing host and path together ("https://hostget") when the
        # caller forgets the leading slash.
        if path and not path.startswith("/"):
            path = f"/{path}"
        url = f"{self.base_url}{path}"
        kwargs.setdefault("timeout", 10)
        r = self.session.request(method, url, **kwargs)
        r.raise_for_status()
        # Responses with no body (e.g. 204 No Content after a DELETE) cannot
        # be decoded as JSON; report them as None instead of raising.
        if not r.content:
            return None
        return r.json()

    def get(self, path, params=None):
        """Send a GET request with optional query parameters."""
        return self._request("GET", path, params=params)

    def post(self, path, data=None):
        """Send a POST request with ``data`` serialized as the JSON body."""
        return self._request("POST", path, json=data)

    def put(self, path, data=None):
        """Send a PUT request with ``data`` serialized as the JSON body."""
        return self._request("PUT", path, json=data)

    def delete(self, path):
        """Send a DELETE request."""
        return self._request("DELETE", path)

    def close(self):
        """Close the underlying session."""
        self.session.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Returns None, so exceptions raised inside the with-block propagate.
        self.close()
# Usage: the with-block closes the client's session on exit
with APIClient("https://httpbin.org") as api:
    # GET request
    result = api.get("/get", params={"page": 1})
    print(result["url"])
    # POST request
    result = api.post("/post", data={"name": "张三"})
    print(result["json"])
12. 今日小结
| 方法 | 说明 | 常用参数 |
|---|---|---|
| requests.get() | 发送 GET 请求 | params |
| requests.post() | 发送 POST 请求 | data / json |
| requests.put() | 发送 PUT 请求 | json |
| requests.delete() | 发送 DELETE 请求 | |
| requests.Session() | 保持 Cookie、复用连接 | |
响应对象
r.status_code、r.text、r.json()、r.headers、r.raise_for_status()
🎯 练习建议:
1. 用 requests 调用 GitHub API,获取某个仓库的信息
2. 用 Session 登录一个网站(模拟登录)
3. 写一个批量图片下载器(并发请求 + 保存文件)
📚 Day41 完成!明天学习 BeautifulSoup — 解析 HTML 网页