一、文件操作概述
1. 什么是文件操作?
文件操作是指对计算机文件进行读取、写入、修改、删除等操作。Python 提供了内置的文件操作函数,使得处理文件变得简单高效。
2. 文件操作的基本流程
# The three steps of working with a file
# 1. Open the file
file = open("example.txt", "r", encoding="utf-8")
# 2. Work with the file (read or write)
content = file.read()
# 3. Close the file
file.close()
二、打开文件 - open() 函数
1. open() 函数的基本语法
# Basic syntax (`file` below is a placeholder for the path you want to open)
file_object = open(file, mode='r', encoding=None)
# Parameters:
# file: path of the file (string)
# mode: open mode (default 'r', read-only)
# encoding: text encoding ('utf-8' is recommended)
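2. 文件打开模式详解
模式 | 说明
'r' | 只读(默认);文件不存在时抛出 FileNotFoundError
'w' | 只写;文件已存在则清空内容,不存在则创建
'a' | 追加;写入的内容总是添加到文件末尾,文件不存在则创建
'x' | 独占创建;文件已存在时抛出 FileExistsError
'b' | 二进制模式,与其他模式组合使用(如 'rb'、'wb')
't' | 文本模式(默认),与其他模式组合使用(如 'rt')
'+' | 读写模式,与其他模式组合使用(如 'r+'、'w+'、'a+')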
3. 常用打开模式示例
# Each example closes its handle before the next one reuses the name `file`,
# so no open file object is leaked.

# Read-only mode (text)
file = open("data.txt", "r", encoding="utf-8")
file.close()

# Write mode (overwrites any existing content)
file = open("data.txt", "w", encoding="utf-8")
file.close()

# Append mode (writes are added at the end)
file = open("data.txt", "a", encoding="utf-8")
file.close()

# Read/write mode
file = open("data.txt", "r+", encoding="utf-8")
file.close()

# Binary read mode (for images, videos, etc.)
file = open("image.jpg", "rb")
file.close()

# Binary write mode
file = open("image_copy.jpg", "wb")
file.close()

# Exclusive creation mode (raises an error if the file already exists)
file = open("new.txt", "x", encoding="utf-8")
file.close()
三、关闭文件 - close() 方法
1. 基本关闭操作
# Open the file
file = open("data.txt", "r", encoding="utf-8")
# Read its content
content = file.read()
# Close the file
file.close()
# A closed file can no longer be used
# file.read()  # ValueError: I/O operation on closed file
2. 使用 with 语句自动关闭(推荐)
# The with statement closes the file automatically, even if an exception occurs
with open("data.txt", "r", encoding="utf-8") as file:
    content = file.read()
    print(content)
# The file is closed as soon as the with block exits

# Open several files at once
with open("source.txt", "r", encoding="utf-8") as source, \
        open("dest.txt", "w", encoding="utf-8") as dest:
    dest.write(source.read())
四、读取文件
1. read() - 读取全部内容
# Read the whole file
with open("data.txt", "r", encoding="utf-8") as file:
    content = file.read()
    print(f"文件内容:\n{content}")

# Read a limited amount (in text mode the argument counts characters, not bytes)
with open("data.txt", "r", encoding="utf-8") as file:
    # Read the first 10 characters
    partial = file.read(10)
    print(f"前10个字符: {partial}")
    # Continue with everything that is left
    remaining = file.read()
    print(f"剩余内容: {remaining}")
2. readline() - 读取一行
# Read one line at a time
with open("data.txt", "r", encoding="utf-8") as file:
    line = file.readline()
    print(f"第一行: {line}")
    line2 = file.readline()
    print(f"第二行: {line2}")

# Read the whole file line by line
with open("data.txt", "r", encoding="utf-8") as file:
    line = file.readline()
    # readline() returns '' only at end of file; a blank line is '\n' (truthy)
    while line:
        print(f"行: {line.strip()}")
        line = file.readline()
3. readlines() - 读取所有行到列表
# Read every line into a list in one call
with open("data.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()
    print(f"共 {len(lines)} 行")
    for i, line in enumerate(lines, 1):
        print(f"{i}: {line.strip()}")

# Note: the strings returned by readlines() keep their trailing newline
# Use strip() to remove it
4. 遍历文件对象(推荐)
# Recommended: iterate over the file object directly
with open("data.txt", "r", encoding="utf-8") as file:
    for line in file:
        print(line.strip())  # handle each line

# Only one line is held in memory at a time, which suits large files
五、写入文件
1. write() - 写入字符串
# Write strings
with open("output.txt", "w", encoding="utf-8") as file:
    file.write("Hello, World!\n")
    file.write("这是第二行\n")

# Note: write() does not add a newline; append \n yourself
2. writelines() - 写入字符串列表
# Write several lines
lines = ["第一行\n", "第二行\n", "第三行\n"]
with open("output.txt", "w", encoding="utf-8") as file:
    file.writelines(lines)

# Note: writelines() does not add newlines
# Make sure every string already ends with \n
3. 追加写入
# Append mode (existing content is kept)
with open("output.txt", "a", encoding="utf-8") as file:
    file.write("这是追加的内容\n")

# Running this repeatedly keeps adding the same line
六、文件指针操作
1. tell() - 获取当前位置
# In text mode tell() returns an opaque position value: it is meant to be
# passed back to seek(), and need not equal the number of characters read
# (multi-byte UTF-8 characters advance it by more than one).
with open("data.txt", "r", encoding="utf-8") as file:
    print(f"初始位置: {file.tell()}")  # 0
    content = file.read(5)
    print(f"读取5个字符后: {file.tell()}")
    content = file.read(3)
    print(f"再读取3个字符后: {file.tell()}")
2. seek() - 移动指针
# Opened in binary mode on purpose: a text-mode file rejects nonzero offsets
# relative to the current position or the end (io.UnsupportedOperation), so
# the relative seeks below only work on a binary file object.
with open("data.txt", "rb") as file:
    # Move to byte 10, counted from the start of the file
    file.seek(10)
    print(f"位置: {file.tell()}")
    # Move 5 bytes forward from the current position
    file.seek(5, 1)  # 1 = relative to the current position
    print(f"位置: {file.tell()}")
    # Move relative to the end of the file
    # (raises OSError if the file is shorter than 10 bytes)
    file.seek(-10, 2)  # 2 = relative to the end of the file
    print(f"位置: {file.tell()}")

# seek() parameters:
# seek(offset, whence)
# whence: 0 = start of file (default), 1 = current position, 2 = end of file
七、二进制文件操作
1. 读写二进制文件
# Write binary data
data = b'Hello, Binary World!'
with open("binary.bin", "wb") as file:
    file.write(data)

# Read a binary file
with open("binary.bin", "rb") as file:
    content = file.read()
    print(f"二进制内容: {content}")
    print(f"解码后: {content.decode('utf-8')}")

# Copy an image file
with open("image.jpg", "rb") as source:
    image_data = source.read()
with open("image_copy.jpg", "wb") as dest:
    dest.write(image_data)
2. 使用 struct 处理二进制数据
import struct

# The '<' prefix selects little-endian byte order with standard sizes, so the
# field widths below are 4, 4 and 10 bytes on every platform.

# Pack values into binary form
with open("data.bin", "wb") as file:
    # Write an integer, a float and a string
    file.write(struct.pack('<i', 42))  # 4-byte integer
    file.write(struct.pack('<f', 3.14159))  # 4-byte float
    file.write(struct.pack('<10s', b'Python'))  # 10-byte string, NUL-padded

# Read the binary data back
with open("data.bin", "rb") as file:
    number = struct.unpack('<i', file.read(4))[0]
    pi = struct.unpack('<f', file.read(4))[0]
    # Drop the trailing NUL padding added by the '10s' format
    text = struct.unpack('<10s', file.read(10))[0].decode().rstrip('\x00')
    print(f"整数: {number}")
    print(f"浮点数: {pi}")
    print(f"字符串: {text}")
八、实际应用示例
1. 文件复制
def copy_file(source_path, dest_path, chunk_size=8192):
    """Copy a file in fixed-size chunks so large files never sit fully in memory.

    Args:
        source_path: Path of the file to read.
        dest_path: Path of the file to write; it is created or truncated.
        chunk_size: Number of bytes read per iteration.
    """
    with open(source_path, 'rb') as source, open(dest_path, 'wb') as dest:
        while True:
            chunk = source.read(chunk_size)
            if not chunk:  # read() returns b'' at end of file
                break
            dest.write(chunk)
    print(f"文件复制完成: {source_path} -> {dest_path}")


# Usage example
# copy_file("source.txt", "destination.txt")
2. 文本文件统计
def analyze_text_file(file_path):
    """Count the lines, words and characters of a UTF-8 text file.

    Words are whitespace-separated tokens; the character count includes
    each line's newline.

    Args:
        file_path: Path of the text file to analyze.

    Returns:
        A dict with the keys 'lines', 'words' and 'characters'.
    """
    line_count = 0
    word_count = 0
    char_count = 0
    with open(file_path, 'r', encoding='utf-8') as file:
        # Stream line by line so the whole file is never held in memory
        for line in file:
            line_count += 1
            char_count += len(line)
            word_count += len(line.split())
    print(f"文件: {file_path}")
    print(f"行数: {line_count}")
    print(f"单词数: {word_count}")
    print(f"字符数: {char_count}")
    return {
        'lines': line_count,
        'words': word_count,
        'characters': char_count,
    }


# Usage example
# stats = analyze_text_file("data.txt")
3. 查找和替换
def find_and_replace(file_path, search_text, replace_text):
    """Replace every occurrence of a string in a UTF-8 text file, in place.

    The file is read fully into memory and rewritten only when the
    replacement actually changes its content.

    Args:
        file_path: Path of the file to modify.
        search_text: Text to look for.
        replace_text: Text that replaces each occurrence.

    Returns:
        True if the file was rewritten, False if nothing changed.
    """
    # Read the whole content first
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Perform the replacement
    new_content = content.replace(search_text, replace_text)

    # Nothing changed: leave the file untouched
    if new_content == content:
        print(f"未找到 '{search_text}'")
        return False

    # Write the result back
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)
    print(f"已替换 '{search_text}' -> '{replace_text}'")
    return True


# Usage example
# find_and_replace("config.txt", "localhost", "127.0.0.1")
4. 大文件逐行处理
def process_large_file(file_path, processor_func):
    """Process a text file line by line without loading it into memory.

    Each line is stripped of surrounding whitespace and passed to
    ``processor_func``. A truthy return value counts as a successfully
    processed line; an exception is counted, reported and skipped so one
    bad line does not abort the run. A summary is printed at the end.

    Args:
        file_path: Path of the UTF-8 text file to read.
        processor_func: Callable invoked with each stripped line.
    """
    processed_count = 0
    error_count = 0
    # Pre-set so the summary also works for an empty file, where the loop
    # never runs and the loop variable would otherwise be unbound.
    line_num = 0
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_num, line in enumerate(file, 1):
            try:
                result = processor_func(line.strip())
                if result:
                    processed_count += 1
                # Show progress every 1000 lines
                if line_num % 1000 == 0:
                    print(f"已处理 {line_num} 行...")
            except Exception as e:
                # Deliberately broad: any failure in the callback is
                # reported per line and processing continues.
                error_count += 1
                print(f"第 {line_num} 行处理错误: {e}")
    print("\n处理完成!")
    print(f"总行数: {line_num}")
    print(f"成功处理: {processed_count}")
    print(f"错误数: {error_count}")


# Usage example
# def process_line(line):
#     # Handle one line
#     if line:
#         return line.upper()
#     return None
#
# process_large_file("large_data.txt", process_line)
5. CSV 文件处理
import csv


def read_csv_file(file_path):
    """Read a CSV file and return its data rows, excluding the header row.

    Args:
        file_path: Path of the UTF-8 CSV file.

    Returns:
        A list of rows, each a list of strings. The list is empty when the
        file has only a header or is completely empty.
    """
    data = []
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        # The None default avoids StopIteration on a completely empty file
        headers = next(reader, None)
        if headers is not None:
            print(f"列名: {headers}")
            data.extend(reader)
    print(f"共读取 {len(data)} 行数据")
    return data


def write_csv_file(file_path, headers, data):
    """Write a header row followed by the data rows to a CSV file.

    Args:
        file_path: Path of the CSV file; it is created or overwritten.
        headers: Sequence of column names.
        data: Sequence of rows, each a sequence of values.
    """
    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(headers)
        writer.writerows(data)
    print(f"已写入 {len(data)} 行数据到 {file_path}")


def dict_csv_file(file_path):
    """Print every data row of a CSV file as a dict keyed by the header row.

    Args:
        file_path: Path of the UTF-8 CSV file.
    """
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        for row in reader:
            print(f"行数据: {row}")


# Usage example
# data = [
#     ['Alice', 25, 'Engineer'],
#     ['Bob', 30, 'Designer'],
#     ['Charlie', 35, 'Manager']
# ]
# write_csv_file("employees.csv", ['Name', 'Age', 'Job'], data)
# read_csv_file("employees.csv")
6. JSON 文件处理
import json


def save_json(file_path, data):
    """Serialize data to a UTF-8 JSON file, overwriting any existing file.

    Args:
        file_path: Path of the JSON file to write.
        data: JSON-serializable object.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        json.dump(data, file, ensure_ascii=False, indent=2)
    print(f"已保存到 {file_path}")


def load_json(file_path):
    """Load and return the content of a UTF-8 JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized Python object.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    print(f"已加载 {file_path}")
    return data


def append_json(file_path, new_data):
    """Append an item to the JSON document stored in a file.

    A missing file is treated as an empty list. If the stored document is
    not a list, it is wrapped together with the new item in a new list.

    Args:
        file_path: Path of the JSON file to update.
        new_data: JSON-serializable item to add.
    """
    try:
        # Read the existing data
        existing_data = load_json(file_path)
    except FileNotFoundError:
        existing_data = []

    # Add the new item
    if isinstance(existing_data, list):
        existing_data.append(new_data)
    else:
        existing_data = [existing_data, new_data]

    # Save the result
    save_json(file_path, existing_data)


# Usage example
# config = {
#     "name": "MyApp",
#     "version": "1.0.0",
#     "debug": True,
#     "database": {
#         "host": "localhost",
#         "port": 3306
#     }
# }
# save_json("config.json", config)
# loaded = load_json("config.json")
九、错误处理
1. 文件操作常见异常
import time


def safe_file_operation(file_path="nonexistent.txt"):
    """Read a file and report the common failure modes instead of raising.

    Args:
        file_path: Path of the UTF-8 text file to read. The default is a
            file that normally does not exist, which demonstrates the
            FileNotFoundError branch.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            file.read()
    # The specific subclasses come before the general IOError handler,
    # which would otherwise catch them first.
    except FileNotFoundError as e:
        print(f"文件不存在: {e}")
    except PermissionError as e:
        print(f"权限不足: {e}")
    except UnicodeDecodeError as e:
        print(f"编码错误: {e}")
    except IOError as e:
        print(f"I/O 错误: {e}")
    except Exception as e:
        print(f"其他错误: {e}")
    else:
        # Runs only when no exception was raised
        print("文件读取成功")
    finally:
        # Runs in every case
        print("文件操作完成")


# File operation with retries
def retry_file_operation(file_path, max_retries=3):
    """Read a text file, retrying when it is missing or not accessible.

    Waits 1 second after a FileNotFoundError and 2 seconds after a
    PermissionError before the next attempt.

    Args:
        file_path: Path of the UTF-8 text file to read.
        max_retries: Maximum number of attempts.

    Returns:
        The file content, or None when max_retries is not positive and no
        attempt is made.

    Raises:
        FileNotFoundError, PermissionError: Re-raised from the last attempt.
    """
    for attempt in range(max_retries):
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError:
            if attempt == max_retries - 1:
                raise
            print(f"文件不存在,等待后重试... ({attempt + 1}/{max_retries})")
            time.sleep(1)
        except PermissionError:
            if attempt == max_retries - 1:
                raise
            print(f"权限不足,等待后重试... ({attempt + 1}/{max_retries})")
            time.sleep(2)
    return None
十、总结
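文件操作模式速查表
模式 | 可读 | 可写 | 文件不存在时 | 文件已存在时 | 初始指针位置
r | 是 | 否 | 报错 | 保留内容 | 开头
r+ | 是 | 是 | 报错 | 保留内容 | 开头
w | 否 | 是 | 创建 | 清空内容 | 开头
w+ | 是 | 是 | 创建 | 清空内容 | 开头
a | 否 | 是 | 创建 | 保留内容 | 末尾
a+ | 是 | 是 | 创建 | 保留内容 | 末尾
x | 否 | 是 | 创建 | 报错 | 开头
(以上模式均可追加 b 表示二进制模式,例如 rb、wb、ab)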
最佳实践
# 1. Always use a with statement
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()
# The file is closed automatically

# 2. Specify the encoding (especially for text files)
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()

# 3. Process large files line by line
with open("large.txt", "r", encoding="utf-8") as f:
    for line in f:
        process(line)  # memory friendly; `process` stands for your own function

# 4. Use exception handling
try:
    with open("file.txt", "r", encoding="utf-8") as f:
        content = f.read()
except FileNotFoundError:
    print("文件不存在")

# 5. Use pathlib to build paths
from pathlib import Path

file_path = Path("data") / "config.txt"
with open(file_path, "r", encoding="utf-8") as f:
    content = f.read()
掌握文件基本操作是 Python 编程的基础技能,通过合理使用不同的打开模式和操作方法,可以高效地处理各种文件操作需求。