数据持久化的最简单类型。仅仅是在一个文件名下的字节流,把数据从文件读入内存,或从内存写入文件。
f = open(filename,mode,buffering)
f:open()返回的文件对象
filename:文件的字符串名
mode:可选参数,打开模式和文件类型
buffering:可选,文件的缓冲区,默认值为-1
mode:控制“怎么操作文件”
f = open("a.txt", "w")
f.write("hello")
#示例
f.write("hello")
f.write("world")
#文件内容
helloworld
想换行要自己写
f.writelines(["a\n", "b\n", "c\n"])
👉一次写入多个字符串(列表)
f = open("a.txt", "r")
content = f.read()
print(content)
特点:返回类型:str;读取全部内容;文件大时可能占内存
f = open("a.txt", "r")
line = f.readline()
print(line)
特点:返回字符串;每次一行;包含 \n
f = open("a.txt", "r")
lines = f.readlines()
print(lines)
特点:返回list[str];每一行是一个元素;内存占用比 readline 高
关闭的作用是终止对外部文件的连接,同时将缓存区的数据刷新到硬盘上。
- 使用上下文管理器(context manager)
确保在退出后自动关闭文件--with 语句通过上下文管理器机制,保证文件在使用结束后自动关闭,避免资源泄漏
with open("a.txt", "r") as f:
    content = f.read()
    print(content)
三、结构化文本文件:CSV
CSV(Comma-Separated Values)
#示例
name,age,city
Alice,20,Seattle
Bob,25,NYC
值没有类型,所有值都是字符串;不能指定字体颜色等样式;不能指定单元格的宽高,不能合并单元格;没有多个工作表;不能嵌入图像图表。
import csv
with open("a.csv", "r") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
#输出
['name', 'age', 'city']
['Alice', '20', 'Seattle']
['Bob', '25', 'NYC']
与reader类似,但返回的每一行都是一个字典:以表头(第一行)为键,每一个单元格都放在字典的值内。
import csv
with open("a.csv", "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row)
#输出
{'name': 'Alice', 'age': '20', 'city': 'Seattle'}
{'name': 'Bob', 'age': '25', 'city': 'NYC'}
import csv
with open("a.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "age", "city"])
    writer.writerow(["Alice", 20, "Seattle"])
import csv
with open("a.csv", "w", newline="") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({"name": "Alice", "age": 20, "city": "Seattle"})
    writer.writerow({"name": "Bob", "age": 25, "city": "NYC"})
四、结构化文本文件:Excel
用来读写扩展名为xlsx/xlsm/xltx/xltm的文件。一个workbook对象代表一个Excel文档,使用该方法创建一个worksheet对象后才能打开一个表
import openpyxl
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = "学生信息"
#方法1:指定坐标
sheet["A1"] = "姓名"
sheet["B1"] = "年龄"
sheet["A2"] = "Alice"
sheet["B2"] = 20
#方法2:row/column
sheet.cell(row=3, column=1, value="Bob")
sheet.cell(row=3, column=2, value=25)
wb = openpyxl.load_workbook("students.xlsx")
import openpyxl
# 创建Excel
wb = openpyxl.Workbook()
sheet = wb.active
sheet["A1"] = "姓名"
sheet["B1"] = "年龄"
sheet["A2"] = "Alice"
sheet["B2"] = 20
wb.save("students.xlsx")
# 读取Excel
wb2 = openpyxl.load_workbook("students.xlsx")
sheet2 = wb2.active
print(sheet2["A2"].value)
print(sheet2["B2"].value)
#获取单元格内容
print(sheet["A1"].value)
#使用 row / column 获取
print(sheet.cell(row=2, column=1).value)
cell.coordinate:返回单元格坐标(如 "A1")
cell.value:返回单元格的值
cell.row:返回单元格所在的行坐标
cell.column:返回单元格列坐标
五、结构化文本文件:PDF
#1️⃣打开 PDF
from pypdf import PdfReader
reader = PdfReader("a.pdf")
#2️⃣获取页数
print(len(reader.pages))
#3️⃣读取某一页
page = reader.pages[0]
#4️⃣提取文本
text = page.extract_text()
print(text)
#1️⃣创建写对象
from pypdf import PdfWriter
writer = PdfWriter()
#2️⃣添加页面
writer.add_page(page)
#3️⃣保存文件
with open("new.pdf", "wb") as f:
    writer.write(f)
from pypdf import PdfReader, PdfWriter
writer = PdfWriter()
for file in ["a.pdf", "b.pdf"]:
    reader = PdfReader(file)
    for page in reader.pages:
        writer.add_page(page)
with open("merged.pdf", "wb") as f:
    writer.write(f)
#1️⃣获取页面
page = reader.pages[0]
#2️⃣读取文本
print(page.extract_text())
#3️⃣旋转页面
page.rotate(90)
#4️⃣添加到 writer
writer.add_page(page)