作为Windows下的Python开发者,你是否经常遇到这样的场景:老板给你一个复杂的Excel表格,要求批量处理数据、生成报表,或者需要定期更新Excel中的数据?手工操作不仅效率低下,还容易出错。
本文将带你深入掌握openpyxl这个强大的Excel操作库,从基础读写到高级应用,让你告别重复的Excel操作,实现真正的办公自动化。无论你是Python新手还是有一定经验的开发者,都能在这里找到实用的解决方案。
在众多Python Excel操作库中,为什么推荐openpyxl?让我们来看看实际场景:
传统方式的痛点:
openpyxl的优势:
首先安装必要的库:
pip install openpyxl pandasfrom openpyxl import Workbookfrom openpyxl.utils import get_column_letter# 创建新的工作簿wb = Workbook()ws = wb.active # 获取活动工作表ws.title = "销售数据"# 设置工作表名称# 写入数据ws['A1'] = '产品名称'ws['B1'] = '销售额'ws['C1'] = '日期'# 批量写入数据data = [ ['手机', 5000, '2024-01-01'], ['电脑', 8000, '2024-01-02'], ['平板', 3000, '2024-01-03']]for row_num, row_data inenumerate(data, start=2):for col_num, value inenumerate(row_data, start=1): ws.cell(row=row_num, column=col_num, value=value)# 保存文件wb.save('销售数据.xlsx')print("✅ Excel文件创建成功!")
from openpyxl import load_workbookdefread_excel_data(filename):"""读取Excel数据的通用函数""" wb = load_workbook(filename) ws = wb.active# 获取所有数据 data = []for row in ws.iter_rows(min_row=2, values_only=True):ifany(row): # 跳过空行 data.append(row)return data# 使用示例data = read_excel_data('销售数据.xlsx')for row in data:print(f"产品: {row[0]}, 销售额: {row[1]}, 日期: {row[2]}")
这是最常见的业务场景,让我们创建一个完整的销售数据处理程序:
from openpyxl import Workbookfrom openpyxl.styles import Font, PatternFill, Border, Side, Alignmentfrom openpyxl.utils import get_column_letterfrom datetime import datetimeimport randomclassSalesReportGenerator:def__init__(self):self.wb = Workbook()self.ws = self.wb.activeself.ws.title = "销售报表"defcreate_header(self):"""创建表头""" headers = ['序号', '产品名称', '销售员', '销售额', '销售日期', '完成率']# 写入表头for col, header inenumerate(headers, 1): cell = self.ws.cell(row=1, column=col, value=header)# 设置表头样式 cell.font = Font(bold=True, color='FFFFFF') cell.fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid') cell.alignment = Alignment(horizontal='center', vertical='center')# 设置列宽 column_widths = [8, 15, 12, 12, 15, 10]for i, width inenumerate(column_widths, 1):self.ws.column_dimensions[get_column_letter(i)].width = widthdefadd_sales_data(self, data):"""添加销售数据"""for row_num, item inenumerate(data, 2):for col_num, value inenumerate(item, 1): cell = self.ws.cell(row=row_num, column=col_num, value=value)# 设置数据样式 cell.alignment = Alignment(horizontal='center', vertical='center')# 销售额列特殊处理if col_num == 4: # 销售额列 cell.number_format = '¥#,##0.00'if value > 8000: cell.fill = PatternFill(start_color='90EE90', end_color='90EE90', fill_type='solid')# 完成率列特殊处理if col_num == 6: # 完成率列 cell.number_format = '0.0%'if value > 0.9: cell.font = Font(color='008000', bold=True)defadd_summary(self, start_row):"""添加汇总信息""" summary_row = start_row + 1# 添加汇总标题self.ws.cell(row=summary_row, column=1, value='汇总统计')self.ws.merge_cells(f'A{summary_row}:F{summary_row}') summary_cell = self.ws.cell(row=summary_row, column=1) summary_cell.font = Font(bold=True, size=14) summary_cell.alignment = Alignment(horizontal='center') summary_cell.fill = PatternFill(start_color='FFE4B5', end_color='FFE4B5', fill_type='solid')# 添加统计公式 total_row = summary_row + 1self.ws.cell(row=total_row, column=3, value='总销售额:')self.ws.cell(row=total_row, column=4, value=f'=SUM(D2:D{start_row})')self.ws.cell(row=total_row, column=4).number_format = '¥#,##0.00' avg_row = total_row + 1self.ws.cell(row=avg_row, column=3, value='平均销售额:')self.ws.cell(row=avg_row, column=4, value=f'=AVERAGE(D2:D{start_row})')self.ws.cell(row=avg_row, column=4).number_format = '¥#,##0.00'defsave_report(self, filename):"""保存报表"""self.wb.save(filename)print(f"✅ 销售报表已保存为: {filename}")# 使用示例defgenerate_sample_data():"""生成示例数据""" products = ['iPhone 15', '华为P60', '小米14', 'iPad Pro', 'MacBook', '华为手表'] sales_people = ['张三', '李四', '王五', '赵六', '钱七'] data = []for i inrange(20): data.append([ i + 1, random.choice(products), random.choice(sales_people), random.randint(3000, 12000), datetime.now().strftime('%Y-%m-%d'), random.uniform(0.7, 1.1) ])return data# 生成报表generator = SalesReportGenerator()generator.create_header()sample_data = generate_sample_data()generator.add_sales_data(sample_data)generator.add_summary(len(sample_data) + 1)generator.save_report('销售报表_自动生成.xlsx')
在实际工作中,经常需要处理不规范的Excel数据:
from openpyxl import load_workbookimport refrom datetime import datetimeclassExcelDataCleaner:def__init__(self, filename):self.filename = filenameself.wb = load_workbook(filename)self.ws = self.wb.activeself.errors = []defclean_phone_numbers(self, column):"""清洗电话号码格式""" phone_pattern = re.compile(r'^1[3-9]\d{9}$')for row inrange(2, self.ws.max_row + 1): cell = self.ws.cell(row=row, column=column)if cell.value:# 移除所有非数字字符 phone = re.sub(r'[^\d]', '', str(cell.value))if phone_pattern.match(phone): cell.value = phone cell.fill = PatternFill(start_color='90EE90', end_color='90EE90', fill_type='solid')else: cell.fill = PatternFill(start_color='FFB6C1', end_color='FFB6C1', fill_type='solid')self.errors.append(f'第{row}行电话号码格式错误: {cell.value}')defvalidate_email(self, column):"""验证邮箱格式""" email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')for row inrange(2, self.ws.max_row + 1): cell = self.ws.cell(row=row, column=column)if cell.value:if email_pattern.match(str(cell.value)): cell.fill = PatternFill(start_color='90EE90', end_color='90EE90', fill_type='solid')else: cell.fill = PatternFill(start_color='FFB6C1', end_color='FFB6C1', fill_type='solid')self.errors.append(f'第{row}行邮箱格式错误: {cell.value}')defremove_duplicates(self, key_column):"""移除重复数据""" seen_values = set() rows_to_delete = []for row inrange(2, self.ws.max_row + 1): key_value = self.ws.cell(row=row, column=key_column).valueif key_value in seen_values: rows_to_delete.append(row)else: seen_values.add(key_value)# 从后往前删除行,避免索引变化for row inreversed(rows_to_delete):self.ws.delete_rows(row)returnlen(rows_to_delete)defgenerate_error_report(self):"""生成错误报告"""ifself.errors:# 创建新的工作表 error_ws = self.wb.create_sheet("数据错误报告") error_ws.cell(row=1, column=1, value="错误信息")for i, error inenumerate(self.errors, 2): error_ws.cell(row=i, column=1, value=error)print(f"⚠️ 发现 {len(self.errors)} 个数据错误,详情请查看'数据错误报告'工作表")else:print("✅ 数据验证通过,未发现错误")defsave_cleaned_data(self, output_filename):"""保存清洗后的数据"""self.wb.save(output_filename)print(f"✅ 清洗后的数据已保存为: {output_filename}")# 使用示例cleaner = ExcelDataCleaner('原始数据.xlsx')cleaner.clean_phone_numbers(3) # 假设电话号码在第3列cleaner.validate_email(4) # 假设邮箱在第4列duplicates_count = cleaner.remove_duplicates(1) # 以第1列为唯一键cleaner.generate_error_report()cleaner.save_cleaned_data('清洗后数据.xlsx')print(f"🗑️ 移除了 {duplicates_count} 条重复数据")
openpyxl还支持创建各种图表,让数据可视化:
from openpyxl import Workbookfrom openpyxl.chart import BarChart, LineChart, ReferenceclassExcelChartGenerator:def__init__(self):self.wb = Workbook()self.ws = self.wb.activeself.ws.title = "销售数据分析"defcreate_sales_chart(self):"""创建销售柱状图"""# 准备数据 months = ['1月', '2月', '3月', '4月', '5月', '6月'] sales_data = [15000, 18000, 22000, 25000, 28000, 32000]# 写入数据self.ws.cell(row=1, column=1, value='月份')self.ws.cell(row=1, column=2, value='销售额')for i, (month, sales) inenumerate(zip(months, sales_data), 2):self.ws.cell(row=i, column=1, value=month)self.ws.cell(row=i, column=2, value=sales)# 创建柱状图 chart = BarChart() chart.type = "col" chart.style = 10 chart.title = "月度销售额统计" chart.y_axis.title = '销售额(元)' chart.x_axis.title = '月份'# 设置数据范围 data = Reference(self.ws, min_col=2, min_row=1, max_row=7, max_col=2) cats = Reference(self.ws, min_col=1, min_row=2, max_row=7) chart.add_data(data, titles_from_data=True) chart.set_categories(cats)# 添加到工作表self.ws.add_chart(chart, "D2")return chartdefcreate_trend_chart(self):"""创建趋势线图"""# 在新的区域添加趋势数据 start_row = 10 days = list(range(1, 31)) trend_data = [1000 + i * 50 + (i % 7) * 200for i in days]self.ws.cell(row=start_row, column=1, value='日期')self.ws.cell(row=start_row, column=2, value='日销售额')for i, (day, sales) inenumerate(zip(days, trend_data)):self.ws.cell(row=start_row + i + 1, column=1, value=f'{day}日')self.ws.cell(row=start_row + i + 1, column=2, value=sales)# 创建折线图 line_chart = LineChart() line_chart.title = "日销售额趋势" line_chart.style = 13 line_chart.y_axis.title = '销售额(元)' line_chart.x_axis.title = '日期' data = Reference(self.ws, min_col=2, min_row=start_row, max_row=start_row + 30, max_col=2) cats = Reference(self.ws, min_col=1, min_row=start_row + 1, max_row=start_row + 30) line_chart.add_data(data, titles_from_data=True) line_chart.set_categories(cats)# 添加到工作表self.ws.add_chart(line_chart, "D25")defsave_chart_report(self, filename):"""保存图表报告"""self.wb.save(filename)print(f"📊 图表报告已保存为: {filename}")# 使用示例chart_gen = ExcelChartGenerator()chart_gen.create_sales_chart()chart_gen.create_trend_chart()chart_gen.save_chart_report('销售数据分析图表.xlsx')
import pandas as pd from openpyxl import load_workbook from openpyxl.styles import Font, PatternFill from openpyxl.utils.dataframe import dataframe_to_rows from openpyxl.workbook import Workbook defpandas_openpyxl_integration(): """演示pandas与openpyxl的集成使用"""# 使用pandas处理数据 df = pd.DataFrame({ '产品': ['A', 'B', 'C', 'D'], '销量': [100, 150, 200, 120], '单价': [50, 80, 60, 90] }) # 计算总金额 df['总金额'] = df['销量'] * df['单价'] # 创建工作簿 wb = Workbook() ws = wb.active # 将DataFrame写入Excel for r in dataframe_to_rows(df, index=False, header=True): ws.append(r) # 使用openpyxl添加格式 for cell in ws[1]: # 表头行 cell.font = Font(bold=True) cell.fill = PatternFill(start_color='CCCCCC', end_color='CCCCCC', fill_type='solid') wb.save('pandas_openpyxl集成示例.xlsx') pandas_openpyxl_integration()from openpyxl.styles import Protection from openpyxl.workbook import Workbook defprotect_excel_sheet(): """演示Excel工作表保护""" wb = Workbook() ws = wb.active # 添加数据 ws['A1'] = '受保护的数据' ws['B1'] = '可编辑区域'# 设置B1为可编辑 ws['B1'].protection = Protection(locked=False) # 保护工作表 ws.protection.sheet = True ws.protection.password = '123456' ws.protection.enable() wb.save('受保护的Excel文件.xlsx') print("🔒 Excel文件已设置保护密码") protect_excel_sheet()通过本文的深入学习,相信你已经掌握了openpyxl的精髓。让我们总结三个关键要点:
1. 🚀 自动化思维:将重复性的Excel操作转化为Python代码,一次编写,终身受益。无论是数据清洗、报表生成还是格式化,都可以通过编程实现。
2. 📊 格式与数据并重:openpyxl不仅能处理数据,更能保持和创建专业的Excel格式。样式设置、条件格式、图表生成,让你的报表更加专业美观。
3. 🔧 实战导向:本文提供的所有代码都来自真实的业务场景。建议你将这些代码模板保存下来,根据实际需求进行修改和扩展。
下一步学习建议:
记住,Python开发的魅力就在于能够将复杂的业务需求转化为简洁优雅的代码。在上位机开发和日常办公中,掌握Excel自动化操作将让你的工作效率成倍提升!
如果这篇文章对你有帮助,别忘了点赞收藏,让更多同行看到这些实用的编程技巧**!有问题欢迎在评论区交流讨论。