当前位置：首页>python>Python 批量将同样模板的word内容转换成ppt

Python 批量将同样模板的word内容转换成ppt

2026-07-04 14:11:39

Python 批量将同样模板的word内容转换成ppt（代码都有AI生成，思路自己的）

1.利用python找出ppt模板的所有占位符

2.利用python解析所有word的内容

3.用AI写的代码批量生成ppt-过程中出现内容重复3遍的问题，AI死活找不到原因，还是自己找到的原因（word模板合并3个单元格合并为一个的原因）解决的

代码如下

Ppt解析

from pptx import Presentation

# 替换为你的模板文件路径

prs = Presentation(r"D:\claim相关\for board\演示文稿2.pptx")

# 遍历所有幻灯片

for slide_idx, slide in enumerate(prs.slides):

print(f"\n--- 幻灯片 {slide_idx + 1} 的占位符信息 ---")

# 遍历当前幻灯片上的所有占位符

for placeholder in slide.placeholders:

# 获取占位符的索引 (idx)

idx = placeholder.placeholder_format.idx

# 获取占位符的名称 (例如: 'Title 1')

name = placeholder.name

# 获取占位符的类型 (例如: TITLE, BODY, OBJECT)

ph_type = placeholder.placeholder_format.type

print(f" 占位符索引: {idx}, 名称: '{name}', 类型: {ph_type}")

Word 解析

import os

import tempfile

import shutil

from docx import Document

from docx.table import Table

from docx.text.paragraph import Paragraph

def extract_word_content(word_path, extract_specific_fields=True):

"""

从 Word 文档中提取所有内容：文本、表格、图片。

参数:

word_path: Word 文件路径

extract_specific_fields: 是否提取特定的质检字段（客户、款号等），默认为 True

dict 包含:

- full_text: 所有段落文本（按顺序，用换行分隔）

- paragraphs: 段落文本列表

- tables: 表格数据列表（每个表格是一个二维列表）

- images: 提取的图片临时文件路径列表

- specific_fields: (可选) 字典，包含 customer, style_number 等字段

"""

doc = Document(word_path)

# 存储结果

result = {

"full_text": "",

"paragraphs": [],

"tables": [],

"images": []

}

# 1. 提取所有段落文本（保持顺序）

for para in doc.paragraphs:

text = para.text.strip()

if text: # 只保留非空段落

result["paragraphs"].append(text)

result["full_text"] = "\n".join(result["paragraphs"])

# 2. 提取所有表格数据

for table in doc.tables:

table_data = []

for row in table.rows:

row_data = [cell.text.strip() for cell in row.cells]

table_data.append(row_data)

result["tables"].append(table_data)

# 3. 提取所有内嵌图片（保存到临时文件夹）

temp_img_dir = tempfile.mkdtemp(prefix="word_images_")

img_paths = []

for rel in doc.part.rels.values():

if "image" in rel.target_ref.lower():

img_data = rel.target_part.blob

# 生成唯一文件名

img_ext = rel.target_ref.split('.')[-1].lower()

img_name = f"img_{len(img_paths) + 1}.{img_ext}"

img_path = os.path.join(temp_img_dir, img_name)

with open(img_path, "wb") as f:

f.write(img_data)

img_paths.append(img_path)

result["images"] = img_paths

# 注意：临时目录需要调用者自行清理（使用 shutil.rmtree）

# 4. 提取特定质检字段（可选）

if extract_specific_fields:

specific = {

"customer": "",

"style_number": "",

"complain_date": "",

"main_issue": "",

"reason": "",

"corrective_action": ""

}

# 遍历所有表格，查找关键词

for table_data in result["tables"]:

for row in table_data:

if len(row) < 2:

continue

key, value = row[0], row[1]

key_lower = key.lower()

if "customer" in key_lower:

specific["customer"] = value

elif "style number" in key_lower:

specific["style_number"] = value

elif "complain date" in key_lower:

specific["complain_date"] = value

elif "main issue" in key_lower:

specific["main_issue"] = value

elif "reason" in key_lower:

specific["reason"] = value

elif "corrective action" in key_lower:

specific["corrective_action"] = value

result["specific_fields"] = specific

# 附加原始 doc 对象（可选，用于更高级的操作）

result["_doc"] = doc

return result

# 使用示例

if __name__ == "__main__":

word_file = r"D:\claim相关\for board\新建文件夹\1047539_20251201.docx"

content = extract_word_content(word_file)

print("=== 段落文本（前5段）===")

for i, para in enumerate(content["paragraphs"][:5]):

print(f"{i + 1}. {para}")

print("\n=== 表格数据 ===")

for tidx, table in enumerate(content["tables"]):

print(f"表格 {tidx + 1}: {len(table)} 行")

for row in table[:2]: # 只打印前两行

print(" | ".join(row))

print("\n=== 提取的图片 ===")

for img in content["images"]:

print(img)

if "specific_fields" in content:

print("\n=== 质检字段 ===")

for k, v in content["specific_fields"].items():

print(f"{k}: {v}")

# 清理临时图片文件夹（根据需要）

if content["images"]:

img_dir = os.path.dirname(content["images"][0])

shutil.rmtree(img_dir, ignore_errors=True)

Word 转化为ppt

import os

import tempfile

import shutil

from docx import Document

from pptx import Presentation

from pptx.util import Pt

from PIL import Image

# ======================== 配置参数 ========================

PPT_TEMPLATE_PATH = r"D:\claim相关\for board\演示文稿2.pptx"

WORD_FOLDER_PATH = r"D:\claim相关\for board\新建文件夹"

OUTPUT_PPT_FOLDER = r"D:\claim相关\输出PPT"

# 占位符索引（根据你的模板实际情况调整）

IMG_PLACEHOLDER_IDX = 20 # 图片占位符

TABLE2_PLACEHOLDER_IDX = 24 # Word第2个表格内容占位符

TABLE3_PLACEHOLDER_IDX = 13 # Word第3个表格内容占位符

# ======================== 工具函数 ========================

def extract_table_with_merged_cells(table):

"""

从Word表格中提取数据，正确处理合并单元格。

返回二维列表，每个单元格只出现一次（合并后的单元格内容填充到起始位置，其余位置为空字符串）。

"""

# 获取表格的最大行列数（通过遍历所有行和单元格，记录最大列数）

rows = []

max_cols = 0

for row in table.rows:

cells = row.cells

max_cols = max(max_cols, len(cells))

rows.append([cell.text.strip() for cell in cells])

# 补全每行列数（不够的用空字符串填充）

for row in rows:

while len(row) < max_cols:

row.append("")

# 解析合并信息：水平合并（gridSpan）和垂直合并（vMerge）

# 由于 python-docx 不直接提供合并信息，需要通过底层XML获取

tbl_xml = table._tbl

namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

# 记录被合并覆盖的单元格（行,列）

covered = set()

# 获取所有单元格的合并属性

for row_idx, row in enumerate(table.rows):

for col_idx, cell in enumerate(row.cells):

tc = cell._tc

tcPr = tc.find('.//w:tcPr', namespaces=namespaces)

if tcPr is not None:

# 水平合并：gridSpan

grid_span = tcPr.find('.//w:gridSpan', namespaces=namespaces)

if grid_span is not None:

span = int(grid_span.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 1))

# 标记被合并的后续列

for i in range(1, span):

covered.add((row_idx, col_idx + i))

# 垂直合并：vMerge

v_merge = tcPr.find('.//w:vMerge', namespaces=namespaces)

if v_merge is not None:

val = v_merge.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 'continue')

if val == 'continue':

# 当前单元格是垂直合并的后续部分，标记为覆盖

covered.add((row_idx, col_idx))

# 构建最终数据，跳过被覆盖的单元格

final_data = []

for row_idx in range(len(rows)):

new_row = []

for col_idx in range(max_cols):

if (row_idx, col_idx) in covered:

continue # 跳过被合并的单元格

# 获取单元格文本（可能来自上一行的合并起始单元格，但起始单元格的文本已在对应位置）

if col_idx < len(rows[row_idx]):

new_row.append(rows[row_idx][col_idx])

else:

new_row.append("")

final_data.append(new_row)

# 删除全空的行

final_data = [row for row in final_data if any(cell.strip() for cell in row)]

return final_data

def extract_word_data(word_path):

"""从Word提取图片、第二个表格、第三个表格（正确处理合并单元格）"""

doc = Document(word_path)

# 1. 提取第一张图片

image_path = None

temp_img_dir = tempfile.mkdtemp(prefix="word_img_")

for rel in doc.part.rels.values():

if "image" in rel.target_ref.lower():

img_data = rel.target_part.blob

ext = rel.target_ref.split('.')[-1].lower()

img_filename = f"img_{os.path.basename(word_path).split('.')[0]}.{ext}"

img_path = os.path.join(temp_img_dir, img_filename)

with open(img_path, "wb") as f:

f.write(img_data)

image_path = img_path

break

# 2. 提取所有表格（使用合并单元格处理函数）

tables_data = []

for table in doc.tables:

table_content = extract_table_with_merged_cells(table)

tables_data.append(table_content)

# 第二个表格（索引1），取前5行

table2_content = []

if len(tables_data) >= 2:

table2_content = tables_data[1][:5]

# 第三个表格（索引2），取全部行

table3_content = []

if len(tables_data) >= 3:

table3_content = tables_data[2]

return {

"image_path": image_path,

"temp_dir": temp_img_dir,

"table2": table2_content,

"table3": table3_content

}

def format_table_as_text(table_data, cell_separator="\t", row_separator="\n"):

"""将二维表格数据格式化为纯文本"""

if not table_data:

return "（无数据）"

lines = [cell_separator.join(row) for row in table_data]

return row_separator.join(lines)

def add_picture_with_ratio(slide, image_path, placeholder_idx):

"""插入图片并保持原始比例，完全填充占位符区域（可能裁剪）"""

try:

placeholder = slide.placeholders[placeholder_idx]

left, top, width, height = placeholder.left, placeholder.top, placeholder.width, placeholder.height

with Image.open(image_path) as img:

img_width, img_height = img.size

ratio = max(width / img_width, height / img_height)

new_width = int(img_width * ratio)

new_height = int(img_height * ratio)

left_offset = left + (width - new_width) // 2

top_offset = top + (height - new_height) // 2

sp = placeholder._sp

sp.getparent().remove(sp)

slide.shapes.add_picture(image_path, left_offset, top_offset, new_width, new_height)

return True

except KeyError:

print(f"警告：占位符索引 {placeholder_idx} 不存在，图片插入失败")

return False

except Exception as e:

print(f"图片插入异常：{e}")

return False

def set_placeholder_text(slide, placeholder_idx, text):

"""设置占位符文本，先清空原有内容"""

try:

placeholder = slide.placeholders[placeholder_idx]

text_frame = placeholder.text_frame

text_frame.clear()

p = text_frame.paragraphs[0] if text_frame.paragraphs else text_frame.add_paragraph()

p.text = text

p.font.size = Pt(12)

return True

except KeyError:

print(f"警告：占位符索引 {placeholder_idx} 不存在，文本设置失败")

return False

except Exception as e:

print(f"设置文本异常：{e}")

return False

def fill_ppt_template(template_path, word_data, output_path):

"""填充PPT模板"""

prs = Presentation(template_path)

slide = prs.slides[0]

if word_data["image_path"] and os.path.exists(word_data["image_path"]):

add_picture_with_ratio(slide, word_data["image_path"], IMG_PLACEHOLDER_IDX)

table2_text = format_table_as_text(word_data["table2"])

set_placeholder_text(slide, TABLE2_PLACEHOLDER_IDX, table2_text)

table3_text = format_table_as_text(word_data["table3"])

set_placeholder_text(slide, TABLE3_PLACEHOLDER_IDX, table3_text)

prs.save(output_path)

print(f"✅ 已生成PPT：{output_path}")

# ======================== 主程序 ========================

def main():

os.makedirs(OUTPUT_PPT_FOLDER, exist_ok=True)

for word_file in os.listdir(WORD_FOLDER_PATH):

if not word_file.endswith(".docx") or word_file.startswith("~$"):

continue

word_path = os.path.join(WORD_FOLDER_PATH, word_file)

print(f"\n正在处理：{word_file}")

try:

word_data = extract_word_data(word_path)

# 调试输出（可选）

# print("第三个表格处理后的数据：", word_data["table3"])

ppt_name = f"{os.path.splitext(word_file)[0]}_报告.pptx"

ppt_path = os.path.join(OUTPUT_PPT_FOLDER, ppt_name)

fill_ppt_template(PPT_TEMPLATE_PATH, word_data, ppt_path)

if word_data["temp_dir"] and os.path.exists(word_data["temp_dir"]):

shutil.rmtree(word_data["temp_dir"], ignore_errors=True)

except Exception as e:

print(f"❌ 处理文件 {word_file} 时出错：{e}")

print("\n🎉 所有Word文档已批量转换为PPT！")

if __name__ == "__main__":

main()

本文来自网友投稿或网络内容，如有侵犯您的权益请联系我们删除，联系邮箱：wyl860211@qq.com 。

Python 批量将同样模板的word内容转换成ppt

最新文章

热门文章

随机文章

Python 批量将同样模板的word内容转换成ppt

上班摸鱼写了一篇Python实现自动化办公流程手册

在线安装:Linux装Python,两行命令搞定!手把手教你避开所有坑

最新文章

热门文章

随机文章