"""Turn a pasted English text into one MP3 file using gTTS.

Pipeline:
  1. Clean the pasted text (drop 「N」 index markers and asterisks).
  2. Split it into paragraphs on blank lines.
  3. Break paragraphs that exceed the safety limits into smaller chunks,
     preferring sentence boundaries.
  4. Synthesize every chunk to a temporary MP3, concatenate the temporary
     files into the final MP3, then delete the temporary files.
"""

import os
import re

try:
    from gtts import gTTS
except ImportError:
    # Keeps the pure text helpers importable without the TTS dependency;
    # main() raises a clear ImportError before any synthesis is attempted.
    gTTS = None

# ============ 1. Paste your text (keep blank lines between paragraphs) ============
text_content = """
这里放置文本
"""

# ============ 2. Chunking limits ============
# Conservative per-request limits chosen by the script author.
SAFE_CHAR_LIMIT = 800  # maximum characters per chunk
SAFE_WORD_LIMIT = 250  # maximum words per chunk (second safeguard)

# ============ 3. Output location ============
# NOTE(review): placeholder path - replace with a real folder before running
# ("Dcuments" looks like a typo of "Documents").
save_folder = "/Users/用户名/Dcuments/复制电脑文件夹的存储路径"
final_audio_name = "英文朗读.mp3"


def clean_source_text(text):
    """Return *text* without 「N」 index markers and asterisks, outer whitespace trimmed.

    Blank lines inside the text are preserved so paragraphs can still be
    detected afterwards.
    """
    text = re.sub(r"「\d+」\n?", "", text)
    text = re.sub(r'\*', '', text)
    return text.strip()


def split_paragraphs(text):
    """Split *text* on blank lines and return the non-empty, stripped paragraphs."""
    return [part.strip() for part in re.split(r'\n\s*\n', text) if part.strip()]


def _fits(text, char_limit, word_limit):
    """Return True when *text* is within both the character and word limits."""
    return len(text) <= char_limit and len(text.split()) <= word_limit


def _pack(pieces, char_limit, word_limit):
    """Greedily join *pieces* with single spaces into chunks within both limits.

    A piece that is itself over a limit is emitted alone; callers are expected
    to pre-split such pieces.
    """
    packed = []
    current = []
    chars = 0
    words = 0
    for piece in pieces:
        piece_words = len(piece.split())
        # The joining space counts towards the character budget.
        added_chars = len(piece) + (1 if current else 0)
        if current and (chars + added_chars > char_limit
                        or words + piece_words > word_limit):
            packed.append(" ".join(current))
            current, chars, words = [], 0, 0
            added_chars = len(piece)
        current.append(piece)
        chars += added_chars
        words += piece_words
    if current:
        packed.append(" ".join(current))
    return packed


def _split_oversized(paragraph, char_limit, word_limit):
    """Break an over-limit paragraph into chunks that each respect the limits.

    Sentences are the preferred unit. The split happens *after* '.', '!' or
    '?' so the original punctuation is kept as-is (nothing is appended).
    A sentence that is still too long falls back to word-level packing, and a
    single word longer than the character limit is sliced.
    """
    pieces = []
    for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
        if not sentence:
            continue
        if _fits(sentence, char_limit, word_limit):
            pieces.append(sentence)
            continue
        words = []
        for word in sentence.split():
            words.extend(word[i:i + char_limit]
                         for i in range(0, len(word), char_limit))
        pieces.extend(_pack(words, char_limit, word_limit))
    return _pack(pieces, char_limit, word_limit)


def chunk_paragraphs(paragraphs, char_limit=SAFE_CHAR_LIMIT, word_limit=SAFE_WORD_LIMIT):
    """Convert paragraphs into synthesis chunks.

    Args:
        paragraphs: iterable of paragraph strings (already stripped).
        char_limit: maximum characters allowed in one chunk (>= 1).
        word_limit: maximum whitespace-separated words in one chunk (>= 1).

    Returns:
        A list of dicts with keys ``"text"`` (the chunk) and ``"desc"`` (a
        label naming the source paragraph and, for split paragraphs, the
        1-based sub-chunk number within that paragraph).

    Raises:
        ValueError: if either limit is smaller than 1.
    """
    if char_limit < 1 or word_limit < 1:
        raise ValueError("char_limit and word_limit must both be >= 1")

    chunks = []
    for para_no, para in enumerate(paragraphs, start=1):
        if _fits(para, char_limit, word_limit):
            chunks.append({"text": para, "desc": f"你的第{para_no}段(完整)"})
            continue
        # Sub-chunk numbers restart at 1 for every paragraph.
        for sub_no, part in enumerate(_split_oversized(para, char_limit, word_limit), start=1):
            chunks.append({"text": part, "desc": f"你的第{para_no}段(子段{sub_no})"})
    return chunks


def synthesize_chunks(chunks, folder):
    """Synthesize each chunk into a temporary MP3 inside *folder*.

    Failures are reported and skipped (best effort) so one bad chunk does not
    abort the whole run.

    Returns:
        A list of ``(temp_path, chunk)`` pairs for the chunks that succeeded,
        in input order.
    """
    produced = []
    for chunk_no, chunk in enumerate(chunks, start=1):
        temp_path = os.path.join(folder, f"temp_片段{chunk_no}_{chunk['desc']}.mp3")
        print(f"\n🎤 生成 {chunk['desc']} 音频...")
        try:
            tts = gTTS(text=chunk["text"], lang='en', tld='co.uk', slow=False)
            tts.save(temp_path)
            file_size = os.path.getsize(temp_path) / 1024
            print(f" ✅ 成功 | 大小:{file_size:.1f} KB")
            produced.append((temp_path, chunk))
        except Exception as e:
            print(f" ❌ 失败:{str(e)}")
            # Do not leave a partially written temp file behind.
            if os.path.exists(temp_path):
                os.remove(temp_path)
    return produced


def merge_audio(temp_paths, final_path):
    """Concatenate the temporary MP3 files into *final_path*, then delete them.

    The files are joined by plain byte concatenation, as in the original
    script. Temporary files are removed only after the final file has been
    written and closed, so a failed merge does not destroy generated audio.
    """
    with open(final_path, 'wb') as final_file:
        for temp_path in temp_paths:
            with open(temp_path, 'rb') as f:
                final_file.write(f.read())
    for temp_path in temp_paths:
        os.remove(temp_path)


def main():
    """Run the full pipeline on ``text_content`` and print a summary."""
    if gTTS is None:
        raise ImportError("gTTS is required to generate audio: pip install gTTS")

    raw_paragraphs = split_paragraphs(clean_source_text(text_content))
    final_chunks = chunk_paragraphs(raw_paragraphs)

    # Debug output: lets the user verify how long paragraphs were split.
    print(f"📝 你的原始段落数:{len(raw_paragraphs)} 段")
    print(f"🔪 适配gTTS的最终片段数:{len(final_chunks)} 段")
    for i, chunk in enumerate(final_chunks, start=1):
        print(f" 片段{i}:{chunk['desc']} | 字符数:{len(chunk['text'])} | 预览:{chunk['text'][:40]}...")

    final_audio_path = os.path.join(save_folder, final_audio_name)
    os.makedirs(save_folder, exist_ok=True)

    produced = synthesize_chunks(final_chunks, save_folder)
    if not produced:
        print("\n❌ 无有效音频生成!")
        return

    merge_audio([path for path, _ in produced], final_audio_path)

    # Statistics cover only the chunks that actually made it into the file.
    total_words = sum(len(chunk["text"].split()) for _, chunk in produced)
    audio_duration = round(total_words / 150, 1)  # rough estimate at 150 words/minute
    final_size = os.path.getsize(final_audio_path) / 1024 / 1024
    print("\n" + "=" * 70)
    print("✅ 音频生成完成!兼容1000+字符长段落 ✅")
    print(f"📂 文件路径:{final_audio_path}")
    print(f"📖 总单词数:{total_words} | 预估时长:{audio_duration} 分钟")
    print(f"📦 文件大小:{final_size:.2f} MB")
    print("🗑️ 临时文件已清理!")
    print("=" * 70)


if __name__ == "__main__":
    main()