基于 PyQt5 的 AI 数字人桌面应用,集成通义千问对话 + edge-tts 语音播报 + 卡通形象动画。
作者信息
运行
pip install PyQt5 edge-tts requests pygame
python ysp/code/digital_human.py
界面布局
+---------------------------+--------------------------------------+
|                           | 对话                                 |
|  卡通数字人形象           | +----------------------------------+ |
|  (实时动画)               | | 你: 你好                         | |
|                           | | AI: 你好!有什么可以帮你的?      | |
|  - 呼吸动画               | | 你: Python怎么学                 | |
|  - 眨眼                   | | AI: 建议从基础语法开始...        | |
|  - 说话嘴型               | +----------------------------------+ |
|  - 思考省略号             |                                      |
|  - 声波特效               | [输入消息,按回车发送...] [发送] [停][清] |
|                           |                                      |
|  [设置]                   | 就绪 | 公众号: Python学在坚持        |
|  人设: [智能助手]         +--------------------------------------+
|  语音: [晓晓]             |
|  语速: [====0%====]       |
|  [x] 自动语音播报         |
+---------------------------+
功能说明
AI 对话
- 接入阿里通义千问 API(qwen-plus 模型)
- 5 种人设可切换:智能助手、程序员导师、心灵鸡汤、百科全书、段子手
语音播报
- 5 种语音:晓晓(女温柔)、云希(男阳光)、云健(男沉稳)、晓伊(女活泼)、云扬(男新闻)
数字人动画
纯 QPainter 绘制的卡通形象,20fps 实时动画,包含 6 种动画效果:
代码架构
digital_human.py
+-- AI_CONFIG / PERSONAS              # AI配置和人设定义
+-- ChatWorker(QThread)               # AI对话异步线程
|   +-- 调用通义千问API
|   +-- finished信号返回回复
+-- TTSWorker(QThread)                # TTS语音合成异步线程
|   +-- 调用edge-tts合成mp3
|   +-- finished信号返回文件路径
+-- AudioPlayer                       # pygame音频播放封装
|   +-- play/stop/is_playing
+-- AvatarWidget(QWidget)             # 数字人形象动画控件
|   +-- paintEvent()                  # QPainter绘制卡通形象
|   +-- _animate()                    # 50ms定时器驱动动画
|   +-- set_speaking(bool)            # 切换说话状态
|   +-- set_mood(str)                 # 切换心情(normal/happy/thinking)
+-- DigitalHumanApp(QMainWindow)      # 主窗口
    +-- _send()                       # 发送消息 -> ChatWorker
    +-- _on_reply()                   # 收到回复 -> 显示 + TTS
    +-- _speak()                      # 启动TTS合成
    +-- _on_tts_done()                # 合成完成 -> 播放
    +-- _poll_audio()                 # 200ms轮询播放状态 -> 控制动画
数据流
用户输入文字
    |
    v
ChatWorker [子线程]
    | 调用通义千问API
    v
收到AI回复
    | 1. 显示在对话区
    | 2. 数字人切换"开心"表情
    v
TTSWorker [子线程]
    | 调用edge-tts合成mp3
    | 数字人切换"说话"动画
    v
AudioPlayer 播放mp3
    | 200ms轮询播放状态
    | 播放中: 数字人嘴巴动+声波+手臂摆
    v
播放结束
    | 数字人恢复"正常"状态
关键代码解析
1. 卡通形象绘制(AvatarWidget.paintEvent)
整个数字人形象用 QPainter 逐层绘制,从下到上:
def paintEvent(self, event):
    # 1. 背景渐变(浅蓝)
    # 2. 身体(蓝色圆角矩形 + 渐变)
    # 3. 头部(肤色圆形 + 径向渐变模拟立体感)
    # 4. 头发(贝塞尔曲线路径)
    # 5. 眼睛(眼白椭圆 + 瞳孔 + 高光点)
    #    - 眨眼时画横线替代眼睛
    #    - 瞳孔随时间左右微移
    # 6. 嘴巴
    #    - 说话时:椭圆开合(mouth_open 控制高度)
    #    - 安静时:贝塞尔曲线微笑
    # 7. 手臂(椭圆,说话时上下摆动)
    # 8. 名牌(白色圆角矩形 + "AI助手"文字)
    # 9. 状态特效
    #    - 说话:头顶3层声波弧线
    #    - 思考:头顶3个弹跳圆点
呼吸动画核心:所有部件的 Y 坐标加上 sin(tick * 0.05) * 3 的偏移,产生整体浮动效果。
2. 动画状态机
def _animate(self):
    self._tick += 1
    # 眨眼:每80帧(4秒)触发,持续4帧(0.2秒)
    self._blink = (self._tick % 80 < 4)
    # 说话嘴型:正弦波控制开合幅度
    if self._speaking:
        self._mouth_open = abs(sin(tick * 0.3)) * 0.8
    else:
        self._mouth_open = max(0, self._mouth_open - 0.1)  # 渐闭
    self.update()  # 触发重绘
50ms 定时器(20fps)驱动,通过 _tick 计数器控制所有动画的周期和相位。
3. 异步对话 + 语音流水线
def _send(self):
    # 1. 数字人进入"思考"状态(头顶省略号)
    self.avatar.set_mood("thinking")
    # 2. 子线程调用AI API
    self.chat_worker = ChatWorker(history, persona)
    self.chat_worker.finished.connect(self._on_reply)
    self.chat_worker.start()

def _on_reply(self, reply):
    # 3. 数字人切换"开心"(腮红)
    self.avatar.set_mood("happy")
    # 4. 自动启动TTS
    self.tts_worker = TTSWorker(reply, voice, rate)
    self.tts_worker.finished.connect(self._on_tts_done)
    self.avatar.set_speaking(True)  # 开始说话动画

def _on_tts_done(self, path):
    # 5. 播放音频
    self.player.play(path)

def _poll_audio(self):
    # 6. 200ms轮询,播放结束后恢复正常
    if not self.player.is_playing():
        self.avatar.set_speaking(False)
        self.avatar.set_mood("normal")
三个异步操作串联:AI对话(QThread) -> TTS合成(QThread) -> 音频播放(pygame),全程不阻塞GUI。
人设配置
语音列表
依赖
PyQt5      # GUI框架
edge-tts   # 微软TTS语音合成(需联网)
requests   # HTTP请求(调用AI API)
pygame     # 音频播放
注意事项
- API Key 在代码顶部 AI_CONFIG 中配置
- edge-tts 首次使用可能需要几秒下载语音模型
# -*- coding: utf-8 -*-
"""Digital human assistant v1.0 (PyQt5).

Features: AI chat + TTS voice playback + animated cartoon avatar.
Author: 杨少平 | WeChat official account: Python学在坚持
Run: python digital_human.py
Dependencies: pip install PyQt5 edge-tts requests pygame
"""
import asyncio
import html
import json
import math
import os
import random
import sys
import tempfile
import threading
import time
from datetime import datetime

from PyQt5.QtCore import QPointF, QRectF, Qt, QThread, QTimer, pyqtSignal
from PyQt5.QtGui import (
    QBrush,
    QColor,
    QFont,
    QImage,
    QLinearGradient,
    QPainter,
    QPainterPath,
    QPen,
    QPixmap,
    QRadialGradient,
)
from PyQt5.QtWidgets import (
    QApplication,
    QCheckBox,
    QComboBox,
    QFileDialog,
    QFrame,
    QGroupBox,
    QHBoxLayout,
    QLabel,
    QLineEdit,
    QMainWindow,
    QMessageBox,
    QPushButton,
    QSlider,
    QSplitter,
    QTextEdit,
    QVBoxLayout,
    QWidget,
)

# ==================== AI chat ====================

# Connection settings for the OpenAI-compatible chat-completions endpoint.
AI_CONFIG = {
    "api_key": "sk-自己的key",
    "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    "model": "qwen-plus",
}

# Persona display name -> system prompt sent as the first message.
PERSONAS = {
    "智能助手": "你是一个友好的AI助手,回答简洁有趣,每次回复控制在100字以内。",
    "程序员导师": "你是一个资深程序员导师,用通俗易懂的方式解释技术问题,回复控制在150字以内。",
    "心灵鸡汤": "你是一个温暖的心灵导师,给人鼓励和正能量,回复控制在80字以内,带点幽默。",
    "百科全书": "你是一个知识渊博的百科全书,回答准确简洁,控制在120字以内。",
    "段子手": "你是一个幽默的段子手,回复风趣搞笑,控制在80字以内。",
}


class ChatWorker(QThread):
    """Worker thread that posts the conversation to the chat API.

    The result is always delivered through ``finished``: either the reply
    text, or a bracketed error marker (``[API错误 ...]`` / ``[请求失败: ...]``).

    NOTE(review): the ``finished`` attribute below shadows QThread's own
    parameterless ``finished`` signal on this class.
    """

    finished = pyqtSignal(str)

    def __init__(self, messages, system):
        """Store the chat history (list of role/content dicts) and system prompt."""
        super().__init__()
        self.messages = messages
        self.system = system

    def run(self):
        """Send one request and emit the reply or an error marker."""
        try:
            # Imported lazily so a missing package surfaces as an error reply
            # instead of preventing the application from starting.
            import requests

            headers = {
                "Authorization": f"Bearer {AI_CONFIG['api_key']}",
                "Content-Type": "application/json",
            }
            msgs = [{"role": "system", "content": self.system}] + self.messages
            data = {"model": AI_CONFIG["model"], "messages": msgs, "max_tokens": 500}
            resp = requests.post(
                f"{AI_CONFIG['base_url']}/chat/completions",
                headers=headers,
                json=data,
                timeout=30,
            )
            if resp.status_code == 200:
                self.finished.emit(resp.json()["choices"][0]["message"]["content"])
            else:
                self.finished.emit(f"[API错误 {resp.status_code}]")
        except Exception as e:
            # Thread boundary: report every failure to the GUI as text.
            self.finished.emit(f"[请求失败: {e}]")


# ==================== TTS voice ====================

class TTSWorker(QThread):
    """Worker thread that synthesizes ``text`` to an mp3 file with edge-tts.

    Emits ``finished`` with the path of the generated file, or an empty
    string when synthesis failed.
    """

    finished = pyqtSignal(str)  # audio file path ("" on failure)

    def __init__(self, text, voice="zh-CN-XiaoxiaoNeural", rate=0):
        """Store the text, the edge-tts voice id and the rate offset in percent."""
        super().__init__()
        self.text = text
        self.voice = voice
        self.rate = rate

    def run(self):
        """Synthesize the text into a fresh temp file and emit its path."""
        path = ""
        try:
            import edge_tts

            # A unique file per clip: the previously generated file may still
            # be loaded by the audio player, so it must not be overwritten.
            fd, path = tempfile.mkstemp(prefix="dh_tts_", suffix=".mp3")
            os.close(fd)

            sign = "+" if self.rate >= 0 else ""
            rate_str = f"{sign}{self.rate}%"

            loop = asyncio.new_event_loop()
            try:
                asyncio.set_event_loop(loop)
                comm = edge_tts.Communicate(self.text, self.voice, rate=rate_str)
                loop.run_until_complete(comm.save(path))
            finally:
                # Close the loop on failure as well as on success.
                loop.close()
        except Exception:
            # Thread boundary: any failure is reported as an empty path.
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass
            self.finished.emit("")
        else:
            self.finished.emit(path)


class AudioPlayer:
    """Thin best-effort wrapper around ``pygame.mixer.music``."""

    def __init__(self):
        """Initialise the mixer; on failure every method becomes a no-op."""
        self._ok = False
        try:
            import pygame

            pygame.mixer.init()
            self._ok = True
        except Exception:
            # Deliberate best-effort: without audio the app still runs.
            pass

    def play(self, path):
        """Load and start playing ``path``; silently does nothing on failure."""
        if not self._ok or not path:
            return
        try:
            import pygame

            pygame.mixer.music.load(path)
            pygame.mixer.music.play()
        except Exception:
            # Deliberate best-effort: playback problems must not crash the GUI.
            pass

    def is_playing(self):
        """Return True while the mixer reports music as busy."""
        if not self._ok:
            return False
        import pygame

        return pygame.mixer.music.get_busy()

    def stop(self):
        """Stop the current playback, if the mixer is available."""
        if not self._ok:
            return
        import pygame

        pygame.mixer.music.stop()


# ==================== Avatar (animated drawing) ====================

class AvatarWidget(QWidget):
    """Cartoon avatar with breathing, blinking and speaking animations."""

    def __init__(self):
        """Set up animation state and start the 50 ms animation timer."""
        super().__init__()
        self.setMinimumSize(280, 400)
        self._tick = 0
        self._speaking = False
        self._blink = False
        self._mood = "normal"  # normal, happy, thinking
        self._mouth_open = 0.0
        self._bg_hue = 200
        self.timer = QTimer(self)
        self.timer.timeout.connect(self._animate)
        self.timer.start(50)  # 20 fps

    def set_speaking(self, on):
        """Switch the speaking animation on or off."""
        self._speaking = on

    def is_speaking(self):
        """Return whether the speaking animation is currently on."""
        return self._speaking

    def set_mood(self, mood):
        """Set the mood: "normal", "happy" or "thinking"."""
        self._mood = mood

    def _animate(self):
        """Advance one animation frame and request a repaint."""
        self._tick += 1
        # Blink once every 80 frames, for 4 frames.
        self._blink = self._tick % 80 < 4
        if self._speaking:
            # Mouth opening follows a rectified sine wave while speaking.
            self._mouth_open = abs(math.sin(self._tick * 0.3)) * 0.8
        else:
            # Close the mouth gradually instead of snapping shut.
            self._mouth_open = max(0, self._mouth_open - 0.1)
        self.update()

    def paintEvent(self, event):
        """Draw the avatar layer by layer for the current tick."""
        p = QPainter(self)
        p.setRenderHint(QPainter.Antialiasing)
        w, h = self.width(), self.height()
        cx, cy = w // 2, h // 2

        # Background gradient.
        grad = QLinearGradient(0, 0, 0, h)
        grad.setColorAt(0, QColor(230, 240, 255))
        grad.setColorAt(1, QColor(200, 220, 250))
        p.fillRect(0, 0, w, h, grad)

        # Breathing: a slow vertical offset shared by body and head.
        breath = math.sin(self._tick * 0.05) * 3

        # ---- Body (rounded rectangle) ----
        body_y = cy + 80 + breath
        body_grad = QLinearGradient(cx - 60, body_y, cx + 60, body_y + 120)
        body_grad.setColorAt(0, QColor(70, 130, 230))
        body_grad.setColorAt(1, QColor(50, 100, 200))
        p.setBrush(body_grad)
        p.setPen(Qt.NoPen)
        body_path = QPainterPath()
        body_path.addRoundedRect(QRectF(cx - 55, body_y, 110, 130), 20, 20)
        p.drawPath(body_path)

        # ---- Head (ellipse with radial gradient) ----
        head_y = cy - 40 + breath
        head_r = 65
        head_grad = QRadialGradient(cx, head_y, head_r)
        head_grad.setColorAt(0, QColor(255, 220, 185))
        head_grad.setColorAt(1, QColor(240, 195, 155))
        p.setBrush(head_grad)
        p.setPen(QPen(QColor(200, 160, 120), 2))
        p.drawEllipse(QPointF(cx, head_y), head_r, head_r * 1.05)

        # ---- Hair (closed quadratic-curve path) ----
        p.setBrush(QColor(50, 40, 30))
        p.setPen(Qt.NoPen)
        hair = QPainterPath()
        hair.moveTo(cx - 60, head_y - 20)
        hair.quadTo(cx - 70, head_y - 75, cx, head_y - 80)
        hair.quadTo(cx + 70, head_y - 75, cx + 60, head_y - 20)
        hair.quadTo(cx + 50, head_y - 50, cx, head_y - 55)
        hair.quadTo(cx - 50, head_y - 50, cx - 60, head_y - 20)
        p.drawPath(hair)

        # ---- Eyes ----
        eye_y = head_y - 5
        for ex in [cx - 22, cx + 22]:
            if self._blink:
                # Closed eye: a short horizontal line.
                p.setPen(QPen(QColor(60, 40, 20), 2))
                p.drawLine(int(ex - 8), int(eye_y), int(ex + 8), int(eye_y))
            else:
                # Eye white.
                p.setBrush(QColor(255, 255, 255))
                p.setPen(QPen(QColor(60, 40, 20), 2))
                p.drawEllipse(QPointF(ex, eye_y), 10, 12)
                # Pupil, drifting slowly left and right.
                p.setBrush(QColor(40, 30, 20))
                p.setPen(Qt.NoPen)
                pupil_offset = math.sin(self._tick * 0.02) * 2
                p.drawEllipse(QPointF(ex + pupil_offset, eye_y + 1), 5, 6)
                # Highlight.
                p.setBrush(QColor(255, 255, 255))
                p.drawEllipse(QPointF(ex + 3, eye_y - 3), 2, 2)

        # Mood expression: blush on both cheeks when happy.
        if self._mood == "happy":
            p.setBrush(QColor(255, 150, 150, 80))
            p.setPen(Qt.NoPen)
            p.drawEllipse(QPointF(cx - 40, head_y + 10), 12, 8)
            p.drawEllipse(QPointF(cx + 40, head_y + 10), 12, 8)

        # ---- Mouth ----
        mouth_y = head_y + 25
        p.setPen(QPen(QColor(200, 80, 80), 2))
        if self._mouth_open > 0.1:
            # Speaking: open ellipse whose height follows _mouth_open.
            p.setBrush(QColor(180, 60, 60))
            mouth_h = 5 + self._mouth_open * 12
            p.drawEllipse(QRectF(cx - 12, mouth_y - mouth_h / 2, 24, mouth_h))
        else:
            # Idle: smile curve.
            mouth_path = QPainterPath()
            mouth_path.moveTo(cx - 15, mouth_y)
            mouth_path.quadTo(cx, mouth_y + 10, cx + 15, mouth_y)
            p.setBrush(Qt.NoBrush)
            p.drawPath(mouth_path)

        # ---- Arms (swing in opposite directions while speaking) ----
        p.setPen(Qt.NoPen)
        p.setBrush(QColor(255, 210, 175))
        arm_wave = math.sin(self._tick * 0.08) * 5 if self._speaking else 0
        p.drawEllipse(QPointF(cx - 70, body_y + 40 + arm_wave), 15, 25)  # left
        p.drawEllipse(QPointF(cx + 70, body_y + 40 - arm_wave), 15, 25)  # right

        # ---- Name tag ----
        p.setPen(Qt.NoPen)
        p.setBrush(QColor(255, 255, 255, 200))
        p.drawRoundedRect(QRectF(cx - 30, body_y + 15, 60, 20), 4, 4)
        p.setPen(QColor(50, 100, 200))
        p.setFont(QFont("Microsoft YaHei", 8))
        p.drawText(QRectF(cx - 30, body_y + 15, 60, 20), Qt.AlignCenter, "AI助手")

        # ---- State indicators ----
        if self._speaking:
            # Three pulsing sound-wave arcs above the head.
            for i in range(3):
                alpha = 150 - i * 40
                r = 15 + i * 12 + math.sin(self._tick * 0.2 + i) * 5
                p.setPen(QPen(QColor(70, 130, 230, alpha), 2))
                p.setBrush(Qt.NoBrush)
                # drawArc angles are given in 1/16 of a degree.
                p.drawArc(QRectF(cx - r, head_y - 90 - r / 2, r * 2, r), 30 * 16, 120 * 16)

        if self._mood == "thinking":
            # Three bouncing dots (an ellipsis) above the head.
            p.setPen(Qt.NoPen)
            p.setBrush(QColor(100, 100, 100))
            for i in range(3):
                dot_x = cx - 15 + i * 15
                bounce = abs(math.sin(self._tick * 0.1 + i * 0.5)) * 8
                p.drawEllipse(QPointF(dot_x, head_y - 85 - bounce), 4, 4)

        p.end()


# ==================== Main window ====================

# (edge-tts voice id, display name) pairs, in combo-box order.
VOICES = [
    ("zh-CN-XiaoxiaoNeural", "晓晓(女-温柔)"),
    ("zh-CN-YunxiNeural", "云希(男-阳光)"),
    ("zh-CN-YunjianNeural", "云健(男-沉稳)"),
    ("zh-CN-XiaoyiNeural", "晓伊(女-活泼)"),
    ("zh-CN-YunyangNeural", "云扬(男-新闻)"),
]


class DigitalHumanApp(QMainWindow):
    """Main window: avatar and settings on the left, chat on the right."""

    def __init__(self):
        """Create the player and conversation state, then build the UI."""
        super().__init__()
        self.setWindowTitle("🤖 数字人助手 v1.0 | Python学在坚持")
        self.setMinimumSize(900, 600)
        self.resize(1050, 680)
        self.player = AudioPlayer()
        self.chat_history = []
        self.chat_worker = None
        self.tts_worker = None
        self.auto_speak = True
        # True between starting a synthesis and receiving its result;
        # cleared early by _stop_speak to cancel the pending playback.
        self._tts_pending = False
        # Path of the clip most recently handed to the player.
        self._audio_path = ""
        self._build()

    def _build(self):
        """Create all widgets and layouts and start the playback poll timer."""
        central = QWidget()
        self.setCentralWidget(central)
        root = QHBoxLayout(central)
        root.setContentsMargins(8, 8, 8, 8)
        root.setSpacing(8)

        # ---- Left: avatar + settings ----
        left = QWidget()
        left.setMaximumWidth(300)
        ll = QVBoxLayout(left)
        ll.setContentsMargins(0, 0, 0, 0)
        ll.setSpacing(6)
        self.avatar = AvatarWidget()
        ll.addWidget(self.avatar, 1)

        g = QGroupBox("设置")
        gl = QVBoxLayout()
        gl.setSpacing(4)

        gl.addWidget(QLabel("人设:"))
        self.cb_persona = QComboBox()
        self.cb_persona.addItems(list(PERSONAS.keys()))
        gl.addWidget(self.cb_persona)

        gl.addWidget(QLabel("语音:"))
        self.cb_voice = QComboBox()
        for _, vname in VOICES:
            self.cb_voice.addItem(vname)
        gl.addWidget(self.cb_voice)

        rate_row = QHBoxLayout()
        rate_row.addWidget(QLabel("语速:"))
        self.sl_rate = QSlider(Qt.Horizontal)
        self.sl_rate.setRange(-50, 50)
        self.sl_rate.setValue(0)
        self.lbl_rate = QLabel("0%")
        self.sl_rate.valueChanged.connect(lambda v: self.lbl_rate.setText(f"{v:+d}%"))
        rate_row.addWidget(self.sl_rate, 1)
        rate_row.addWidget(self.lbl_rate)
        gl.addLayout(rate_row)

        self.chk_speak = QCheckBox("自动语音播报")
        self.chk_speak.setChecked(True)
        self.chk_speak.stateChanged.connect(
            lambda s: setattr(self, 'auto_speak', s == Qt.Checked)
        )
        gl.addWidget(self.chk_speak)

        g.setLayout(gl)
        ll.addWidget(g)
        root.addWidget(left)

        # ---- Right: chat area ----
        right = QWidget()
        rl = QVBoxLayout(right)
        rl.setContentsMargins(0, 0, 0, 0)
        rl.setSpacing(6)
        rl.addWidget(QLabel("💬 对话"))

        self.chat_display = QTextEdit()
        self.chat_display.setReadOnly(True)
        self.chat_display.setFont(QFont("Microsoft YaHei", 11))
        self.chat_display.setStyleSheet("""
            QTextEdit {
                background: #f8f9fa;
                border: 1px solid #e0e0e0;
                border-radius: 8px;
                padding: 12px;
            }
        """)
        rl.addWidget(self.chat_display, 1)

        # Input row: text field, send, stop-voice and clear buttons.
        input_row = QHBoxLayout()
        self.ed_input = QLineEdit()
        self.ed_input.setFont(QFont("Microsoft YaHei", 12))
        self.ed_input.setPlaceholderText("输入消息,按回车发送...")
        self.ed_input.setStyleSheet("padding:10px;border:2px solid #1976d2;border-radius:8px;")
        self.ed_input.returnPressed.connect(self._send)
        input_row.addWidget(self.ed_input, 1)

        self.btn_send = QPushButton("发送")
        self.btn_send.setStyleSheet("padding:10px 20px;background:#1976d2;color:#fff;border:none;border-radius:8px;font-size:14px;font-weight:bold;")
        self.btn_send.clicked.connect(self._send)
        input_row.addWidget(self.btn_send)

        btn_stop = QPushButton("⏹")
        btn_stop.setFixedSize(40, 40)
        btn_stop.setStyleSheet("background:#d32f2f;color:#fff;border:none;border-radius:20px;font-size:16px;")
        btn_stop.setToolTip("停止语音")
        btn_stop.clicked.connect(self._stop_speak)
        input_row.addWidget(btn_stop)

        btn_clear = QPushButton("🗑")
        btn_clear.setFixedSize(40, 40)
        btn_clear.setStyleSheet("background:#757575;color:#fff;border:none;border-radius:20px;font-size:16px;")
        btn_clear.setToolTip("清空对话")
        btn_clear.clicked.connect(self._clear)
        input_row.addWidget(btn_clear)
        rl.addLayout(input_row)

        # Status line.
        self.lbl_status = QLabel("就绪 | 公众号: Python学在坚持")
        self.lbl_status.setStyleSheet("color:#888;font-size:11px;")
        rl.addWidget(self.lbl_status)
        root.addWidget(right, 1)

        # Poll the playback state every 200 ms to drive the avatar.
        self.poll_timer = QTimer(self)
        self.poll_timer.timeout.connect(self._poll_audio)
        self.poll_timer.start(200)

    def _send(self):
        """Send the typed message to the chat API on a worker thread."""
        text = self.ed_input.text().strip()
        if not text:
            return
        # Only one request at a time: replacing a running QThread object
        # would destroy it while its thread is still executing.
        if self.chat_worker and self.chat_worker.isRunning():
            return
        self.ed_input.clear()
        self._append_msg("你", text, "#1976d2")
        self.chat_history.append({"role": "user", "content": text})

        # Avatar enters the thinking state while waiting for the reply.
        self.avatar.set_mood("thinking")
        self.lbl_status.setText("思考中...")
        self.btn_send.setEnabled(False)

        persona = PERSONAS[self.cb_persona.currentText()]
        # The worker gets a copy so later edits of the history cannot race it.
        self.chat_worker = ChatWorker(list(self.chat_history), persona)
        self.chat_worker.finished.connect(self._on_reply)
        self.chat_worker.start()

    def _on_reply(self, reply):
        """Show the reply and, if enabled, start speaking it."""
        self.btn_send.setEnabled(True)
        self.avatar.set_mood("happy")
        self.chat_history.append({"role": "assistant", "content": reply})
        self._append_msg("AI", reply, "#388e3c")
        self.lbl_status.setText("回复完成")
        # Replies starting with "[" are the workers' error markers; skip TTS.
        if self.auto_speak and reply and not reply.startswith("["):
            self._speak(reply)

    def _speak(self, text):
        """Start synthesizing ``text`` with the selected voice and rate."""
        # Same guard as _send: never drop the reference to a running QThread.
        # A reply that arrives while a synthesis is in flight is not spoken.
        if self.tts_worker is not None and self.tts_worker.isRunning():
            return
        voice_idx = self.cb_voice.currentIndex()
        voice_id = VOICES[voice_idx][0] if 0 <= voice_idx < len(VOICES) else VOICES[0][0]
        rate = self.sl_rate.value()
        self._tts_pending = True
        self.tts_worker = TTSWorker(text, voice_id, rate)
        self.tts_worker.finished.connect(self._on_tts_done)
        self.tts_worker.start()
        self.avatar.set_speaking(True)
        self.lbl_status.setText("语音合成中...")

    def _on_tts_done(self, path):
        """Play the synthesized clip, or report the synthesis failure."""
        cancelled = not self._tts_pending
        self._tts_pending = False
        if cancelled:
            # The user pressed stop while the clip was being synthesized.
            self._discard_audio(path)
            return
        if path and os.path.exists(path):
            self.player.play(path)
            # The previous clip is no longer needed once a new one is handed
            # to the player; removal is best-effort.
            if self._audio_path != path:
                self._discard_audio(self._audio_path)
            self._audio_path = path
            self.lbl_status.setText("播报中...")
        else:
            self.avatar.set_speaking(False)
            self.lbl_status.setText("语音合成失败(需联网)")

    def _poll_audio(self):
        """Keep the avatar's speaking state in sync with audio playback."""
        if self.player.is_playing():
            self.avatar.set_speaking(True)
        elif self._tts_pending:
            # Synthesis is still running: nothing is playing yet, but the
            # speaking animation and the status text must be left alone.
            return
        elif self.avatar.is_speaking():
            self.avatar.set_speaking(False)
            self.avatar.set_mood("normal")
            self.lbl_status.setText("就绪")

    def _stop_speak(self):
        """Stop playback and cancel any clip that is still being synthesized."""
        self._tts_pending = False
        self.player.stop()
        self.avatar.set_speaking(False)
        self.avatar.set_mood("normal")

    def _clear(self):
        """Clear the conversation history and the chat display."""
        self.chat_history.clear()
        self.chat_display.clear()
        self.avatar.set_mood("normal")

    def _append_msg(self, role, text, color):
        """Append one timestamped message to the chat display.

        ``text`` is escaped before being embedded in the HTML fragment so
        that markup inside a message is shown literally, and line breaks are
        kept by converting them to ``<br/>``.
        """
        ts = datetime.now().strftime("%H:%M:%S")
        safe_text = html.escape(text).replace("\n", "<br/>")
        self.chat_display.append(
            f'<div style="margin:6px 0;">'
            f'<span style="color:{color};font-weight:bold;">{role}</span>'
            f'<span style="color:#aaa;font-size:10px;margin-left:8px;">{ts}</span>'
            f'<br/><span style="color:#333;">{safe_text}</span></div>'
        )
        bar = self.chat_display.verticalScrollBar()
        bar.setValue(bar.maximum())

    @staticmethod
    def _discard_audio(path):
        """Best-effort removal of a temporary audio file."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # The file may already be gone or still be held open; ignore.
            pass


# ==================== Entry point ====================

if __name__ == "__main__":
    app = QApplication(sys.argv)
    app.setFont(QFont("Microsoft YaHei", 10))
    win = DigitalHumanApp()
    win.show()
    sys.exit(app.exec_())