"""Example: a streaming chat assistant built with Streamlit.

The page keeps the conversation in ``st.session_state.messages`` and streams
the model's reply from an OpenAI-compatible ``/chat/completions`` endpoint.
``API_KEY`` and ``BASE_URL`` are read from the environment (or a ``.env`` file).
"""
import json
import logging
import os
from typing import Dict, Iterator, List, Optional

import requests
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

logger = logging.getLogger(__name__)

_SSE_DATA_PREFIX = "data:"
_SSE_DONE_MARKER = "[DONE]"


def _extract_delta_content(data_str: str) -> Optional[str]:
    """Return the text fragment carried by one SSE JSON payload, or None.

    Payloads that are not valid JSON, have no ``choices``, or carry an empty
    or null ``delta.content`` produce ``None`` instead of raising.
    """
    try:
        chunk = json.loads(data_str)
    except json.JSONDecodeError:
        return None
    if not isinstance(chunk, dict):
        return None
    choices = chunk.get("choices") or []
    if not choices:
        # Some chunks carry an empty ``choices`` list; skip them.
        return None
    delta = choices[0].get("delta") or {}
    return delta.get("content") or None


def _sync_system_message(messages: List[Dict[str, str]], system_info: str) -> None:
    """Keep at most one system message, at index 0, matching *system_info*.

    Streamlit re-executes the script on every interaction, so the system
    prompt is replaced in place rather than inserted again on each run.
    An empty *system_info* removes the leading system message.
    """
    has_system = bool(messages) and messages[0].get("role") == "system"
    if system_info:
        system_msg = {"role": "system", "content": system_info}
        if has_system:
            messages[0] = system_msg
        else:
            messages.insert(0, system_msg)
    elif has_system:
        del messages[0]


def call_llm_stream(messages) -> Iterator[str]:
    """Stream a chat completion from the LLM API.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; sent to the API unchanged.

    Yields:
        Non-empty text fragments of the assistant's reply, in arrival order.

    Raises:
        RuntimeError: If ``API_KEY`` or ``BASE_URL`` is not configured.
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.

    Because this is a generator, errors surface when iteration starts, not
    when the function is called.
    """
    api_key = os.getenv("API_KEY")
    base_url = os.getenv("BASE_URL")
    if not api_key or not base_url:
        raise RuntimeError(
            "API_KEY and BASE_URL must be set in the environment or .env file"
        )

    # Tolerate a trailing slash in BASE_URL so the path never contains "//".
    url = base_url.rstrip("/") + "/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "glm-4-flash",
        "messages": messages,
        "temperature": 0.7,
        "stream": True,  # Key setting: ask the server for a streamed response.
    }

    # The context manager releases the connection even when we stop early.
    with requests.post(
        url, headers=headers, json=data, stream=True, timeout=60
    ) as response:
        # Fail loudly on HTTP errors instead of yielding an empty reply.
        response.raise_for_status()

        # Read the SSE stream line by line.
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if not line.startswith(_SSE_DATA_PREFIX):
                continue
            # Accept both "data: ..." and "data:..." forms.
            data_str = line[len(_SSE_DATA_PREFIX):].strip()
            logger.debug("data_str: %s", data_str)
            if data_str == _SSE_DONE_MARKER:
                break
            content = _extract_delta_content(data_str)
            if content:
                yield content  # Hand each fragment to the caller as it arrives.


st.title("💬 智能聊天助手")

if "messages" not in st.session_state:
    st.session_state.messages = []

with st.sidebar:
    system_info = st.text_input("AI 角色设定:")
    _sync_system_message(st.session_state.messages, system_info)

    if st.button("清空对话"):
        st.session_state.messages = []
        st.rerun()  # Re-run the page so the cleared history is rendered.

# Render the conversation so far (the system prompt is not shown).
for msg in st.session_state.messages:
    if msg["role"] != "system":
        with st.chat_message(msg["role"]):
            st.write(msg["content"])

if prompt := st.chat_input("请输入问题:"):
    # Record and show the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Stream the assistant's reply.
    with st.chat_message("assistant"):
        try:
            response = st.write_stream(call_llm_stream(st.session_state.messages))  # type: ignore
        except requests.RequestException as exc:
            # Show the failure instead of storing an empty assistant message.
            st.error(f"请求失败:{exc}")
        else:
            st.session_state.messages.append({"role": "assistant", "content": response})