Spaces:

KIRA111B
/

ZYS-GuideBot

Sleeping

App Files Files Community

KIRA111B committed on Jul 2

Commit

816e8cd

verified ·

1 Parent(s): bb0d667

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -88

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py
 import gradio as gr
 from langchain.prompts import PromptTemplate
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
@@ -7,68 +7,27 @@ from langchain.chains import RetrievalQA
 from langchain_community.llms import LlamaCpp
 from huggingface_hub import hf_hub_download
 import os
-# --- 1. 配置部分 ---
 VECTOR_STORE_PATH = "vector_store"
 EMBEDDING_MODEL = "BAAI/bge-large-zh-v1.5"
-# 切换到 CapybaraHermes-2.5-Mistral-7B 模型
 GGUF_MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
-# 我们同样选择一个大小适中的4位量化版本
 GGUF_MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"
-# --- 2. 加载RAG核心管道 ---
-# 将所有耗时操作封装起来，只在应用启动时执行一次
 def load_rag_chain():
     print("开始加载RAG管道...")
-    # 检查向量数据库是否存在
-    if not os.path.exists(VECTOR_STORE_PATH):
-        raise FileNotFoundError(
-            f"错误：向量数据库文件夹 '{VECTOR_STORE_PATH}' 未找到！"
-            "请确保你已经将本地生成的 'vector_store' 文件夹与 'app.py' 一起上传。"
-        )
-    # 加载Embedding模型
-    print(f"--> 正在加载Embedding模型: {EMBEDDING_MODEL}")
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=EMBEDDING_MODEL,
-        model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True}
-    )
-    # 加载本地的FAISS向量数据库
-    print(f"--> 正在从 '{VECTOR_STORE_PATH}' 加载向量数据库...")
-    vector_store = FAISS.load_local(
-        VECTOR_STORE_PATH,
-        embeddings,
-        allow_dangerous_deserialization=True
-    )
-    # 从Hugging Face Hub下载GGUF模型文件
-    print(f"--> 开始下载/加载GGUF模型: {GGUF_MODEL_FILE} from {GGUF_MODEL_REPO}")
-    model_path = hf_hub_download(
-        repo_id=GGUF_MODEL_REPO,
-        filename=GGUF_MODEL_FILE,
-        local_dir="models", # 模型会下载到服务器的这个文件夹
-        local_dir_use_symlinks=False
-    )
-    # 初始化LlamaCpp模型加载器
-    print("--> 模型文件准备就绪，正在初始化LlamaCpp...")
-    llm = LlamaCpp(
-        model_path=model_path,
-        n_gpu_layers=0,      # 强制在CPU上运行
-        n_batch=512,         # 批处理大小
-        n_ctx=4096,          # 上下文窗口大小
-        f16_kv=True,         # 对性能有帮助
-        verbose=False        # 设为False以保持日志干净
-    )
-    # 定义Prompt模板
-    prompt_template = """<|im_start|>systemYou are a helpful assistant named "粤小智". Answer the user's question based on the provided "Context".
-Your answer should be in Chinese, clear, and step-by-step if it's an operation guide.
-If you don't know the answer from the context, just say: "抱歉，关于您的问题，我的知识库暂时没有相关信息。". Do not make up answers.
-<|im_end|>
 <|im_start|>user
 Context:
 {context}
@@ -77,45 +36,60 @@ Question:
 {question}<|im_end|>
 <|im_start|>assistant
 """
     PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-    # 创建完整的RAG问答链
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        chain_type="stuff",
-        retriever=vector_store.as_retriever(search_kwargs={"k": 3}), # 每次检索3个最相关的文档块
-        chain_type_kwargs={"prompt": PROMPT},
-        return_source_documents=False # 线上运行时不返回源文档
-    )
-    print("✅ RAG管道加载完毕，应用准备就绪！")
     return qa_chain
-# --- 3. Gradio应用逻辑 ---
-# 在应用启动时，执行一次加载操作
 RAG_CHAIN = load_rag_chain()
-# 定义与Gradio界面交互的函数
-def predict(message, history):
-    print(f"收到用户消息: '{message}'")
-    if not message:
-        return ""
-    result = RAG_CHAIN.invoke({"query": message})
-    response = result.get('result', "抱歉，处理时出现内部错误。").strip()
-    print(f"模型生成回答: '{response}'")
-    return response
-# --- 4. 搭建并启动Gradio界面 ---
-with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {background: linear-gradient(to right, #74ebd5, #ACB6E5)}") as demo:
     gr.Markdown("# 粤政云服务智能向导 - 我是粤小智 🤖")
-    gr.ChatInterface(
-        predict,
-        title="粤小智客服",
-        description="您好！可以向我提问关于粤政云平台使用的问题。",
-        examples=["我想建个网站，该怎么申请服务器？", "如何重置我的云主机密码？", "我的应用访问变慢了怎么办？"]
     )
-print("正在启动Garamio界面...")
-# 使用queue()可以处理并发请求，让应用更稳定
 demo.launch()

+# app.py (最终稳定版 - 使用 gr.Blocks)
 import gradio as gr
 from langchain.prompts import PromptTemplate
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain_community.llms import LlamaCpp
 from huggingface_hub import hf_hub_download
 import os
+import time
+# --- 1. 配置 (保持不变) ---
 VECTOR_STORE_PATH = "vector_store"
 EMBEDDING_MODEL = "BAAI/bge-large-zh-v1.5"
 GGUF_MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
 GGUF_MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"
+# --- 2. 加载RAG管道 (保持不变) ---
 def load_rag_chain():
+    # ... (这部分代码和之前完全一样，无需修改) ...
     print("开始加载RAG管道...")
+    embeddings = HuggingFaceBgeEmbeddings(model_name=EMBEDDING_MODEL, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True})
+    if not os.path.exists(VECTOR_STORE_PATH): raise FileNotFoundError(f"错误：向量数据库 '{VECTOR_STORE_PATH}' 不存在！")
+    vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
+    model_path = hf_hub_download(repo_id=GGUF_MODEL_REPO, filename=GGUF_MODEL_FILE, local_dir="models")
+    llm = LlamaCpp(model_path=model_path, n_gpu_layers=0, n_batch=512, n_ctx=4096, f16_kv=True, verbose=False)
+    prompt_template = """<|im_start|>system
+You are a helpful assistant named "粤小智". Answer the user's question in Chinese based on the provided "Context".
+If the context is not sufficient, just say: "抱歉，关于您的问题，我的知识库暂时没有相关信息。". Do not make up answers.
+Your answer should be clear and step-by-step if it's an operation guide.<|im_end|>
 <|im_start|>user
 Context:
 {context}
 {question}<|im_end|>
 <|im_start|>assistant
 """
     PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever(search_kwargs={"k": 3}), chain_type_kwargs={"prompt": PROMPT})
+    print("✅ RAG管道加载完毕！")
     return qa_chain
+# --- 3. Gradio应用逻辑 (修改以适配gr.Blocks) ---
 RAG_CHAIN = load_rag_chain()
+# history是Gradio自动管理的，格式为[ [user_msg1, bot_msg1], [user_msg2, bot_msg2], ... ]
+def user(user_message, history):
+    # 将用户消息添加到聊天记录中，并返回一个空的输入框
+    return "", history + [[user_message, None]]
+def bot(history):
+    # 获取最后一条用户消息
+    user_message = history[-1][0]
+    print(f"收到用户消息: '{user_message}'")
+    # 调用RAG链获取回答
+    result = RAG_CHAIN.invoke({"query": user_message})
+    bot_message = result.get('result', "处理出错").strip()
+    # 我们模拟打字效果，让体验更好
+    history[-1][1] = ""
+    for character in bot_message:
+        history[-1][1] += character
+        time.sleep(0.02) # 每个字之间暂停0.02秒
+        yield history
+    print(f"模型生成回答: '{history[-1][1]}'")
+# --- 4. 搭建并启动界面 (使用gr.Blocks手动搭建) ---
+with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
     gr.Markdown("# 粤政云服务智能向导 - 我是粤小智 🤖")
+    chatbot = gr.Chatbot(
+        [],
+        elem_id="chatbot",
+        label="聊天窗口",
+        bubble_full_width=True,
+        height=600
+    )
+    with gr.Row():
+        txt = gr.Textbox(
+            scale=4,
+            show_label=False,
+            placeholder="在这里输入您的问题，然后按回车键...",
+            container=False,
+        )
+    # 定义回车或点击按钮后的事件流
+    txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
+        bot, chatbot, chatbot
     )
+# 使用最简单的启动方式，但加入queue()来处理打字效果
+demo.queue()
 demo.launch()