mkschulz9 committed
Commit 674e8c0 · Parent(s): c19573a

bugfix: bugfixes for LLM streaming

Files changed (2):
  1. app.py +205 -92
  2. utils/chatLogger.py +59 -34
app.py CHANGED
@@ -3,13 +3,13 @@ import json
 import base64
 import uuid
 import logging
+from typing import Generator, List, Dict, Tuple, Optional
 
-from typing import Generator
 from sentence_transformers import SentenceTransformer
 from openai import OpenAI
 import gradio as gr
-
 from dotenv import load_dotenv
+
 from utils.utils import (
     get_keys_chunks,
     get_docs,
@@ -17,53 +17,92 @@ from utils.utils import (
     get_messages,
     load_knowledge_base,
 )
-
 from utils.chatLogger import ChatUploader
 
 
-def initialize():
-    """
-    Initializes embedding model, encodes document chunks, loads environment variables, and initializes clients.
-    """
-    logging.basicConfig(level=logging.INFO)
-    logger = logging.getLogger(__name__)
-
+# --------------- Logging ---------------
+def _setup_logging() -> logging.Logger:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(levelname)s:%(name)s:%(message)s",
+    )
+    return logging.getLogger(__name__)
+
+
+# --------------- Initialization ---------------
+def _require_env(var: str) -> str:
+    val = os.getenv(var)
+    if not val:
+        raise RuntimeError(f"Missing required environment variable: {var}")
+    return val
+
+
+def initialize() -> Tuple[
+    SentenceTransformer,
+    List[Tuple[str, "numpy.ndarray"]],
+    Dict,
+    OpenAI,
+    logging.Logger,
+    Optional[ChatUploader],
+]:
+    logger = _setup_logging()
     logger.info("Initializing application...")
-    logger.info("Loading sentence embedding model...")
+
+    load_dotenv(override=False)
+    logger.info(".env loaded (override=False)")
+
     embedding_model_path = "ibm-granite/granite-embedding-125m-english"
     embedding_model = SentenceTransformer(embedding_model_path)
+    logger.info("Embedding model loaded: %s", embedding_model_path)
 
-    logger.info("Loading and encoding document chunks...")
     knowledge_base = load_knowledge_base()
-    keys, chunks = zip(*get_keys_chunks(knowledge_base))
-    chunks_encoded = embedding_model.encode(chunks)
-    keys_chunksEncoded = list(zip(keys, chunks_encoded))
-
-    logger.info("Loading env variables...")
-    if not os.getenv("SPACE_ID"):
-        load_dotenv()
+    logger.info("Knowledge base loaded")
+
+    pairs = list(get_keys_chunks(knowledge_base))
+    if not pairs:
+        raise RuntimeError("Knowledge base is empty – no chunks to encode.")
+    keys, chunks = zip(*pairs)
+    logger.info("KB chunks extracted: %d", len(chunks))
+
+    chunks_encoded = embedding_model.encode(
+        list(chunks),
+        batch_size=64,
+        convert_to_numpy=True,
+        show_progress_bar=False,
+    )
+    keys_chunks_encoded = list(zip(keys, chunks_encoded))
+    logger.info("KB chunks encoded: %d", len(keys_chunks_encoded))
 
-    logger.info("Initializing OpenAI client...")
-    openAI_client = OpenAI(
+    inference_api_key = _require_env("INFERENCE_API_KEY")
+    openai_client = OpenAI(
         base_url="https://api.inference.net/v1",
-        api_key=os.getenv("INFERENCE_API_KEY"),
+        api_key=inference_api_key,
     )
-
-    logger.info("Loading Drive service account details...")
-    drive_creds_encoded = os.getenv(
-        "GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64"
-    ).strip()
-    service_account_json = json.loads(base64.b64decode(drive_creds_encoded).decode())
-
-    logger.info("Initializing ChatUploader instance...")
-    chat_uploader = ChatUploader(service_account_json)
-
-    logger.info("Ready for user query...")
+    logger.info("OpenAI client initialized (base_url=api.inference.net)")
+
+    chat_uploader: Optional[ChatUploader] = None
+    drive_creds_b64 = os.getenv("GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64")
+    if drive_creds_b64:
+        try:
+            service_account_json = json.loads(
+                base64.b64decode(drive_creds_b64).decode()
+            )
+            chat_uploader = ChatUploader(service_account_json)
+            logger.info("Google Drive uploader configured")
+        except Exception as e:
+            logger.warning(
+                "Google Drive uploader not configured (error parsing creds): %s", e
+            )
+            chat_uploader = None
+    else:
+        logger.info("Google Drive uploader not configured (no creds env var)")
+
+    logger.info("Initialization complete")
     return (
         embedding_model,
-        keys_chunksEncoded,
+        keys_chunks_encoded,
         knowledge_base,
-        openAI_client,
+        openai_client,
         logger,
         chat_uploader,
     )
@@ -79,102 +118,175 @@ def initialize()
 ) = initialize()
 
 
+# --------------- Helpers ---------------
+def _strip_think_tags(text: str) -> str:
+    return text.replace("<think>", "").replace("</think>", "")
+
+
+def _to_minimal(history: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    """Keep only role/content keys to avoid metadata/options noise in uploads."""
+    minimal: List[Dict[str, str]] = []
+    for m in history:
+        role = m.get("role")
+        content = m.get("content", "")
+        if role is None:
+            # ignore malformed entries
+            continue
+        minimal.append({"role": role, "content": content})
+    return minimal
+
+
+# --------------- RAG Chatbot ---------------
 def rag_chatbot(
     user_message: str,
-    chat_history: list,
+    chat_history: List[Dict[str, str]],
     browser_id: str,
-) -> Generator[list, None, None]:
+) -> Generator[List[Dict[str, str]], None, None]:
     """
-    Retrieves relevant documents to user query and streams LLM response catching errors along the way.
+    Stream assistant output as a single growing message dict.
+    Do NOT mutate chat_history; Gradio manages it for type="messages".
     """
+    # RAG retrieval
     try:
-        logger.info("Trying to encode user query and retrieve related docs...")
-        user_query_encoded = embedding_model.encode(user_message)
+        logger.info("RAG: encoding query & retrieving docs")
+        user_query_encoded = embedding_model.encode(
+            [user_message], convert_to_numpy=True
+        )[0]
         top_chunk_keys = get_top_chunk_keys(
             user_query_encoded, keys_chunksEncoded, top_n=5
         )
         docs = get_docs(top_chunk_keys, knowledge_base)
+        logger.info("RAG: docs retrieved=%d (top_n=5)", len(docs))
     except Exception as e:
-        logger.exception(f"Error during document retrieval: {str(e)}")
+        logger.error("RAG: retrieval failed: %s", e)
        yield [
            {
                 "role": "assistant",
-                "content": f"⚠️ An error occurred during document retrieval. Please try again later.",
+                "content": "⚠️ An error occurred during document retrieval. Please try again later.",
            }
        ]
        return
 
+    # LLM stream
     try:
-        logger.info("Trying to call openAI chat API...")
+        logger.info(
+            "LLM: opening streaming completion (model=mistralai/mistral-nemo-12b-instruct/fp-8)"
+        )
         messages = get_messages(docs, user_message, chat_history)
-        chatCompletion_response = openAI_client.chat.completions.create(
+        chat_stream = openAI_client.chat.completions.create(
             model="mistralai/mistral-nemo-12b-instruct/fp-8",
             messages=messages,
             stream=True,
         )
+        logger.info("LLM: stream opened")
     except Exception as e:
-        logger.exception(f"Error during call to OpenAI Chat API: {str(e)}")
+        logger.error("LLM: API call failed: %s", e)
         yield [
             {
                 "role": "assistant",
-                "content": f"⚠️ An error occurred during client API call. Please try again later.",
+                "content": "⚠️ An error occurred during client API call. Please try again later.",
             }
         ]
         return
 
+    # Stream parse → yield a single growing assistant message
+    assistant_msg = {"role": "assistant", "content": ""}
     try:
-        logger.info("Trying to parse LLM response...")
-        llm_thinking = False
-        buffer = ""
-        chat_history.append({"role": "user", "content": user_message})
-        chat_history.append({"role": "assistant", "content": ""})
+        logger.info("LLM: streaming started")
 
-        for chunk in chatCompletion_response:
-            chunk_content = chunk.choices[0].delta.content
-            if not chunk_content:
+        buffer = ""
+        chunks_seen = 0
+        content_events = 0
+        chars_emitted = 0
+
+        for chunk in chat_stream:
+            chunks_seen += 1
+            choices = getattr(chunk, "choices", None)
+            if not choices:
                 continue
-
-            if chunk_content == "<think>":
-                llm_thinking = True
-                yield [{"role": "assistant", "content": "Thinking..."}]
+            delta = getattr(choices[0], "delta", None)
+            if not delta:
+                continue
+            piece = getattr(delta, "content", None)
+            if piece is None:
                 continue
 
-            if llm_thinking and chunk_content == "</think>":
-                llm_thinking = False
-                yield [{"role": "assistant", "content": "Finished thinking."}]
+            piece = _strip_think_tags(piece)
+            if not piece:
                 continue
 
-            if not llm_thinking:
-                buffer += chunk_content
+            content_events += 1
+            buffer += piece
 
-                if len(buffer) > 20 or "\n" in buffer:
-                    chat_history[-1]["content"] += buffer
-                    yield [chat_history[-1]]
-                    buffer = ""
+            if len(buffer) >= 24 or "\n" in buffer:
+                assistant_msg["content"] += buffer
+                chars_emitted += len(buffer)
+                yield [assistant_msg]  # append/update single assistant bubble
+                buffer = ""
 
         if buffer:
-            chat_history[-1]["content"] += buffer
-            yield [chat_history[-1]]
+            assistant_msg["content"] += buffer
+            chars_emitted += len(buffer)
+            yield [assistant_msg]
+
+        logger.info(
+            "LLM: streaming finished (chunks=%d, content_events=%d, chars=%d)",
+            chunks_seen,
+            content_events,
+            chars_emitted,
+        )
+
     except Exception as e:
-        logger.exception(f"Error during LLM response streaming: {str(e)}")
-        yield [
-            {
-                "role": "assistant",
-                "content": f"⚠️ An error occurred during LLM response streaming. Please try again later.",
-            }
-        ]
+        logger.error("LLM: streaming failed: %s", e)
+        if assistant_msg["content"]:
+            assistant_msg[
+                "content"
+            ] += "\n\n⚠️ An error occurred during LLM response streaming. Please try again later."
+            yield [assistant_msg]
+        else:
+            yield [
+                {
+                    "role": "assistant",
+                    "content": "⚠️ An error occurred during LLM response streaming. Please try again later.",
+                }
+            ]
+        return
 
+    # --- Upload transcript (optional) — reconstruct current turn explicitly
     try:
-        logger.info("Trying to upload chat history to Drive...")
-        chat_uploader.upload_chat_history(chat_history, browser_id)
+        if chat_uploader is not None:
+            # Gradio passes prior turns in `chat_history`. Build latest full transcript.
+            prior = _to_minimal(chat_history)
+            current_user = {"role": "user", "content": user_message}
+            final_history = prior + [
+                current_user,
+                {"role": "assistant", "content": assistant_msg["content"]},
+            ]
+
+            # Ensure we have a usable browser_id for the filename
+            if not browser_id:
+                browser_id = str(uuid.uuid4())
+            drive_filename = f"chat__{browser_id}.json"
+
+            logger.info(
+                "Upload: writing Drive file '%s' (messages=%d, mode=overwrite)",
+                drive_filename,
+                len(final_history),
+            )
+            chat_uploader.upload_chat_history(
+                final_history,
+                browser_id,
+                filename=drive_filename,
+                mode="overwrite",  # <-- overwrite-by-name semantics
+            )
+            logger.info("Upload: completed")
+        else:
+            logger.info("Upload: skipped (uploader not configured)")
     except Exception as e:
-        logger.warning(f"Warning: error during Google Drive upload: {e}")
+        logger.warning("Upload: failed (non-fatal): %s", e)
 
-    logger.info("Returning chat history...")
-    return chat_history
 
-
-# Gradio app code
+# --------------- Gradio app ---------------
 with gr.Blocks() as demo:
     browser_id_state = gr.BrowserState(default_value=None)
 
@@ -182,7 +294,9 @@ with gr.Blocks() as demo:
     def load_browser_id(current_id):
         if current_id is None or current_id == "":
             new_id = str(uuid.uuid4())
+            logger.info("Browser ID created: %s", new_id)
             return new_id
+        logger.info("Browser ID reused: %s", current_id)
         return current_id
 
     gr.ChatInterface(
@@ -191,20 +305,19 @@
         additional_inputs=browser_id_state,
         type="messages",
        examples=[
-            ["What is Matthew's educational background?"],
-            ["What machine learning projects has Matthew worked on?"],
-            ["What experience does Matthew have in software engineering?"],
-            ["Why did Matthew choose to pursue a degree in computer science?"],
-            ["Does Matthew have any leadership experience?"],
-            ["Has Matthew completed any Summer internships?"],
-            ["Tell me about some real-world projects Matthew has worked on."],
-            ["What is Matthew's greatest strength and weakness?"],
+            ["What is Matthew's educational background?", None],
+            ["What machine learning projects has Matthew worked on?", None],
+            ["What experience does Matthew have in software engineering?", None],
+            ["Why did Matthew choose to pursue a degree in computer science?", None],
+            ["Does Matthew have any leadership experience?", None],
+            ["Has Matthew completed any Summer internships?", None],
+            ["Tell me about some real-world projects Matthew has worked on?", None],
+            ["What is Matthew's greatest strength and weakness?", None],
         ],
        save_history=True,
        run_examples_on_click=False,
        cache_examples=False,
    )
 
-
 if __name__ == "__main__":
     demo.launch()
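
The app.py change above replaces the stateful `<think>`-flag parser with defensive chunk handling: think tags are stripped from each delta, and output accumulates into one growing assistant message that is flushed to the UI every ~24 characters or on a newline, so Gradio updates a single bubble instead of stacking new ones. A minimal, self-contained sketch of that buffering pattern (the `stream_assistant` helper and `fake_stream` list are illustrative stand-ins for the OpenAI chunk iterator, not code from the repo):

# Sketch of the buffered streaming pattern introduced above; fake_stream
# stands in for the OpenAI delta-content chunks (illustrative only).
from typing import Dict, Generator, Iterable, List


def stream_assistant(
    pieces: Iterable[str],
) -> Generator[List[Dict[str, str]], None, None]:
    assistant_msg = {"role": "assistant", "content": ""}
    buffer = ""
    for piece in pieces:
        # Strip think tags wherever they land inside a chunk, like
        # _strip_think_tags does; the old code compared whole chunks.
        piece = piece.replace("<think>", "").replace("</think>", "")
        if not piece:
            continue
        buffer += piece
        # Flush every ~24 chars or on a newline so the UI repaints in
        # coarse steps instead of once per token.
        if len(buffer) >= 24 or "\n" in buffer:
            assistant_msg["content"] += buffer
            yield [assistant_msg]  # same growing dict each time -> one bubble
            buffer = ""
    if buffer:  # flush whatever is left when the stream ends
        assistant_msg["content"] += buffer
        yield [assistant_msg]


fake_stream = ["<think>", "</think>", "Hello", ", world!", " Streaming", " works.\n"]
for update in stream_assistant(fake_stream):
    print(repr(update[-1]["content"]))
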
utils/chatLogger.py CHANGED
@@ -1,5 +1,7 @@
 import io
 import json
+from typing import List, Dict, Literal, Optional
+
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
 from google.oauth2 import service_account
@@ -13,16 +15,21 @@ class ChatUploader:
     ):
         """
         Initializes a new chat uploader instance using a service account JSON dict.
+        By default writes into a fixed root folder.
         """
         credentials = service_account.Credentials.from_service_account_info(
-            service_account_json, scopes=["https://www.googleapis.com/auth/drive"]
+            service_account_json,
+            scopes=["https://www.googleapis.com/auth/drive"],
+        )
+        # cache_discovery=False avoids deprecation noise
+        self.drive_service = build(
+            "drive", "v3", credentials=credentials, cache_discovery=False
         )
-        self.drive_service = build("drive", "v3", credentials=credentials)
         self.root_folder_id = root_folder_id
 
     def _get_or_create_browser_folder(self, browser_id: str) -> str:
         """
-        Searches for an existing folder for the given browser_id. If not found, creates a folder named 'browser_{browser_id}' and returns its ID.
+        Ensure a per-browser folder 'browser_{browser_id}' exists; return its file ID.
         """
         folder_name = f"browser_{browser_id}"
         query = (
@@ -34,35 +41,51 @@ class ChatUploader:
 
         if folders:
             return folders[0]["id"]
-        else:
-            metadata = {
-                "name": folder_name,
-                "mimeType": "application/vnd.google-apps.folder",
-                "parents": [self.root_folder_id],
-            }
-            folder = (
-                self.drive_service.files().create(body=metadata, fields="id").execute()
-            )
-            return folder["id"]
 
-    def upload_chat_history(
-        self, chat_history: list, browser_id: str, filename: str = "chat_log.json"
-    ) -> None:
+        metadata = {
+            "name": folder_name,
+            "mimeType": "application/vnd.google-apps.folder",
+            "parents": [self.root_folder_id],
+        }
+        folder = self.drive_service.files().create(body=metadata, fields="id").execute()
+        return folder["id"]
+
+    def _find_file(self, name: str, parent_id: str) -> Optional[str]:
         """
-        Uploads the chat log file inside the browser's folder. If the folder and/or file exists, it appends the new chat entries to the current log. Otherwise, it creates them.
+        Return file ID for a JSON file with given name in parent, else None.
         """
-        folder_id = self._get_or_create_browser_folder(browser_id)
-
         query = (
-            f"name = '{filename}' and '{folder_id}' in parents and "
+            f"name = '{name}' and '{parent_id}' in parents and "
             "mimeType = 'application/json' and trashed = false"
         )
         results = self.drive_service.files().list(q=query, fields="files(id)").execute()
         files = results.get("files", [])
+        return files[0]["id"] if files else None
 
-        if files:
-            file_id = files[0]["id"]
+    def upload_chat_history(
+        self,
+        chat_history: List[Dict[str, str]],
+        browser_id: str,
+        filename: str = "chat_log.json",
+        mode: Literal["overwrite", "append"] = "overwrite",
+    ) -> None:
+        """
+        Write the chat log inside the browser's folder.
+
+        - overwrite (default): REPLACE file contents with the provided chat_history
+          (this is what you want to keep Drive in sync with the UI)
+        - append: read existing JSON array and extend it with chat_history
+
+        chat_history is expected to be the *complete* transcript you want stored
+        (for overwrite), already normalized to [{role, content}, ...].
+        """
+        folder_id = self._get_or_create_browser_folder(browser_id)
+        file_id = self._find_file(filename, folder_id)
+
+        payload: List[Dict[str, str]] = chat_history
 
+        if mode == "append" and file_id:
+            # Load existing file and extend
             request = self.drive_service.files().get_media(fileId=file_id)
             existing_stream = io.BytesIO()
             downloader = MediaIoBaseDownload(existing_stream, request)
@@ -72,23 +95,25 @@ class ChatUploader:
 
             existing_stream.seek(0)
             try:
-                existing_chat_history = json.loads(existing_stream.read())
+                existing_chat = json.loads(existing_stream.read())
+                if isinstance(existing_chat, list):
+                    payload = existing_chat + chat_history
             except json.JSONDecodeError:
-                existing_chat_history = []
+                # Fall back to current chat_history only
+                payload = chat_history
 
-            updated_chat_history = existing_chat_history + chat_history
+        content = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8")
+        media = MediaIoBaseUpload(io.BytesIO(content), mimetype="application/json")
 
-            content = json.dumps(updated_chat_history, indent=2)
-            media = MediaIoBaseUpload(
-                io.BytesIO(content.encode()), mimetype="application/json"
-            )
+        if file_id:
+            # REPLACE contents
             self.drive_service.files().update(
                 fileId=file_id, media_body=media
             ).execute()
         else:
-            content = json.dumps(chat_history, indent=2)
-            media = MediaIoBaseUpload(
-                io.BytesIO(content.encode()), mimetype="application/json"
-            )
-            metadata = {"name": filename, "parents": [folder_id]}
+            metadata = {
+                "name": filename,
+                "parents": [folder_id],
+                "mimeType": "application/json",
+            }
             self.drive_service.files().create(body=metadata, media_body=media).execute()
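
The chatLogger.py change splits file lookup into `_find_file` and makes the write semantics explicit: `overwrite` (the new default) replaces the stored JSON with the full transcript, while `append` preserves the old extend-the-array behavior. A minimal sketch of just that payload-selection logic, with the Drive I/O stubbed out (`build_payload` and `existing_json` are hypothetical names used only for illustration):

# Sketch of upload_chat_history's mode logic; existing_json stands in for
# the JSON body that would be downloaded from Drive.
import json
from typing import Dict, List, Literal, Optional


def build_payload(
    new_history: List[Dict[str, str]],
    existing_json: Optional[str],
    mode: Literal["overwrite", "append"] = "overwrite",
) -> List[Dict[str, str]]:
    payload = new_history  # overwrite (default): Drive mirrors the UI transcript
    if mode == "append" and existing_json is not None:
        try:
            existing = json.loads(existing_json)
            if isinstance(existing, list):
                payload = existing + new_history  # extend the stored array
        except json.JSONDecodeError:
            payload = new_history  # unreadable log: keep only the new turns
    return payload


turns = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
assert build_payload(turns, json.dumps(turns)) == turns                 # overwrite
assert build_payload(turns, json.dumps(turns), mode="append") == turns + turns
assert build_payload(turns, "not json", mode="append") == turns         # fallback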