mkschulz9 committed
Commit 674e8c0 · Parent(s): c19573a

bugfix: bugfixes for LLM streaming

Files changed (2):
  1. app.py +205 -92
  2. utils/chatLogger.py +59 -34
app.py CHANGED
@@ -3,13 +3,13 @@ import json
 import base64
 import uuid
 import logging
+from typing import Generator, List, Dict, Tuple, Optional
 
-from typing import Generator
 from sentence_transformers import SentenceTransformer
 from openai import OpenAI
 import gradio as gr
-
 from dotenv import load_dotenv
+
 from utils.utils import (
     get_keys_chunks,
     get_docs,
@@ -17,53 +17,92 @@ from utils.utils import (
     get_messages,
     load_knowledge_base,
 )
-
 from utils.chatLogger import ChatUploader
 
 
-def initialize():
-    """
-    Initializes embedding model, encodes document chunks, loads environment variables, and initializes clients.
-    """
-    logging.basicConfig(level=logging.INFO)
-    logger = logging.getLogger(__name__)
-
+# --------------- Logging ---------------
+def _setup_logging() -> logging.Logger:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(levelname)s:%(name)s:%(message)s",
+    )
+    return logging.getLogger(__name__)
+
+
+# --------------- Initialization ---------------
+def _require_env(var: str) -> str:
+    val = os.getenv(var)
+    if not val:
+        raise RuntimeError(f"Missing required environment variable: {var}")
+    return val
+
+
+def initialize() -> Tuple[
+    SentenceTransformer,
+    List[Tuple[str, "numpy.ndarray"]],
+    Dict,
+    OpenAI,
+    logging.Logger,
+    Optional[ChatUploader],
+]:
+    logger = _setup_logging()
     logger.info("Initializing application...")
-    logger.info("Loading sentence embedding model...")
+
+    load_dotenv(override=False)
+    logger.info(".env loaded (override=False)")
+
     embedding_model_path = "ibm-granite/granite-embedding-125m-english"
     embedding_model = SentenceTransformer(embedding_model_path)
+    logger.info("Embedding model loaded: %s", embedding_model_path)
 
-    logger.info("Loading and encoding document chunks...")
     knowledge_base = load_knowledge_base()
-    keys, chunks = zip(*get_keys_chunks(knowledge_base))
-    chunks_encoded = embedding_model.encode(chunks)
-    keys_chunksEncoded = list(zip(keys, chunks_encoded))
-
-    logger.info("Loading env variables...")
-    if not os.getenv("SPACE_ID"):
-        load_dotenv()
+    logger.info("Knowledge base loaded")
+
+    pairs = list(get_keys_chunks(knowledge_base))
+    if not pairs:
+        raise RuntimeError("Knowledge base is empty – no chunks to encode.")
+    keys, chunks = zip(*pairs)
+    logger.info("KB chunks extracted: %d", len(chunks))
+
+    chunks_encoded = embedding_model.encode(
+        list(chunks),
+        batch_size=64,
+        convert_to_numpy=True,
+        show_progress_bar=False,
+    )
+    keys_chunks_encoded = list(zip(keys, chunks_encoded))
+    logger.info("KB chunks encoded: %d", len(keys_chunks_encoded))
 
-    logger.info("Initializing OpenAI client...")
-    openAI_client = OpenAI(
+    inference_api_key = _require_env("INFERENCE_API_KEY")
+    openai_client = OpenAI(
         base_url="https://api.inference.net/v1",
-        api_key=os.getenv("INFERENCE_API_KEY"),
+        api_key=inference_api_key,
     )
-
-    logger.info("Loading Drive service account details...")
-    drive_creds_encoded = os.getenv(
-        "GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64"
-    ).strip()
-    service_account_json = json.loads(base64.b64decode(drive_creds_encoded).decode())
-
-    logger.info("Initializing ChatUploader instance...")
-    chat_uploader = ChatUploader(service_account_json)
-
-    logger.info("Ready for user query...")
+    logger.info("OpenAI client initialized (base_url=api.inference.net)")
+
+    chat_uploader: Optional[ChatUploader] = None
+    drive_creds_b64 = os.getenv("GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64")
+    if drive_creds_b64:
+        try:
+            service_account_json = json.loads(
+                base64.b64decode(drive_creds_b64).decode()
+            )
+            chat_uploader = ChatUploader(service_account_json)
+            logger.info("Google Drive uploader configured")
+        except Exception as e:
+            logger.warning(
+                "Google Drive uploader not configured (error parsing creds): %s", e
+            )
+            chat_uploader = None
+    else:
+        logger.info("Google Drive uploader not configured (no creds env var)")
+
+    logger.info("Initialization complete")
     return (
         embedding_model,
-        keys_chunksEncoded,
+        keys_chunks_encoded,
         knowledge_base,
-        openAI_client,
+        openai_client,
         logger,
         chat_uploader,
     )
@@ -79,102 +118,175 @@ def initialize()
 ) = initialize()
 
 
+# --------------- Helpers ---------------
+def _strip_think_tags(text: str) -> str:
+    return text.replace("<think>", "").replace("</think>", "")
+
+
+def _to_minimal(history: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    """Keep only role/content keys to avoid metadata/options noise in uploads."""
+    minimal: List[Dict[str, str]] = []
+    for m in history:
+        role = m.get("role")
+        content = m.get("content", "")
+        if role is None:
+            # ignore malformed entries
+            continue
+        minimal.append({"role": role, "content": content})
+    return minimal
+
+
+# --------------- RAG Chatbot ---------------
 def rag_chatbot(
     user_message: str,
-    chat_history: list,
+    chat_history: List[Dict[str, str]],
     browser_id: str,
-) -> Generator[list, None, None]:
+) -> Generator[List[Dict[str, str]], None, None]:
     """
-    Retrieves relevant documents to user query and streams LLM response catching errors along the way.
+    Stream assistant output as a single growing message dict.
+    Do NOT mutate chat_history; Gradio manages it for type="messages".
     """
+    # RAG retrieval
     try:
-        logger.info("Trying to encode user query and retrieve related docs...")
-        user_query_encoded = embedding_model.encode(user_message)
+        logger.info("RAG: encoding query & retrieving docs")
+        user_query_encoded = embedding_model.encode(
+            [user_message], convert_to_numpy=True
+        )[0]
         top_chunk_keys = get_top_chunk_keys(
             user_query_encoded, keys_chunksEncoded, top_n=5
         )
         docs = get_docs(top_chunk_keys, knowledge_base)
+        logger.info("RAG: docs retrieved=%d (top_n=5)", len(docs))
     except Exception as e:
-        logger.exception(f"Error during document retrieval: {str(e)}")
+        logger.error("RAG: retrieval failed: %s", e)
        yield [
            {
                 "role": "assistant",
-                "content": f"⚠️ An error occurred during document retrieval. Please try again later.",
+                "content": "⚠️ An error occurred during document retrieval. Please try again later.",
            }
        ]
        return
 
+    # LLM stream
     try:
-        logger.info("Trying to call openAI chat API...")
+        logger.info(
+            "LLM: opening streaming completion (model=mistralai/mistral-nemo-12b-instruct/fp-8)"
+        )
         messages = get_messages(docs, user_message, chat_history)
-        chatCompletion_response = openAI_client.chat.completions.create(
+        chat_stream = openAI_client.chat.completions.create(
             model="mistralai/mistral-nemo-12b-instruct/fp-8",
             messages=messages,
             stream=True,
         )
+        logger.info("LLM: stream opened")
     except Exception as e:
-        logger.exception(f"Error during call to OpenAI Chat API: {str(e)}")
+        logger.error("LLM: API call failed: %s", e)
         yield [
             {
                 "role": "assistant",
-                "content": f"⚠️ An error occurred during client API call. Please try again later.",
+                "content": "⚠️ An error occurred during client API call. Please try again later.",
             }
         ]
         return
 
+    # Stream parse → yield a single growing assistant message
+    assistant_msg = {"role": "assistant", "content": ""}
     try:
-        logger.info("Trying to parse LLM response...")
-        llm_thinking = False
-        buffer = ""
-        chat_history.append({"role": "user", "content": user_message})
-        chat_history.append({"role": "assistant", "content": ""})
+        logger.info("LLM: streaming started")
 
-        for chunk in chatCompletion_response:
-            chunk_content = chunk.choices[0].delta.content
-            if not chunk_content:
+        buffer = ""
+        chunks_seen = 0
+        content_events = 0
+        chars_emitted = 0
+
+        for chunk in chat_stream:
+            chunks_seen += 1
+            choices = getattr(chunk, "choices", None)
+            if not choices:
                 continue
-
-            if chunk_content == "<think>":
-                llm_thinking = True
-                yield [{"role": "assistant", "content": "Thinking..."}]
+            delta = getattr(choices[0], "delta", None)
+            if not delta:
+                continue
+            piece = getattr(delta, "content", None)
+            if piece is None:
                 continue
 
-            if llm_thinking and chunk_content == "</think>":
-                llm_thinking = False
-                yield [{"role": "assistant", "content": "Finished thinking."}]
+            piece = _strip_think_tags(piece)
+            if not piece:
                 continue
 
-            if not llm_thinking:
-                buffer += chunk_content
+            content_events += 1
+            buffer += piece
 
-                if len(buffer) > 20 or "\n" in buffer:
-                    chat_history[-1]["content"] += buffer
-                    yield [chat_history[-1]]
-                    buffer = ""
+            if len(buffer) >= 24 or "\n" in buffer:
+                assistant_msg["content"] += buffer
+                chars_emitted += len(buffer)
+                yield [assistant_msg]  # append/update single assistant bubble
+                buffer = ""
 
         if buffer:
-            chat_history[-1]["content"] += buffer
-            yield [chat_history[-1]]
+            assistant_msg["content"] += buffer
+            chars_emitted += len(buffer)
+            yield [assistant_msg]
+
+        logger.info(
+            "LLM: streaming finished (chunks=%d, content_events=%d, chars=%d)",
+            chunks_seen,
+            content_events,
+            chars_emitted,
+        )
+
     except Exception as e:
-        logger.exception(f"Error during LLM response streaming: {str(e)}")
-        yield [
-            {
-                "role": "assistant",
-                "content": f"⚠️ An error occurred during LLM response streaming. Please try again later.",
-            }
-        ]
+        logger.error("LLM: streaming failed: %s", e)
+        if assistant_msg["content"]:
+            assistant_msg[
+                "content"
+            ] += "\n\n⚠️ An error occurred during LLM response streaming. Please try again later."
+            yield [assistant_msg]
+        else:
+            yield [
+                {
+                    "role": "assistant",
+                    "content": "⚠️ An error occurred during LLM response streaming. Please try again later.",
+                }
+            ]
+        return
 
+    # --- Upload transcript (optional) — reconstruct current turn explicitly
     try:
-        logger.info("Trying to upload chat history to Drive...")
-        chat_uploader.upload_chat_history(chat_history, browser_id)
+        if chat_uploader is not None:
+            # Gradio passes prior turns in `chat_history`. Build latest full transcript.
+            prior = _to_minimal(chat_history)
+            current_user = {"role": "user", "content": user_message}
+            final_history = prior + [
+                current_user,
+                {"role": "assistant", "content": assistant_msg["content"]},
+            ]
+
+            # Ensure we have a usable browser_id for the filename
+            if not browser_id:
+                browser_id = str(uuid.uuid4())
+            drive_filename = f"chat__{browser_id}.json"
+
+            logger.info(
+                "Upload: writing Drive file '%s' (messages=%d, mode=overwrite)",
+                drive_filename,
+                len(final_history),
+            )
+            chat_uploader.upload_chat_history(
+                final_history,
+                browser_id,
+                filename=drive_filename,
+                mode="overwrite",  # <-- overwrite-by-name semantics
+            )
+            logger.info("Upload: completed")
+        else:
+            logger.info("Upload: skipped (uploader not configured)")
     except Exception as e:
-        logger.warning(f"Warning: error during Google Drive upload: {e}")
+        logger.warning("Upload: failed (non-fatal): %s", e)
 
-    logger.info("Returning chat history...")
-    return chat_history
 
-
-# Gradio app code
+# --------------- Gradio app ---------------
 with gr.Blocks() as demo:
     browser_id_state = gr.BrowserState(default_value=None)
 
@@ -182,7 +294,9 @@ with gr.Blocks() as demo:
     def load_browser_id(current_id):
         if current_id is None or current_id == "":
             new_id = str(uuid.uuid4())
+            logger.info("Browser ID created: %s", new_id)
             return new_id
+        logger.info("Browser ID reused: %s", current_id)
         return current_id
 
     gr.ChatInterface(
@@ -191,20 +305,19 @@
         additional_inputs=browser_id_state,
         type="messages",
        examples=[
-            ["What is Matthew's educational background?"],
-            ["What machine learning projects has Matthew worked on?"],
-            ["What experience does Matthew have in software engineering?"],
-            ["Why did Matthew choose to pursue a degree in computer science?"],
-            ["Does Matthew have any leadership experience?"],
-            ["Has Matthew completed any Summer internships?"],
-            ["Tell me about some real-world projects Matthew has worked on."],
-            ["What is Matthew's greatest strength and weakness?"],
+            ["What is Matthew's educational background?", None],
+            ["What machine learning projects has Matthew worked on?", None],
+            ["What experience does Matthew have in software engineering?", None],
+            ["Why did Matthew choose to pursue a degree in computer science?", None],
+            ["Does Matthew have any leadership experience?", None],
+            ["Has Matthew completed any Summer internships?", None],
+            ["Tell me about some real-world projects Matthew has worked on?", None],
+            ["What is Matthew's greatest strength and weakness?", None],
         ],
        save_history=True,
        run_examples_on_click=False,
        cache_examples=False,
    )
 
-
 if __name__ == "__main__":
     demo.launch()
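
The app.py change above replaces the stateful `<think>`-flag parser with defensive chunk handling: think tags are stripped from each delta, and output accumulates into one growing assistant message that is flushed to the UI every ~24 characters or on a newline, so Gradio updates a single bubble instead of stacking new ones. A minimal, self-contained sketch of that buffering pattern (the `stream_assistant` helper and `fake_stream` list are illustrative stand-ins for the OpenAI chunk iterator, not code from the repo):

# Sketch of the buffered streaming pattern introduced above; fake_stream
# stands in for the OpenAI delta-content chunks (illustrative only).
from typing import Dict, Generator, Iterable, List


def stream_assistant(
    pieces: Iterable[str],
) -> Generator[List[Dict[str, str]], None, None]:
    assistant_msg = {"role": "assistant", "content": ""}
    buffer = ""
    for piece in pieces:
        # Strip think tags wherever they land inside a chunk, like
        # _strip_think_tags does; the old code compared whole chunks.
        piece = piece.replace("<think>", "").replace("</think>", "")
        if not piece:
            continue
        buffer += piece
        # Flush every ~24 chars or on a newline so the UI repaints in
        # coarse steps instead of once per token.
        if len(buffer) >= 24 or "\n" in buffer:
            assistant_msg["content"] += buffer
            yield [assistant_msg]  # same growing dict each time -> one bubble
            buffer = ""
    if buffer:  # flush whatever is left when the stream ends
        assistant_msg["content"] += buffer
        yield [assistant_msg]


fake_stream = ["<think>", "</think>", "Hello", ", world!", " Streaming", " works.\n"]
for update in stream_assistant(fake_stream):
    print(repr(update[-1]["content"]))
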
utils/chatLogger.py CHANGED
@@ -1,5 +1,7 @@
 import io
 import json
+from typing import List, Dict, Literal, Optional
+
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
 from google.oauth2 import service_account
@@ -13,16 +15,21 @@ class ChatUploader:
     ):
         """
         Initializes a new chat uploader instance using a service account JSON dict.
+        By default writes into a fixed root folder.
         """
         credentials = service_account.Credentials.from_service_account_info(
-            service_account_json, scopes=["https://www.googleapis.com/auth/drive"]
+            service_account_json,
+            scopes=["https://www.googleapis.com/auth/drive"],
+        )
+        # cache_discovery=False avoids deprecation noise
+        self.drive_service = build(
+            "drive", "v3", credentials=credentials, cache_discovery=False
         )
-        self.drive_service = build("drive", "v3", credentials=credentials)
         self.root_folder_id = root_folder_id
 
     def _get_or_create_browser_folder(self, browser_id: str) -> str:
         """
-        Searches for an existing folder for the given browser_id. If not found, creates a folder named 'browser_{browser_id}' and returns its ID.
+        Ensure a per-browser folder 'browser_{browser_id}' exists; return its file ID.
         """
         folder_name = f"browser_{browser_id}"
         query = (
@@ -34,35 +41,51 @@ class ChatUploader:
 
         if folders:
             return folders[0]["id"]
-        else:
-            metadata = {
-                "name": folder_name,
-                "mimeType": "application/vnd.google-apps.folder",
-                "parents": [self.root_folder_id],
-            }
-            folder = (
-                self.drive_service.files().create(body=metadata, fields="id").execute()
-            )
-            return folder["id"]
 
-    def upload_chat_history(
-        self, chat_history: list, browser_id: str, filename: str = "chat_log.json"
-    ) -> None:
+        metadata = {
+            "name": folder_name,
+            "mimeType": "application/vnd.google-apps.folder",
+            "parents": [self.root_folder_id],
+        }
+        folder = self.drive_service.files().create(body=metadata, fields="id").execute()
+        return folder["id"]
+
+    def _find_file(self, name: str, parent_id: str) -> Optional[str]:
         """
-        Uploads the chat log file inside the browser's folder. If the folder and/or file exists, it appends the new chat entries to the current log. Otherwise, it creates them.
+        Return file ID for a JSON file with given name in parent, else None.
         """
-        folder_id = self._get_or_create_browser_folder(browser_id)
-
         query = (
-            f"name = '{filename}' and '{folder_id}' in parents and "
+            f"name = '{name}' and '{parent_id}' in parents and "
             "mimeType = 'application/json' and trashed = false"
         )
         results = self.drive_service.files().list(q=query, fields="files(id)").execute()
         files = results.get("files", [])
+        return files[0]["id"] if files else None
 
-        if files:
-            file_id = files[0]["id"]
+    def upload_chat_history(
+        self,
+        chat_history: List[Dict[str, str]],
+        browser_id: str,
+        filename: str = "chat_log.json",
+        mode: Literal["overwrite", "append"] = "overwrite",
+    ) -> None:
+        """
+        Write the chat log inside the browser's folder.
+
+        - overwrite (default): REPLACE file contents with the provided chat_history
+          (this is what you want to keep Drive in sync with the UI)
+        - append: read existing JSON array and extend it with chat_history
+
+        chat_history is expected to be the *complete* transcript you want stored
+        (for overwrite), already normalized to [{role, content}, ...].
+        """
+        folder_id = self._get_or_create_browser_folder(browser_id)
+        file_id = self._find_file(filename, folder_id)
+
+        payload: List[Dict[str, str]] = chat_history
 
+        if mode == "append" and file_id:
+            # Load existing file and extend
             request = self.drive_service.files().get_media(fileId=file_id)
             existing_stream = io.BytesIO()
             downloader = MediaIoBaseDownload(existing_stream, request)
@@ -72,23 +95,25 @@ class ChatUploader:
 
             existing_stream.seek(0)
             try:
-                existing_chat_history = json.loads(existing_stream.read())
+                existing_chat = json.loads(existing_stream.read())
+                if isinstance(existing_chat, list):
+                    payload = existing_chat + chat_history
             except json.JSONDecodeError:
-                existing_chat_history = []
+                # Fall back to current chat_history only
+                payload = chat_history
 
-            updated_chat_history = existing_chat_history + chat_history
+        content = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8")
+        media = MediaIoBaseUpload(io.BytesIO(content), mimetype="application/json")
 
-            content = json.dumps(updated_chat_history, indent=2)
-            media = MediaIoBaseUpload(
-                io.BytesIO(content.encode()), mimetype="application/json"
-            )
+        if file_id:
+            # REPLACE contents
             self.drive_service.files().update(
                 fileId=file_id, media_body=media
             ).execute()
         else:
-            content = json.dumps(chat_history, indent=2)
-            media = MediaIoBaseUpload(
-                io.BytesIO(content.encode()), mimetype="application/json"
-            )
-            metadata = {"name": filename, "parents": [folder_id]}
+            metadata = {
+                "name": filename,
+                "parents": [folder_id],
+                "mimeType": "application/json",
+            }
             self.drive_service.files().create(body=metadata, media_body=media).execute()
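
The chatLogger.py change splits file lookup into `_find_file` and makes the write semantics explicit: `overwrite` (the new default) replaces the stored JSON with the full transcript, while `append` preserves the old extend-the-array behavior. A minimal sketch of just that payload-selection logic, with the Drive I/O stubbed out (`build_payload` and `existing_json` are hypothetical names used only for illustration):

# Sketch of upload_chat_history's mode logic; existing_json stands in for
# the JSON body that would be downloaded from Drive.
import json
from typing import Dict, List, Literal, Optional


def build_payload(
    new_history: List[Dict[str, str]],
    existing_json: Optional[str],
    mode: Literal["overwrite", "append"] = "overwrite",
) -> List[Dict[str, str]]:
    payload = new_history  # overwrite (default): Drive mirrors the UI transcript
    if mode == "append" and existing_json is not None:
        try:
            existing = json.loads(existing_json)
            if isinstance(existing, list):
                payload = existing + new_history  # extend the stored array
        except json.JSONDecodeError:
            payload = new_history  # unreadable log: keep only the new turns
    return payload


turns = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
assert build_payload(turns, json.dumps(turns)) == turns                 # overwrite
assert build_payload(turns, json.dumps(turns), mode="append") == turns + turns
assert build_payload(turns, "not json", mode="append") == turns         # fallback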