bigwolfe committed on
Commit
92d421d
·
1 Parent(s): 2b43b88
Files changed (1) hide show
  1. backend/src/services/rag_index.py +118 -20
backend/src/services/rag_index.py CHANGED
@@ -33,7 +33,7 @@ from llama_index.core.llms import ChatMessage as LlamaChatMessage, MessageRole
33
 
34
  from .config import get_config
35
  from .vault import VaultService
36
- from ..models.rag import ChatMessage, ChatResponse, SourceReference, StatusResponse
37
 
38
  class RAGIndexService:
39
  """Service for managing LlamaIndex vector stores."""
@@ -60,22 +60,131 @@ class RAGIndexService:
60
  self._initialized = True
61
 
62
  def _setup_gemini(self):
63
- # ... (existing)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def get_persist_dir(self, user_id: str) -> str:
66
- # ... (existing)
 
 
 
67
 
68
  def get_or_build_index(self, user_id: str) -> VectorStoreIndex:
69
- # ... (existing)
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def build_index(self, user_id: str) -> VectorStoreIndex:
72
- # ... (existing)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  def rebuild_index(self, user_id: str) -> VectorStoreIndex:
75
- # ... (existing)
 
76
 
77
  def get_status(self, user_id: str) -> StatusResponse:
78
- # ... (existing)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  def _create_note_tool(self, user_id: str):
81
  """Create a tool for writing new notes."""
@@ -104,10 +213,6 @@ class RAGIndexService:
104
  body=content,
105
  metadata={"created_by": "gemini-agent"}
106
  )
107
- # Index the new note immediately so agent knows about it?
108
- # write_note does NOT auto-update the RAG index (it updates FTS5).
109
- # We might need to add it to the index.
110
- # For now, just return success.
111
  return f"Note created successfully at {path}"
112
  except Exception as e:
113
  return f"Failed to create note: {e}"
@@ -124,12 +229,6 @@ class RAGIndexService:
124
  path: The current path of the note (e.g. "agent-notes/My Note.md").
125
  target_folder: The destination folder (e.g. "agent-notes/archive").
126
  """
127
- # Constraint: Can only move notes created by agent (in agent-notes/)?
128
- # Or allow moving anywhere? Spec said "not deleting or editing existing".
129
- # Moving is technically deleting + creating.
130
- # Let's restrict source to agent-notes/ to be safe?
131
- # Or just allow it. "We need one for moving notes into folder".
132
-
133
  if not path.endswith(".md"):
134
  path += ".md"
135
 
@@ -153,7 +252,6 @@ class RAGIndexService:
153
  Args:
154
  folder: The path of the folder to create (e.g. "agent-notes/archive").
155
  """
156
- # Sanitize path?
157
  safe_folder = folder.strip("/")
158
 
159
  try:
@@ -262,10 +360,10 @@ class RAGIndexService:
262
  action="updated"
263
  ))
264
  elif tool_output.tool_name == "create_folder":
265
- pass # No badge for folders yet
266
 
267
  return ChatResponse(
268
  answer=str(response),
269
  sources=sources,
270
  notes_written=notes_written
271
- )
 
33
 
34
  from .config import get_config
35
  from .vault import VaultService
36
+ from ..models.rag import ChatMessage, ChatResponse, SourceReference, StatusResponse, NoteWritten
37
 
38
  class RAGIndexService:
39
  """Service for managing LlamaIndex vector stores."""
 
60
  self._initialized = True
61
 
62
  def _setup_gemini(self):
63
+ """Configure global LlamaIndex settings for Gemini."""
64
+ if not Gemini or not GeminiEmbedding:
65
+ logger.error("Google GenAI modules not loaded. RAG setup skipped.")
66
+ return
67
+
68
+ api_key = self.config.google_api_key
69
+ if not api_key:
70
+ logger.warning("GOOGLE_API_KEY not set. RAG features will fail.")
71
+ return
72
+
73
+ # Log key status (masked)
74
+ masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
75
+ logger.info(f"Configuring Gemini with API key: {masked_key}")
76
+
77
+ # Set up Gemini
78
+ try:
79
+ # Configure global settings
80
+ Settings.llm = Gemini(
81
+ model="gemini-2.0-flash",
82
+ api_key=self.config.google_api_key
83
+ )
84
+ Settings.embed_model = GeminiEmbedding(
85
+ model_name="models/text-embedding-004",
86
+ api_key=self.config.google_api_key
87
+ )
88
+ except Exception as e:
89
+ logger.error(f"Failed to setup Gemini: {e}")
90
 
91
  def get_persist_dir(self, user_id: str) -> str:
92
+ """Get persistence directory for a user's index."""
93
+ user_dir = self.config.llamaindex_persist_dir / user_id
94
+ user_dir.mkdir(parents=True, exist_ok=True)
95
+ return str(user_dir)
96
 
97
  def get_or_build_index(self, user_id: str) -> VectorStoreIndex:
98
+ """Load existing index or build a new one from vault notes."""
99
+ with self._index_lock:
100
+ persist_dir = self.get_persist_dir(user_id)
101
+
102
+ # check if index files exist (docstore.json, index_store.json etc)
103
+ try:
104
+ storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
105
+ index = load_index_from_storage(storage_context)
106
+ logger.info(f"Loaded existing index for user {user_id}")
107
+ return index
108
+ except Exception:
109
+ logger.info(f"No valid index found for {user_id}, building new one...")
110
+ return self.build_index(user_id)
111
 
112
  def build_index(self, user_id: str) -> VectorStoreIndex:
113
+ """Build a new index from the user's vault."""
114
+ if not self.config.google_api_key:
115
+ raise ValueError("GOOGLE_API_KEY required to build index")
116
+
117
+ # Read notes from VaultService
118
+ notes = self.vault_service.list_notes(user_id)
119
+ if not notes:
120
+ # Handle empty vault (Fix #8)
121
+ logger.info(f"No notes found for {user_id}, creating empty index")
122
+ index = VectorStoreIndex.from_documents([])
123
+ # Persist empty index to avoid rebuilding every time?
124
+ # LlamaIndex might not persist empty index well.
125
+ # Let's just return it.
126
+ return index
127
+
128
+ documents = []
129
+
130
+ for note_summary in notes:
131
+ path = note_summary["path"]
132
+ try:
133
+ note = self.vault_service.read_note(user_id, path)
134
+ # Create Document
135
+ metadata = {
136
+ "path": path,
137
+ "title": note["title"],
138
+ **note.get("metadata", {})
139
+ }
140
+ doc = Document(
141
+ text=note["body"],
142
+ metadata=metadata,
143
+ id_=path # Use path as ID for stability
144
+ )
145
+ documents.append(doc)
146
+ except Exception as e:
147
+ logger.warning(f"Failed to index note {path}: {e}")
148
+
149
+ logger.info(f"Indexing {len(documents)} documents for {user_id}")
150
+
151
+ index = VectorStoreIndex.from_documents(documents)
152
+
153
+ # Persist
154
+ persist_dir = self.get_persist_dir(user_id)
155
+ index.storage_context.persist(persist_dir=persist_dir)
156
+ logger.info(f"Persisted index to {persist_dir}")
157
+
158
+ return index
159
 
160
  def rebuild_index(self, user_id: str) -> VectorStoreIndex:
161
+ """Force rebuild of index."""
162
+ return self.build_index(user_id)
163
 
164
  def get_status(self, user_id: str) -> StatusResponse:
165
+ """Get index status."""
166
+ persist_dir = self.get_persist_dir(user_id)
167
+ doc_store_path = os.path.join(persist_dir, "docstore.json")
168
+
169
+ doc_count = 0
170
+ status = "building"
171
+
172
+ if os.path.exists(doc_store_path):
173
+ status = "ready"
174
+ try:
175
+ # Simple line count or file size check to avoid loading whole JSON
176
+ # Actually, docstore.json is a dict.
177
+ # Let's just load it if it's small, or stat it.
178
+ # For MVP, just checking existence is "ready".
179
+ # To get count, we can try loading keys.
180
+ import json
181
+ with open(doc_store_path, 'r') as f:
182
+ data = json.load(f)
183
+ doc_count = len(data.get("docstore/data", {}))
184
+ except Exception:
185
+ logger.warning(f"Failed to read docstore for status: {doc_store_path}")
186
+
187
+ return StatusResponse(status=status, doc_count=doc_count, last_updated=None)
188
 
189
  def _create_note_tool(self, user_id: str):
190
  """Create a tool for writing new notes."""
 
213
  body=content,
214
  metadata={"created_by": "gemini-agent"}
215
  )
 
 
 
 
216
  return f"Note created successfully at {path}"
217
  except Exception as e:
218
  return f"Failed to create note: {e}"
 
229
  path: The current path of the note (e.g. "agent-notes/My Note.md").
230
  target_folder: The destination folder (e.g. "agent-notes/archive").
231
  """
 
 
 
 
 
 
232
  if not path.endswith(".md"):
233
  path += ".md"
234
 
 
252
  Args:
253
  folder: The path of the folder to create (e.g. "agent-notes/archive").
254
  """
 
255
  safe_folder = folder.strip("/")
256
 
257
  try:
 
360
  action="updated"
361
  ))
362
  elif tool_output.tool_name == "create_folder":
363
+ pass
364
 
365
  return ChatResponse(
366
  answer=str(response),
367
  sources=sources,
368
  notes_written=notes_written
369
+ )