Bryceeee committed on
Commit
c856b02
Β·
verified Β·
1 Parent(s): 9bd564b

Update app.py

Browse files

Performance-Optimized Hugging Face Spaces Entry Point
FIXED VERSION: Preserves two-value return format (answer, footnotes)

This version fixes the ValueError by ensuring the query wrapper
returns the same format as the original RAG engine: (answer, footnotes)

Files changed (1) hide show
  1. app.py +65 -39
app.py CHANGED
@@ -1,11 +1,13 @@
1
  """
2
  Performance-Optimized Hugging Face Spaces Entry Point
3
- Solves slow response and loading issues
 
 
 
4
  """
5
  import os
6
  import sys
7
  from pathlib import Path
8
- import asyncio
9
  from concurrent.futures import ThreadPoolExecutor
10
 
11
  # Add the current directory to Python path for Spaces environment
@@ -48,14 +50,15 @@ except ImportError as e:
48
  print(f"⚠️ Scenario contextualization modules not available: {e}")
49
 
50
  # Performance configuration
51
- ENABLE_CACHING = True # Enable query caching
52
- MAX_WORKERS = 4 # Thread pool size
53
  QUERY_TIMEOUT = 30 # Query timeout in seconds
54
 
55
- # Global thread pool for async processing
56
  executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
57
 
58
- # Simple in-memory cache for queries
 
59
  query_cache = {}
60
 
61
 
@@ -64,14 +67,18 @@ def initialize_system(config: Config) -> dict:
64
  Initialize the RAG system components with performance optimization
65
 
66
  Args:
67
- config: Configuration object
68
 
69
  Returns:
70
- Dictionary containing all initialized components
 
 
 
 
71
  """
72
  print("πŸ”§ Initializing core components...")
73
 
74
- # Initialize OpenAI client
75
  if not config.openai_api_key:
76
  raise ValueError(
77
  "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
@@ -100,26 +107,24 @@ def initialize_system(config: Config) -> dict:
100
  vector_store_id = vector_store_details["id"]
101
  config.save_vector_store_id(vector_store_id, config.vector_store_name)
102
 
103
- # Upload files
104
  upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
105
  if upload_stats["successful_uploads"] == 0:
106
  raise RuntimeError("Failed to upload any files")
107
  else:
108
  print(f"βœ… Using existing vector store: {vector_store_id}")
109
 
110
- # Initialize RAG query engine
111
  print("πŸ”§ Initializing RAG engine...")
112
  rag_engine = RAGQueryEngine(client, vector_store_id, config.model)
113
 
114
- # Initialize question generator
115
  print("πŸ”§ Initializing question generator...")
116
  question_generator = QuestionGenerator(client, rag_engine)
117
 
118
- # Initialize knowledge graph generator
119
  print("πŸ”§ Initializing knowledge graph...")
120
  knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))
121
 
122
- # Initialize optional modules (with reduced logging)
123
  user_profiling = None
124
  learning_path_generator = None
125
  adaptive_engine = None
@@ -133,6 +138,7 @@ def initialize_system(config: Config) -> dict:
133
  except Exception as e:
134
  print(f"⚠️ Error initializing Personalized Learning System: {e}")
135
 
 
136
  proactive_engine = None
137
  if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
138
  try:
@@ -143,6 +149,7 @@ def initialize_system(config: Config) -> dict:
143
  except Exception as e:
144
  print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")
145
 
 
146
  enhanced_rag_engine = None
147
  if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
148
  try:
@@ -184,30 +191,38 @@ def create_optimized_query_wrapper(rag_engine):
184
  """
185
  Create an optimized query wrapper with caching, timeout, and async processing
186
 
 
 
187
  Args:
188
  rag_engine: The RAG query engine to wrap
189
 
190
  Returns:
191
- Optimized query function
192
  """
193
- def query_with_optimization(question: str, use_cache: bool = True) -> str:
 
 
 
194
  """
195
  Optimized query function with caching and timeout protection
196
 
197
  Args:
198
  question: User's question
199
- use_cache: Whether to use cache (default: True)
200
 
201
  Returns:
202
- Answer string
 
 
203
  """
 
204
  if not question or not question.strip():
205
- return "Please enter a question."
206
 
207
  # Normalize question for cache key
208
  cache_key = question.strip().lower()
209
 
210
- # Check cache
211
  if use_cache and ENABLE_CACHING and cache_key in query_cache:
212
  print(f"πŸ“‹ Using cached result for: {question[:50]}...")
213
  return query_cache[cache_key]
@@ -215,31 +230,42 @@ def create_optimized_query_wrapper(rag_engine):
215
  try:
216
  print(f"πŸ” Processing query: {question[:50]}...")
217
 
218
- # Execute query using thread pool (non-blocking)
219
- future = executor.submit(rag_engine.query, question)
220
 
221
- # Wait for result with timeout
222
  result = future.result(timeout=QUERY_TIMEOUT)
223
 
224
- # Cache the result
 
 
 
 
 
 
 
 
 
225
  if ENABLE_CACHING:
226
- query_cache[cache_key] = result
227
- # Limit cache size
 
228
  if len(query_cache) > 100:
229
- # Remove oldest entry
230
  query_cache.pop(next(iter(query_cache)))
231
 
232
  print(f"βœ… Query completed successfully")
233
- return result
234
 
235
  except TimeoutError:
236
  error_msg = "⏱️ Query timeout. Please try a simpler question or try again later."
237
  print(error_msg)
238
- return error_msg
 
239
  except Exception as e:
240
  error_msg = f"❌ Error processing query: {str(e)}"
241
  print(error_msg)
242
- return error_msg
243
 
244
  return query_with_optimization
245
 
@@ -249,7 +275,7 @@ def create_app():
249
  Create and return the optimized Gradio app for Hugging Face Spaces
250
 
251
  Returns:
252
- Gradio Blocks app
253
  """
254
  print("=" * 60)
255
  print("πŸš— CSRC Car Manual RAG System - Performance Optimized")
@@ -258,7 +284,7 @@ def create_app():
258
  # Load configuration
259
  config = Config()
260
 
261
- # Initialize system
262
  try:
263
  components = initialize_system(config)
264
  except Exception as e:
@@ -285,8 +311,8 @@ def create_app():
285
  # Create optimized query wrapper
286
  optimized_query = create_optimized_query_wrapper(components["rag_engine"])
287
 
288
- # Replace original RAG engine query method with optimized version
289
- original_query = components["rag_engine"].query
290
  components["rag_engine"].query = optimized_query
291
 
292
  # Build Gradio interface
@@ -305,11 +331,11 @@ def create_app():
305
  print("πŸ“¦ Creating interface components...")
306
  demo = interface_builder.create_interface()
307
 
308
- # Enable queue for better performance
309
  print("⚑ Enabling queue for better performance...")
310
  demo.queue(
311
  max_size=20, # Maximum queue size
312
- default_concurrency_limit=5 # Concurrency limit
313
  )
314
 
315
  print("βœ… Gradio interface created successfully!")
@@ -335,7 +361,7 @@ def create_app():
335
  )
336
 
337
 
338
- # Prevent multiple initializations using singleton pattern
339
  _app_instance = None
340
 
341
  def get_app():
@@ -361,8 +387,8 @@ if __name__ == "__main__":
361
  demo.launch(
362
  server_name="0.0.0.0",
363
  server_port=7860,
364
- show_error=True, # Show detailed errors
365
- favicon_path=None, # Skip favicon loading for faster startup
366
  )
367
  else:
368
  # Module-level variable for Spaces auto-detection
 
1
  """
2
  Performance-Optimized Hugging Face Spaces Entry Point
3
+ FIXED VERSION: Preserves two-value return format (answer, footnotes)
4
+
5
+ This version fixes the ValueError by ensuring the query wrapper
6
+ returns the same format as the original RAG engine: (answer, footnotes)
7
  """
8
  import os
9
  import sys
10
  from pathlib import Path
 
11
  from concurrent.futures import ThreadPoolExecutor
12
 
13
  # Add the current directory to Python path for Spaces environment
 
50
  print(f"⚠️ Scenario contextualization modules not available: {e}")
51
 
52
  # Performance configuration
53
+ ENABLE_CACHING = True # Enable query result caching
54
+ MAX_WORKERS = 4 # Thread pool worker count
55
  QUERY_TIMEOUT = 30 # Query timeout in seconds
56
 
57
+ # Global thread pool for asynchronous query processing
58
  executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
59
 
60
+ # In-memory cache for query results
61
+ # Format: {question: (answer, footnotes)}
62
  query_cache = {}
63
 
64
 
 
67
  Initialize the RAG system components with performance optimization
68
 
69
  Args:
70
+ config: Configuration object containing API keys and settings
71
 
72
  Returns:
73
+ Dictionary containing all initialized system components
74
+
75
+ Raises:
76
+ ValueError: If OPENAI_API_KEY is not configured
77
+ RuntimeError: If system initialization fails
78
  """
79
  print("πŸ”§ Initializing core components...")
80
 
81
+ # Validate OpenAI API key
82
  if not config.openai_api_key:
83
  raise ValueError(
84
  "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
 
107
  vector_store_id = vector_store_details["id"]
108
  config.save_vector_store_id(vector_store_id, config.vector_store_name)
109
 
110
+ # Upload PDF files to vector store
111
  upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
112
  if upload_stats["successful_uploads"] == 0:
113
  raise RuntimeError("Failed to upload any files")
114
  else:
115
  print(f"βœ… Using existing vector store: {vector_store_id}")
116
 
117
+ # Initialize core RAG components
118
  print("πŸ”§ Initializing RAG engine...")
119
  rag_engine = RAGQueryEngine(client, vector_store_id, config.model)
120
 
 
121
  print("πŸ”§ Initializing question generator...")
122
  question_generator = QuestionGenerator(client, rag_engine)
123
 
 
124
  print("πŸ”§ Initializing knowledge graph...")
125
  knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))
126
 
127
+ # Initialize optional personalized learning modules
128
  user_profiling = None
129
  learning_path_generator = None
130
  adaptive_engine = None
 
138
  except Exception as e:
139
  print(f"⚠️ Error initializing Personalized Learning System: {e}")
140
 
141
+ # Initialize optional proactive learning
142
  proactive_engine = None
143
  if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
144
  try:
 
149
  except Exception as e:
150
  print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")
151
 
152
+ # Initialize optional scenario contextualization
153
  enhanced_rag_engine = None
154
  if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
155
  try:
 
191
  """
192
  Create an optimized query wrapper with caching, timeout, and async processing
193
 
194
+ CRITICAL: This wrapper preserves the original return format: (answer, footnotes)
195
+
196
  Args:
197
  rag_engine: The RAG query engine to wrap
198
 
199
  Returns:
200
+ Optimized query function that returns (answer, footnotes)
201
  """
202
+ # Store reference to original query method
203
+ original_query = rag_engine.query
204
+
205
+ def query_with_optimization(question: str, use_cache: bool = True):
206
  """
207
  Optimized query function with caching and timeout protection
208
 
209
  Args:
210
  question: User's question
211
+ use_cache: Whether to use cached results (default: True)
212
 
213
  Returns:
214
+ Tuple of (answer: str, footnotes: list)
215
+ - answer: The response text
216
+ - footnotes: List of source references
217
  """
218
+ # Validate input
219
  if not question or not question.strip():
220
+ return "Please enter a question.", []
221
 
222
  # Normalize question for cache key
223
  cache_key = question.strip().lower()
224
 
225
+ # Check cache for previous results
226
  if use_cache and ENABLE_CACHING and cache_key in query_cache:
227
  print(f"πŸ“‹ Using cached result for: {question[:50]}...")
228
  return query_cache[cache_key]
 
230
  try:
231
  print(f"πŸ” Processing query: {question[:50]}...")
232
 
233
+ # Execute query in thread pool (non-blocking)
234
+ future = executor.submit(original_query, question)
235
 
236
+ # Wait for result with timeout protection
237
  result = future.result(timeout=QUERY_TIMEOUT)
238
 
239
+ # Handle different return formats
240
+ # Original RAG engine returns (answer, footnotes)
241
+ if isinstance(result, tuple) and len(result) == 2:
242
+ answer, footnotes = result
243
+ else:
244
+ # Fallback: if only single value returned
245
+ answer = str(result)
246
+ footnotes = []
247
+
248
+ # Cache the complete result (both answer and footnotes)
249
  if ENABLE_CACHING:
250
+ query_cache[cache_key] = (answer, footnotes)
251
+
252
+ # Limit cache size to prevent memory issues
253
  if len(query_cache) > 100:
254
+ # Remove oldest entry (FIFO)
255
  query_cache.pop(next(iter(query_cache)))
256
 
257
  print(f"βœ… Query completed successfully")
258
+ return answer, footnotes
259
 
260
  except TimeoutError:
261
  error_msg = "⏱️ Query timeout. Please try a simpler question or try again later."
262
  print(error_msg)
263
+ return error_msg, []
264
+
265
  except Exception as e:
266
  error_msg = f"❌ Error processing query: {str(e)}"
267
  print(error_msg)
268
+ return error_msg, []
269
 
270
  return query_with_optimization
271
 
 
275
  Create and return the optimized Gradio app for Hugging Face Spaces
276
 
277
  Returns:
278
+ Gradio Blocks interface
279
  """
280
  print("=" * 60)
281
  print("πŸš— CSRC Car Manual RAG System - Performance Optimized")
 
284
  # Load configuration
285
  config = Config()
286
 
287
+ # Initialize system components
288
  try:
289
  components = initialize_system(config)
290
  except Exception as e:
 
311
  # Create optimized query wrapper
312
  optimized_query = create_optimized_query_wrapper(components["rag_engine"])
313
 
314
+ # Replace RAG engine's query method with optimized version
315
+ # This maintains the (answer, footnotes) return format
316
  components["rag_engine"].query = optimized_query
317
 
318
  # Build Gradio interface
 
331
  print("πŸ“¦ Creating interface components...")
332
  demo = interface_builder.create_interface()
333
 
334
+ # Enable queue system for better concurrent performance
335
  print("⚑ Enabling queue for better performance...")
336
  demo.queue(
337
  max_size=20, # Maximum queue size
338
+ default_concurrency_limit=5 # Max concurrent requests
339
  )
340
 
341
  print("βœ… Gradio interface created successfully!")
 
361
  )
362
 
363
 
364
+ # Singleton pattern to prevent multiple initializations
365
  _app_instance = None
366
 
367
  def get_app():
 
387
  demo.launch(
388
  server_name="0.0.0.0",
389
  server_port=7860,
390
+ show_error=True, # Show detailed errors for debugging
391
+ favicon_path=None, # Skip favicon for faster startup
392
  )
393
  else:
394
  # Module-level variable for Spaces auto-detection