Spaces:

Bryceeee
/

CSRC-Car-Manual-RAG

Sleeping

App Files Community

Bryceeee committed 24 days ago

Commit

c856b02

verified ·

1 Parent(s): 9bd564b

Update app.py

Browse files

Performance-Optimized Hugging Face Spaces Entry Point
FIXED VERSION: Preserves two-value return format (answer, footnotes)

This version fixes the ValueError by ensuring the query wrapper
returns the same format as the original RAG engine: (answer, footnotes)

Files changed (1) hide show

app.py +65 -39

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 """
 Performance-Optimized Hugging Face Spaces Entry Point
-Solves slow response and loading issues
 """
 import os
 import sys
 from pathlib import Path
-import asyncio
 from concurrent.futures import ThreadPoolExecutor
 # Add the current directory to Python path for Spaces environment
@@ -48,14 +50,15 @@ except ImportError as e:
     print(f"⚠️ Scenario contextualization modules not available: {e}")
 # Performance configuration
-ENABLE_CACHING = True  # Enable query caching
-MAX_WORKERS = 4  # Thread pool size
 QUERY_TIMEOUT = 30  # Query timeout in seconds
-# Global thread pool for async processing
 executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
-# Simple in-memory cache for queries
 query_cache = {}
@@ -64,14 +67,18 @@ def initialize_system(config: Config) -> dict:
     Initialize the RAG system components with performance optimization
     Args:
-        config: Configuration object
     Returns:
-        Dictionary containing all initialized components
     """
     print("🔧 Initializing core components...")
-    # Initialize OpenAI client
     if not config.openai_api_key:
         raise ValueError(
             "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
@@ -100,26 +107,24 @@ def initialize_system(config: Config) -> dict:
         vector_store_id = vector_store_details["id"]
         config.save_vector_store_id(vector_store_id, config.vector_store_name)
-        # Upload files
         upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
         if upload_stats["successful_uploads"] == 0:
             raise RuntimeError("Failed to upload any files")
     else:
         print(f"✅ Using existing vector store: {vector_store_id}")
-    # Initialize RAG query engine
     print("🔧 Initializing RAG engine...")
     rag_engine = RAGQueryEngine(client, vector_store_id, config.model)
-    # Initialize question generator
     print("🔧 Initializing question generator...")
     question_generator = QuestionGenerator(client, rag_engine)
-    # Initialize knowledge graph generator
     print("🔧 Initializing knowledge graph...")
     knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))
-    # Initialize optional modules (with reduced logging)
     user_profiling = None
     learning_path_generator = None
     adaptive_engine = None
@@ -133,6 +138,7 @@ def initialize_system(config: Config) -> dict:
         except Exception as e:
             print(f"⚠️ Error initializing Personalized Learning System: {e}")
     proactive_engine = None
     if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
         try:
@@ -143,6 +149,7 @@ def initialize_system(config: Config) -> dict:
         except Exception as e:
             print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")
     enhanced_rag_engine = None
     if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
         try:
@@ -184,30 +191,38 @@ def create_optimized_query_wrapper(rag_engine):
     """
     Create an optimized query wrapper with caching, timeout, and async processing
     Args:
         rag_engine: The RAG query engine to wrap
     Returns:
-        Optimized query function
     """
-    def query_with_optimization(question: str, use_cache: bool = True) -> str:
         """
         Optimized query function with caching and timeout protection
         Args:
             question: User's question
-            use_cache: Whether to use cache (default: True)
         Returns:
-            Answer string
         """
         if not question or not question.strip():
-            return "Please enter a question."
         # Normalize question for cache key
         cache_key = question.strip().lower()
-        # Check cache
         if use_cache and ENABLE_CACHING and cache_key in query_cache:
             print(f"📋 Using cached result for: {question[:50]}...")
             return query_cache[cache_key]
@@ -215,31 +230,42 @@ def create_optimized_query_wrapper(rag_engine):
         try:
             print(f"🔍 Processing query: {question[:50]}...")
-            # Execute query using thread pool (non-blocking)
-            future = executor.submit(rag_engine.query, question)
-            # Wait for result with timeout
             result = future.result(timeout=QUERY_TIMEOUT)
-            # Cache the result
             if ENABLE_CACHING:
-                query_cache[cache_key] = result
-                # Limit cache size
                 if len(query_cache) > 100:
-                    # Remove oldest entry
                     query_cache.pop(next(iter(query_cache)))
             print(f"✅ Query completed successfully")
-            return result
         except TimeoutError:
             error_msg = "⏱️ Query timeout. Please try a simpler question or try again later."
             print(error_msg)
-            return error_msg
         except Exception as e:
             error_msg = f"❌ Error processing query: {str(e)}"
             print(error_msg)
-            return error_msg
     return query_with_optimization
@@ -249,7 +275,7 @@ def create_app():
     Create and return the optimized Gradio app for Hugging Face Spaces
     Returns:
-        Gradio Blocks app
     """
     print("=" * 60)
     print("🚗 CSRC Car Manual RAG System - Performance Optimized")
@@ -258,7 +284,7 @@ def create_app():
     # Load configuration
     config = Config()
-    # Initialize system
     try:
         components = initialize_system(config)
     except Exception as e:
@@ -285,8 +311,8 @@ def create_app():
     # Create optimized query wrapper
     optimized_query = create_optimized_query_wrapper(components["rag_engine"])
-    # Replace original RAG engine query method with optimized version
-    original_query = components["rag_engine"].query
     components["rag_engine"].query = optimized_query
     # Build Gradio interface
@@ -305,11 +331,11 @@ def create_app():
         print("📦 Creating interface components...")
         demo = interface_builder.create_interface()
-        # Enable queue for better performance
         print("⚡ Enabling queue for better performance...")
         demo.queue(
             max_size=20,  # Maximum queue size
-            default_concurrency_limit=5  # Concurrency limit
         )
         print("✅ Gradio interface created successfully!")
@@ -335,7 +361,7 @@ def create_app():
         )
-# Prevent multiple initializations using singleton pattern
 _app_instance = None
 def get_app():
@@ -361,8 +387,8 @@ if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        show_error=True,  # Show detailed errors
-        favicon_path=None,  # Skip favicon loading for faster startup
     )
 else:
     # Module-level variable for Spaces auto-detection

 """
 Performance-Optimized Hugging Face Spaces Entry Point
+FIXED VERSION: Preserves two-value return format (answer, footnotes)
+This version fixes the ValueError by ensuring the query wrapper
+returns the same format as the original RAG engine: (answer, footnotes)
 """
 import os
 import sys
 from pathlib import Path
 from concurrent.futures import ThreadPoolExecutor
 # Add the current directory to Python path for Spaces environment
     print(f"⚠️ Scenario contextualization modules not available: {e}")
 # Performance configuration
+ENABLE_CACHING = True  # Enable query result caching
+MAX_WORKERS = 4  # Thread pool worker count
 QUERY_TIMEOUT = 30  # Query timeout in seconds
+# Global thread pool for asynchronous query processing
 executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
+# In-memory cache for query results
+# Format: {question: (answer, footnotes)}
 query_cache = {}
     Initialize the RAG system components with performance optimization
     Args:
+        config: Configuration object containing API keys and settings
     Returns:
+        Dictionary containing all initialized system components
+    Raises:
+        ValueError: If OPENAI_API_KEY is not configured
+        RuntimeError: If system initialization fails
     """
     print("🔧 Initializing core components...")
+    # Validate OpenAI API key
     if not config.openai_api_key:
         raise ValueError(
             "OPENAI_API_KEY not found! Please set it in Hugging Face Spaces Secrets. "
         vector_store_id = vector_store_details["id"]
         config.save_vector_store_id(vector_store_id, config.vector_store_name)
+        # Upload PDF files to vector store
         upload_stats = vector_store_manager.upload_pdf_files(pdf_files, vector_store_id)
         if upload_stats["successful_uploads"] == 0:
             raise RuntimeError("Failed to upload any files")
     else:
         print(f"✅ Using existing vector store: {vector_store_id}")
+    # Initialize core RAG components
     print("🔧 Initializing RAG engine...")
     rag_engine = RAGQueryEngine(client, vector_store_id, config.model)
     print("🔧 Initializing question generator...")
     question_generator = QuestionGenerator(client, rag_engine)
     print("🔧 Initializing knowledge graph...")
     knowledge_graph = KnowledgeGraphGenerator(client, vector_store_id, str(config.output_dir))
+    # Initialize optional personalized learning modules
     user_profiling = None
     learning_path_generator = None
     adaptive_engine = None
         except Exception as e:
             print(f"⚠️ Error initializing Personalized Learning System: {e}")
+    # Initialize optional proactive learning
     proactive_engine = None
     if PROACTIVE_LEARNING_AVAILABLE and user_profiling:
         try:
         except Exception as e:
             print(f"⚠️ Error initializing Proactive Learning Assistance: {e}")
+    # Initialize optional scenario contextualization
     enhanced_rag_engine = None
     if SCENARIO_CONTEXTUALIZATION_AVAILABLE:
         try:
     """
     Create an optimized query wrapper with caching, timeout, and async processing
+    CRITICAL: This wrapper preserves the original return format: (answer, footnotes)
     Args:
         rag_engine: The RAG query engine to wrap
     Returns:
+        Optimized query function that returns (answer, footnotes)
     """
+    # Store reference to original query method
+    original_query = rag_engine.query
+    def query_with_optimization(question: str, use_cache: bool = True):
         """
         Optimized query function with caching and timeout protection
         Args:
             question: User's question
+            use_cache: Whether to use cached results (default: True)
         Returns:
+            Tuple of (answer: str, footnotes: list)
+            - answer: The response text
+            - footnotes: List of source references
         """
+        # Validate input
         if not question or not question.strip():
+            return "Please enter a question.", []
         # Normalize question for cache key
         cache_key = question.strip().lower()
+        # Check cache for previous results
         if use_cache and ENABLE_CACHING and cache_key in query_cache:
             print(f"📋 Using cached result for: {question[:50]}...")
             return query_cache[cache_key]
         try:
             print(f"🔍 Processing query: {question[:50]}...")
+            # Execute query in thread pool (non-blocking)
+            future = executor.submit(original_query, question)
+            # Wait for result with timeout protection
             result = future.result(timeout=QUERY_TIMEOUT)
+            # Handle different return formats
+            # Original RAG engine returns (answer, footnotes)
+            if isinstance(result, tuple) and len(result) == 2:
+                answer, footnotes = result
+            else:
+                # Fallback: if only single value returned
+                answer = str(result)
+                footnotes = []
+            # Cache the complete result (both answer and footnotes)
             if ENABLE_CACHING:
+                query_cache[cache_key] = (answer, footnotes)
+                # Limit cache size to prevent memory issues
                 if len(query_cache) > 100:
+                    # Remove oldest entry (FIFO)
                     query_cache.pop(next(iter(query_cache)))
             print(f"✅ Query completed successfully")
+            return answer, footnotes
         except TimeoutError:
             error_msg = "⏱️ Query timeout. Please try a simpler question or try again later."
             print(error_msg)
+            return error_msg, []
         except Exception as e:
             error_msg = f"❌ Error processing query: {str(e)}"
             print(error_msg)
+            return error_msg, []
     return query_with_optimization
     Create and return the optimized Gradio app for Hugging Face Spaces
     Returns:
+        Gradio Blocks interface
     """
     print("=" * 60)
     print("🚗 CSRC Car Manual RAG System - Performance Optimized")
     # Load configuration
     config = Config()
+    # Initialize system components
     try:
         components = initialize_system(config)
     except Exception as e:
     # Create optimized query wrapper
     optimized_query = create_optimized_query_wrapper(components["rag_engine"])
+    # Replace RAG engine's query method with optimized version
+    # This maintains the (answer, footnotes) return format
     components["rag_engine"].query = optimized_query
     # Build Gradio interface
         print("📦 Creating interface components...")
         demo = interface_builder.create_interface()
+        # Enable queue system for better concurrent performance
         print("⚡ Enabling queue for better performance...")
         demo.queue(
             max_size=20,  # Maximum queue size
+            default_concurrency_limit=5  # Max concurrent requests
         )
         print("✅ Gradio interface created successfully!")
         )
+# Singleton pattern to prevent multiple initializations
 _app_instance = None
 def get_app():
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        show_error=True,  # Show detailed errors for debugging
+        favicon_path=None,  # Skip favicon for faster startup
     )
 else:
     # Module-level variable for Spaces auto-detection