import os
import sys
import re

import numpy as np
from dotenv import load_dotenv
import gradio as gr
import google.genai as genai
from fastrtc import WebRTC, get_cloudflare_turn_credentials_async
from gradio.utils import get_space

from audio_utils import (
    process_input_and_generate_speech,
    GeminiHandler,
    generate_tts_response,
    get_transcription_or_text,
)
from utils import process_query
from logger.custom_logger import CustomLoggerTracker

# Import enhanced functions from gradio_utils
from gradio_utils import (
    get_all_document_choices,
    enhanced_document_upload_handler,
    enhanced_audio_transcription,
    process_text_with_audio_support,
    process_audio_only_response,
    process_both_text_and_audio_response,
    create_document_info_panel,
    get_enhanced_css,
)

# Setup
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
load_dotenv()

# Initialize Gemini client
gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

custom_log = CustomLoggerTracker()
logger = custom_log.get_logger("main")
logger.info("Logger initialized for main module")


def parse_score_safely(score_str):
    """Safely parse a hallucination score, tolerating extra characters."""
    if not score_str:
        return 0
    # Extract just the number from strings like "4**" or "Score: 4"
    numbers = re.findall(r"\d+", str(score_str))
    if numbers:
        return int(numbers[0])
    return 0
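
# Illustrative behavior (matching the cases noted in the comment above):
#   parse_score_safely("4**")       -> 4
#   parse_score_safely("Score: 4")  -> 4
#   parse_score_safely("")          -> 0
#   parse_score_safely(None)        -> 0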


def process_text_only(user_input, audio_input, chat_history):
    """Process input and generate only a text response using the enhanced functions."""
    try:
        # Use enhanced text processing with audio support
        new_history, cleared_input, status_msg, stats_display = process_text_with_audio_support(
            user_input, audio_input, chat_history
        )
        return new_history, status_msg, cleared_input, None
    except Exception as e:
        logger.error(f"Error in enhanced text processing: {e}")
        return chat_history, f"Status: Error - {str(e)}", "", None


def process_audio_only(user_input, audio_input, voice_dropdown, chat_history):
    """Process input and generate only an audio response using the enhanced functions."""
    try:
        # Use enhanced audio processing
        audio_response, cleared_input, status_msg, stats_display = process_audio_only_response(
            user_input, audio_input, voice_dropdown, chat_history
        )
        return audio_response, status_msg, cleared_input, None
    except Exception as e:
        logger.error(f"Error in enhanced audio processing: {e}")
        return None, f"Status: Error - {str(e)}", "", None


def process_both_text_and_audio(text_input, audio_input, voice_dropdown, chat_history):
    """Process input and generate both text and audio responses using the enhanced functions."""
    try:
        # Use enhanced combined processing
        new_history, audio_response, cleared_input, status_msg, stats_display = (
            process_both_text_and_audio_response(
                text_input, audio_input, voice_dropdown, chat_history
            )
        )
        return new_history, audio_response, status_msg, cleared_input, None
    except Exception as e:
        logger.error(f"Error in enhanced combined processing: {e}")
        return chat_history, None, f"Status: Error - {str(e)}", "", None
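
# Shared handler contract (used by the button wiring below): each process_*
# handler returns its UI updates followed by a cleared text value and None, so
# the message box and the audio input are reset after every submission. The
# stats_display value produced by the enhanced helpers is not surfaced in the UI.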

def toggle_user_doc_visibility(selected_type):
    """Toggle visibility of user document options."""
    return gr.update(visible=(selected_type == "User-Specific Document"))


def validate_environment() -> bool:
    """Validate that the required environment variables are present."""
    required_env_vars = [
        "GEMINI_API_KEY",
        "SILICONFLOW_API_KEY",
        "SILICONFLOW_CHAT_URL",
    ]
    missing_vars = [var for var in required_env_vars if not os.getenv(var)]
    if missing_vars:
        logger.warning(f"Missing environment variables: {', '.join(missing_vars)}")
        return False
    logger.info("All required environment variables are present")
    return True


def clear_chat():
    """Clear chat history."""
    return [], None, "Status: Chat cleared."


# Gradio Interface Configuration
image_path = "assets/Compumacy-Logo-Trans2.png"

# Dark Mode Professional Theme with Custom CSS
dark_theme_css = """
/* Dark Mode Theme */
.gradio-container {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f1419 100%) !important;
    color: #e0e6ed !important;
    border-radius: 20px !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
    max-width: 1200px !important;
    margin: auto;
}

/* Header styling */
.gradio-container h1 {
    color: #ffffff !important;
    text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
}

/* Logo container */
.logo-container {
    background: rgba(255, 255, 255, 0.1) !important;
    border-radius: 15px !important;
    padding: 10px !important;
    backdrop-filter: blur(10px) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
}

/* Chatbot styling */
.chatbot {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    border-radius: 15px !important;
}

.chatbot .message {
    background: rgba(255, 255, 255, 0.08) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    border-radius: 10px !important;
    color: #e0e6ed !important;
}

.chatbot .message.user {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
}

.chatbot .message.bot {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
    color: white !important;
}

/* Input fields */
.gr-textbox, .gr-dropdown, .gr-file {
    background: rgba(255, 255, 255, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.2) !important;
    border-radius: 10px !important;
    color: #e0e6ed !important;
    backdrop-filter: blur(5px) !important;
}

.gr-textbox::placeholder {
    color: rgba(224, 230, 237, 0.6) !important;
}

/* Buttons */
.gr-button {
    border-radius: 10px !important;
    border: none !important;
    font-weight: 600 !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    transition: all 0.3s ease !important;
    backdrop-filter: blur(10px) !important;
}

.text-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
}

.text-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}

.audio-button {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
    color: white !important;
    box-shadow: 0 4px 15px rgba(240, 147, 251, 0.3) !important;
}

.audio-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(240, 147, 251, 0.4) !important;
}

.both-button {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
    color: white !important;
    box-shadow: 0 4px 15px rgba(79, 172, 254, 0.3) !important;
}

.both-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(79, 172, 254, 0.4) !important;
}

/* Clear button */
.clear-button {
    background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%) !important;
    color: white !important;
    box-shadow: 0 4px 15px rgba(255, 107, 107, 0.3) !important;
}

.clear-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(255, 107, 107, 0.4) !important;
}

/* Labels and text */
label, .gr-form label {
    color: #e0e6ed !important;
    font-weight: 500 !important;
}

/* Audio component */
.gr-audio {
    background: rgba(255, 255, 255, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.2) !important;
    border-radius: 15px !important;
    backdrop-filter: blur(10px) !important;
}

/* Accordion */
.gr-accordion {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    border-radius: 15px !important;
}

/* Info boxes */
.info-box {
    background: rgba(79, 172, 254, 0.1) !important;
    border-left: 4px solid #4facfe !important;
    border-radius: 10px !important;
    padding: 15px !important;
    margin: 10px 0 !important;
    backdrop-filter: blur(5px) !important;
}

/* Status output */
.status-output {
    background: rgba(255, 255, 255, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.2) !important;
    border-radius: 10px !important;
    color: #e0e6ed !important;
}

/* Hide default footer */
footer {
    display: none !important;
}

/* Scrollbar styling */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: rgba(255, 255, 255, 0.1);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}

/* Responsive design */
@media (max-width: 768px) {
    .gradio-container {
        margin: 10px !important;
        border-radius: 15px !important;
    }
}
"""

with gr.Blocks(
    title="Wisal Chatbot - Autism AI Assistant",
    theme=gr.themes.Base(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=dark_theme_css,
) as demo:
    gr.HTML("""
        <div style="text-align: center; padding: 16px;">
            <h1>🤖 Wisal: Autism AI Assistant</h1>
            <p>Your personalized AI assistant designed specifically for individuals with autism</p>
        </div>
    """)

    with gr.Row(equal_height=False):
        with gr.Column(scale=1, min_width=200):
            if os.path.exists(image_path):
                gr.Image(
                    value=image_path,
                    show_label=False,
                    container=True,
                    height=150,
                    width=150,
                    elem_classes="logo-container",
                    show_download_button=False,
                    show_share_button=False,
                )
            else:
                gr.HTML("""
                    <div class="logo-container" style="text-align: center; font-size: 64px;">🤖</div>
                """)

        with gr.Column(scale=4):
            gr.HTML("""
                <div class="info-box">
                    <h3>How to use Wisal:</h3>
                </div>
            """)

    # Initialize chat history as an empty list
    chat_history = gr.State([])

    with gr.Row():
        chatbot = gr.Chatbot(
            type="messages",
            label="💬 Conversation with Wisal",
            height=500,
            avatar_images=(None, image_path if os.path.exists(image_path) else None),
            bubble_full_width=False,
            show_copy_button=True,
            elem_classes="chatbot",
        )

    with gr.Row():
        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="🔊 Wisal's Voice Response",
                interactive=False,
                show_download_button=True,
                elem_classes="gr-audio",
            )
        with gr.Column(scale=1):
            status_output = gr.Textbox(
                label="📊 System Status",
                interactive=False,
                max_lines=2,
                elem_classes="status-output",
            )

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask me anything about autism...",
            label="📝 Your Message",
            lines=3,
            scale=3,
            show_copy_button=True,
        )
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="🎤 Voice Input",
            scale=2,
        )

    with gr.Row():
        voice_dropdown = gr.Dropdown(
            label="🎙️ Choose Voice",
            choices=["Kore", "Puck", "Zephyr", "Leda", "Fenrir", "Charon", "Orus", "Aoede", "Callirrhoe"],
            value="Kore",
            scale=2,
        )

    # Separate buttons for the different response types
    with gr.Row():
        text_only_btn = gr.Button(
            "📝 Generate Text Only",
            variant="secondary",
            scale=1,
            elem_classes="text-button",
        )
        audio_only_btn = gr.Button(
            "🎵 Generate Audio Only",
            variant="secondary",
            scale=1,
            elem_classes="audio-button",
        )
        both_btn = gr.Button(
            "💬 Generate Text & Audio",
            variant="primary",
            scale=1,
            elem_classes="both-button",
        )

    # Connect the interactions.
    # Note: chat_history (gr.State) is passed into every handler but never listed
    # in the outputs, so cross-turn persistence is assumed to rely on the enhanced
    # helpers mutating the list in place; the returned history only refreshes the
    # Chatbot display.
    text_only_btn.click(
        fn=process_text_only,
        inputs=[user_input, audio_input, chat_history],
        outputs=[chatbot, status_output, user_input, audio_input],
    )
    audio_only_btn.click(
        fn=process_audio_only,
        inputs=[user_input, audio_input, voice_dropdown, chat_history],
        outputs=[audio_output, status_output, user_input, audio_input],
    )
    both_btn.click(
        fn=process_both_text_and_audio,
        inputs=[user_input, audio_input, voice_dropdown, chat_history],
        outputs=[chatbot, audio_output, status_output, user_input, audio_input],
    )

    # Keep the original submit behavior for the text input (defaults to both)
    user_input.submit(
        fn=process_both_text_and_audio,
        inputs=[user_input, audio_input, voice_dropdown, chat_history],
        outputs=[chatbot, audio_output, status_output, user_input, audio_input],
    )

    gr.HTML("<hr>")

    with gr.Accordion("🔧 Advanced Options (Live Chat & Document Q&A)", open=False):
        with gr.Row():
            with gr.Column():
                try:
                    webrtc2 = WebRTC(
                        label="🎤 Live Voice Chat",
                        modality="audio",
                        mode="send-receive",
                        elem_id="audio-source",
                        rtc_configuration=get_cloudflare_turn_credentials_async,
                        icon="https://www.gstatic.com/lamda/images/gemini_favicon.png",
                    )
                    webrtc2.stream(
                        GeminiHandler(),
                        inputs=[webrtc2],
                        outputs=[webrtc2],
                        time_limit=180 if get_space() else None,
                        concurrency_limit=2 if get_space() else None,
                    )
                except Exception as e:
                    logger.warning(f"WebRTC setup failed: {e}")
                    gr.HTML("""
                        <div class="info-box">
                            <strong>⚠️ Live Chat temporarily unavailable</strong>
                            <p>Please use the text and audio inputs above instead.</p>
                        </div>
                    """)

        with gr.Row():
            with gr.Column():
                # Enhanced document upload with all 4 options
                doc_file = gr.File(
                    label="📎 Upload Document (PDF, DOCX, TXT)",
                    file_types=[".pdf", ".docx", ".txt"],
                )
                # Dropdown with all 4 document types
                doc_type = gr.Dropdown(
                    label="📄 Document Type",
                    choices=get_all_document_choices(),
                    value="user_specific",
                    elem_classes="gr-dropdown",
                )
                # Optional query field for immediate Q&A
                doc_query = gr.Textbox(
                    label="💭 Optional: Ask about this document",
                    placeholder="What does this document say about...",
                    lines=2,
                    elem_classes="gr-textbox",
                )
                # Upload button
                upload_btn = gr.Button(
                    "📤 Upload & Process",
                    variant="primary",
                    elem_classes="both-button",
                )
                # Upload status display
                upload_status = gr.Textbox(
                    label="📊 Upload Status",
                    interactive=False,
                    lines=4,
                    elem_classes="status-output",
                )
            with gr.Column():
                # Document info panel
                doc_info = gr.HTML(create_document_info_panel())

        # Connect the upload button to the enhanced handler
        upload_btn.click(
            fn=enhanced_document_upload_handler,
            inputs=[doc_file, doc_type, doc_query],
            outputs=[upload_status],
        )
    with gr.Row():
        clear_btn = gr.Button(
            "🗑️ Clear Chat",
            variant="stop",
            elem_classes="clear-button",
        )
        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot, audio_output, status_output],
        )

    # Add the usage guide at the bottom
    gr.HTML("""
        <div class="info-box">
            <h3>💡 Usage Guide:</h3>
            <h4>📝 Text Only Mode</h4>
            <p>Perfect for quick questions when you want to read the response and keep it in the chat history.</p>
            <h4>🎵 Audio Only Mode</h4>
            <p>Great for hands-free interaction when you want to listen to responses without cluttering the chat.</p>
            <h4>💬 Text & Audio Mode</h4>
            <p>The best of both worlds: see and hear responses, ideal for learning and accessibility.</p>
        </div>
    """)


# Add this test function to main.py temporarily
def test_pipeline_response():
    """Test the pipeline response."""
    try:
        response = process_query("What is autism?")
        print(f"Response type: {type(response)}")
        print(f"Response length: {len(str(response))}")
        print(f"Response preview: {str(response)[:200]}...")
        return True
    except Exception as e:
        print(f"Test failed: {e}")
        return False


def main():
    """Main entry point for the application."""
    logger.info("Starting Wisal application...")

    # Validate the environment before starting
    if not validate_environment():
        logger.error("Environment validation failed. Please check your .env file.")
        return

    try:
        # Test the Gemini client connection
        logger.info("Testing Gemini client connection...")

        # Launch the application
        demo.launch(
            server_port=8080,
            server_name="0.0.0.0",  # Allow external connections
            share=False,  # Set to True to create a public link
            favicon_path=image_path if os.path.exists(image_path) else None,
            show_error=True,
        )
    except Exception as e:
        logger.error(f"Failed to start application: {e}")