import inspect
import logging
import os
import tempfile
from pathlib import Path
from typing import Optional, Tuple

# Created before the optional imports so their failure paths can log through
# the module logger instead of implicitly configuring the root logger.
logger = logging.getLogger(__name__)

try:
    import gradio as gr
    GRADIO_AVAILABLE = True
except ImportError:
    GRADIO_AVAILABLE = False
    logger.warning("Gradio not available")

# Import our services.  Each import is guarded on its own so that a missing
# database layer does not also switch off OCR (and vice versa).
try:
    from app.services.ocr_service import ocr_service
    OCR_AVAILABLE = True
except ImportError:
    ocr_service = None
    OCR_AVAILABLE = False
    logger.warning("OCR service not available")

try:
    from app.services.database_service import DatabaseService
except ImportError:
    DatabaseService = None
    logger.warning("Database service not available")

# Extensions routed to the image OCR path (PDFs are handled separately).
_IMAGE_EXTENSIONS = frozenset({".jpg", ".jpeg", ".png", ".bmp", ".tiff"})

_CUSTOM_CSS = """
.gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.main-header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 20px;
}
.status-box {
    background: #f8f9fa;
    border-left: 4px solid #28a745;
    padding: 15px;
    border-radius: 5px;
}
"""

# The wrapper carries the ``main-header`` class so the rule in _CUSTOM_CSS
# is actually applied to the page banner.
_HEADER_HTML = """
<div class="main-header">
    <h1>🏛️ Legal Dashboard</h1>
    <p>Advanced Legal Document Management System with AI-Powered OCR</p>
</div>
"""

_ABOUT_MARKDOWN = """
## Legal Dashboard

**Advanced Legal Document Management System**

### Features:
- 📄 **PDF Processing**: Extract text from PDF documents with high accuracy
- 🖼️ **Image OCR**: Process scanned documents and images
- 🧠 **AI-Powered**: Uses advanced transformer models for text recognition
- 🔍 **Smart Search**: Intelligent search capabilities across documents
- 📊 **Analytics**: Document analysis and metadata extraction
- 🔒 **Secure**: Privacy-focused document processing

### Supported Formats:
- **Documents**: PDF
- **Images**: JPG, JPEG, PNG, BMP, TIFF

### Technology Stack:
- **OCR**: PyMuPDF, OpenCV, Transformers (TrOCR)
- **NLP**: spaCy for named entity recognition
- **ML**: PyTorch, Hugging Face Transformers
- **Interface**: Gradio for web interface

### Usage:
1. Upload your document using the **Document Processing** tab
2. Click **Process Document** to extract text
3. Use the **Search** tab to find specific content
4. Check **System Status** for service health

---

*This system is designed for legal professionals to efficiently process and manage legal documents with the power of AI.*
"""


class LegalDashboardGradio:
    """
    Gradio interface for Legal Dashboard

    Holds the (optional) OCR and database services and exposes the three
    callbacks wired to the UI: document processing, search and system status.
    """

    def __init__(self):
        self.ocr_service = ocr_service if OCR_AVAILABLE else None
        self.db_service = None

        # Initialize database if available
        if DatabaseService is not None:
            try:
                self.db_service = DatabaseService()
            except Exception as e:
                # Best effort: the dashboard still works without a database.
                logger.warning("Database service not available: %s", e)

    @staticmethod
    def _resolve_upload_path(file) -> str:
        """Return the on-disk path of an upload.

        Accepts either a plain path (``str`` / ``os.PathLike``) or an object
        exposing the path as ``.name`` (e.g. a temp-file wrapper).
        """
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return file.name

    @staticmethod
    def _format_metadata(result: dict, processed) -> str:
        """Build the markdown-style details text for a successful extraction."""
        lines = [
            "",
            "**Processing Details:**",
            f"- Method: {result['method']}",
            f"- Character Count: {len(result['text'])}",
            f"- Pages: {len(result.get('pages', []))}",
            "",
        ]
        entities = (processed or {}).get("entities")
        if entities:
            lines.append("")
            lines.append("**Named Entities Found:**")
            # Show first 10 entities only to keep the panel readable.
            lines.extend(f"- {ent['text']} ({ent['label']})" for ent in entities[:10])
            lines.append("")
        return "\n".join(lines)

    async def process_document(self, file) -> Tuple[str, str, str]:
        """
        Process uploaded document and extract text

        Args:
            file: The uploaded document, either a filesystem path or an
                object whose ``.name`` is the path. Falsy means no upload.

        Returns:
            ``(status, extracted_text, metadata)``. On any failure the status
            starts with "❌" and the other two elements are empty strings;
            this coroutine never raises.
        """
        if not file:
            return "❌ No file uploaded", "", ""

        if not self.ocr_service:
            return "❌ OCR service not available", "", ""

        try:
            file_path = self._resolve_upload_path(file)
            file_extension = Path(file_path).suffix.lower()

            # Process based on file type
            if file_extension == ".pdf":
                result = await self.ocr_service.extract_text_from_pdf(file_path)
            elif file_extension in _IMAGE_EXTENSIONS:
                result = await self.ocr_service.extract_text_from_image(file_path)
            else:
                return f"❌ Unsupported file type: {file_extension}", "", ""

            if not result["success"]:
                error_msg = result.get("metadata", {}).get("error", "Unknown error")
                return f"❌ Processing failed: {error_msg}", "", ""

            # Process text with NLP if available
            processed = await self.ocr_service.process_text(result["text"])
            status = f"✅ Successfully processed using {result['method']}"
            metadata = self._format_metadata(result, processed)
            return status, result["text"], metadata

        except Exception as e:
            # UI boundary: report the failure to the user, keep the traceback
            # in the log.
            logger.exception("Document processing error: %s", e)
            return f"❌ Error: {str(e)}", "", ""

    def search_documents(self, query: str) -> str:
        """
        Search in processed documents

        Args:
            query: Free-text search query; ``None``, empty or whitespace-only
                input is rejected with a prompt message.

        Returns:
            A human-readable result or error message. Database search is not
            implemented yet, so a placeholder is returned for valid queries.
        """
        if not query or not query.strip():
            return "Please enter a search query"

        if not self.db_service:
            return "Database service not available"

        try:
            # This would search in the database
            # For now, return a placeholder
            return (
                f"Search results for '{query}' would appear here.\n\n"
                "Database integration coming soon..."
            )
        except Exception as e:
            return f"Search error: {str(e)}"

    def get_system_status(self) -> str:
        """
        Get system status information

        Returns:
            A multi-line report covering the OCR service, the database
            service and the Gradio installation, or an error message if the
            report could not be assembled.
        """
        try:
            status = []

            # OCR Service Status
            if self.ocr_service:
                ocr_status = self.ocr_service.get_service_status()
                models = ocr_status['models_loaded']
                status.append("🔍 **OCR Service:**")
                status.append(f" - Status: {'✅ Ready' if ocr_status['fallback_ready'] else '❌ Not Ready'}")
                status.append(f" - Transformers: {'✅ Available' if ocr_status['transformers_ready'] else '❌ Not Available'}")
                status.append(f" - spaCy: {'✅ Available' if ocr_status['spacy_ready'] else '❌ Not Available'}")
                status.append(f" - Models: {', '.join(models) if models else 'None'}")
            else:
                status.append("🔍 **OCR Service:** ❌ Not Available")

            # Database Service Status
            if self.db_service:
                status.append("\n💾 **Database Service:** ✅ Available")
            else:
                status.append("\n💾 **Database Service:** ❌ Not Available")

            # System Info
            status.append("\n🖥️ **System Info:**")
            status.append(" - Python: Available")
            status.append(f" - Gradio: {'✅ Available' if GRADIO_AVAILABLE else '❌ Not Available'}")

            return "\n".join(status)

        except Exception as e:
            return f"Error getting system status: {str(e)}"


def _upload_file_type() -> str:
    """Pick the ``gr.File(type=...)`` value that hands handlers a file on disk.

    Gradio 3.x spells this ``"file"`` (an object exposing ``.name``); Gradio 4
    and later replaced it with ``"filepath"`` (a plain string path).
    """
    try:
        major = int(str(gr.__version__).split(".", 1)[0])
    except (AttributeError, ValueError):
        return "file"  # unknown version: keep the originally configured value
    return "file" if major < 4 else "filepath"


def _supported_kwargs(func, options: dict) -> dict:
    """Drop the entries of *options* that *func* does not accept by keyword."""
    params = inspect.signature(func).parameters
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        return dict(options)
    return {key: value for key, value in options.items() if key in params}


def _build_processing_tab(dashboard) -> None:
    """Lay out the upload/OCR tab and wire it to ``process_document``."""
    with gr.Tab("📄 Document Processing"):
        gr.Markdown("## Upload and Process Documents")
        gr.Markdown("Upload PDF files or images to extract text using advanced OCR technology.")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload Document",
                    file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff"],
                    type=_upload_file_type(),
                )
                process_btn = gr.Button("🔍 Process Document", variant="primary", size="lg")

            with gr.Column(scale=2):
                status_output = gr.Textbox(
                    label="Processing Status",
                    placeholder="Upload a document and click 'Process Document' to begin...",
                    interactive=False,
                )

        with gr.Row():
            with gr.Column():
                extracted_text = gr.Textbox(
                    label="Extracted Text",
                    placeholder="Processed text will appear here...",
                    lines=15,
                    max_lines=30,
                    interactive=False,
                )

            with gr.Column():
                metadata_output = gr.Textbox(
                    label="Processing Details",
                    placeholder="Processing metadata and analysis will appear here...",
                    lines=15,
                    max_lines=30,
                    interactive=False,
                )

        # Connect the processing function
        process_btn.click(
            fn=dashboard.process_document,
            inputs=[file_input],
            outputs=[status_output, extracted_text, metadata_output],
        )


def _build_search_tab(dashboard) -> None:
    """Lay out the search tab and wire it to ``search_documents``."""
    with gr.Tab("🔍 Search Documents"):
        gr.Markdown("## Search Processed Documents")
        gr.Markdown("Search through previously processed documents using keywords and phrases.")

        with gr.Row():
            search_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter keywords to search...",
                scale=3,
            )
            search_btn = gr.Button("🔍 Search", variant="primary", scale=1)

        search_results = gr.Textbox(
            label="Search Results",
            placeholder="Search results will appear here...",
            lines=10,
            interactive=False,
        )

        # Connect search function
        search_btn.click(
            fn=dashboard.search_documents,
            inputs=[search_input],
            outputs=[search_results],
        )


def _build_status_tab(dashboard, iface) -> None:
    """Lay out the status tab; the report refreshes on click and on page load."""
    with gr.Tab("⚙️ System Status"):
        gr.Markdown("## System Status and Information")

        status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
        system_status = gr.Textbox(
            label="System Status",
            placeholder="Click 'Refresh Status' to check system health...",
            lines=15,
            interactive=False,
        )

        # Connect status function
        status_btn.click(
            fn=dashboard.get_system_status,
            outputs=[system_status],
        )

        # Auto-load status on interface start
        iface.load(
            fn=dashboard.get_system_status,
            outputs=[system_status],
        )


def create_gradio_interface():
    """
    Create and return the Gradio interface

    Returns:
        The assembled ``gr.Blocks`` application, or ``None`` when Gradio is
        not installed.
    """
    if not GRADIO_AVAILABLE:
        return None

    dashboard = LegalDashboardGradio()

    with gr.Blocks(css=_CUSTOM_CSS, title="Legal Dashboard", theme=gr.themes.Soft()) as iface:
        # Header
        gr.HTML(_HEADER_HTML)

        _build_processing_tab(dashboard)
        _build_search_tab(dashboard)
        _build_status_tab(dashboard, iface)

        with gr.Tab("📚 About"):
            gr.Markdown(_ABOUT_MARKDOWN)

    return iface


def launch_gradio_app():
    """
    Launch the Gradio application

    Returns:
        The launched interface, or ``None`` when Gradio is not installed or
        the interface could not be created.
    """
    if not GRADIO_AVAILABLE:
        print("❌ Gradio not available. Please install gradio: pip install gradio")
        return None

    iface = create_gradio_interface()
    if iface:
        print("🚀 Starting Legal Dashboard Gradio Interface...")
        launch_options = {
            # NOTE: binds to every network interface (usual for containers);
            # restrict this if the host is directly reachable.
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
            "show_error": True,
            "show_tips": True,
            "enable_queue": True,
        }
        # Some of these options exist only in older Gradio releases; pass
        # just the ones the installed ``launch`` accepts so startup does not
        # fail with a TypeError.
        iface.launch(**_supported_kwargs(iface.launch, launch_options))

    return iface


if __name__ == "__main__":
    launch_gradio_app()