import gradio as gr
import json
from pdf_qa.pdf_processor import PDFProcessor
from pdf_qa.product_classifier import ProductClassifier
from examples import employment_products

# Optionally load environment variables from a .env file. python-dotenv is
# treated as an optional dependency: if the import fails, startup continues
# without it. Note the try also covers the load_dotenv() call itself, so an
# ImportError raised inside it would be silenced as well.
try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    pass  # dotenv is optional; skip silently when it is not installed

# Module-level instances
# One PDFProcessor is created at import time; classify_document uses it for
# every request.
pdf_processor = PDFProcessor()
# Placeholder only. classify_document builds its own classifier in a local
# variable and never declares this name global, so nothing visible in this
# file rebinds it from None.
classifier = None


def classify_document(pdf_file, products_json, method):
    """Classify a PDF document into the best-matching product categories.

    Input problems are reported as a message in the first element of the
    returned tuple rather than raised, so the UI can display them directly.

    Args:
        pdf_file: Uploaded PDF as supplied by the UI file input; passed
            unchanged to ``pdf_processor.process_pdf``. Falsy when nothing
            has been uploaded.
        products_json: JSON text that must decode to a non-empty object
            mapping product IDs to product definitions.
        method: Classification method name, forwarded to the classifier.
            A summary is requested only for ``"smart_semantic"``.

    Returns:
        A ``(results, summary)`` tuple. On success ``results`` is a dict
        mapping the display name of up to three top-ranked products to their
        score, and ``summary`` is the classifier's summary (``""`` unless
        ``method`` is ``"smart_semantic"``). On failure ``results`` is an
        error message string and ``summary`` is ``""``.
    """
    if not pdf_file:
        return "Please upload a PDF file first.", ""

    # Guard against None as well as blank text so .strip() cannot raise.
    if not products_json or not products_json.strip():
        return "Please provide product definitions.", ""

    try:
        products = json.loads(products_json)

        # Validate before the (expensive) PDF processing: the lookup below
        # needs a mapping, and an empty one ("{}" is the textbox default)
        # gives the classifier nothing to match against.
        if not isinstance(products, dict):
            return (
                "Product definitions must be a JSON object keyed by product ID.",
                "",
            )
        if not products:
            return "Please provide product definitions.", ""

        pages = pdf_processor.process_pdf(pdf_file)

        # Local name deliberately differs from the module-level `classifier`
        # placeholder to avoid shadowing it.
        product_classifier = ProductClassifier(products)
        results = product_classifier.classify_document(pages, products, method)

        # Shape the top 3 (product_id, score) pairs for the Gradio Label
        # component: {display name: score}.
        formatted_results = {}
        for product_id, score in results[:3]:
            definition = products.get(product_id)
            # Fall back to the raw ID when the definition is missing or is
            # not an object, instead of failing with a bare KeyError message.
            if isinstance(definition, dict):
                product_name = definition.get("name", product_id)
            else:
                product_name = product_id
            formatted_results[product_name] = score

        summary = ""
        if method == "smart_semantic":
            summary = product_classifier.get_summary(pages)

        return formatted_results, summary

    except json.JSONDecodeError:
        return "Invalid JSON format for products.", ""
    except Exception as e:
        # UI boundary: surface any processing/classification failure as text
        # instead of crashing the request.
        return str(e), ""


# Create Gradio interface
# Page structure, top to bottom: title, collapsible help text, an input row
# (upload / method / button beside the product-definition editor), an output
# row (ranked label + summary), and one pre-filled example. The nesting of the
# `with` blocks below defines that structure, so statement order matters.
with gr.Blocks(title="Document Classification", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 Document Classification")
    
    # Details section (collapsed by default: open=False)
    with gr.Accordion("ℹ️ How it works", open=False):
        gr.Markdown("""
        **Document Classification System**
        
        This AI-powered tool analyzes PDF documents and matches them to predefined product categories based on content similarity.
        
        **Methods Available:**
        - **Smart Semantic**: Uses LLM to summarize the document, then finds semantic matches (recommended)
        - **Semantic**: Direct semantic similarity between document and product descriptions
        - **Keyword**: Matches based on keyword presence in the document
        - **Hybrid**: Combines semantic and keyword approaches (70% semantic, 30% keyword)
        
        **How to use:**
        1. Upload a PDF document
        2. Define your product categories with descriptions and keywords (JSON format) or use the examples at the bottom of the page
        3. Choose a classification method
        4. Get top 3 matches with confidence scores
        """)
    
    # Input row: narrow control column (scale=1) next to a wider editor
    # column (scale=2).
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload PDF")
            # Restricted to .pdf uploads. type="filepath" — per the Gradio
            # File docs the handler then receives a file path; TODO confirm
            # against the installed Gradio version.
            pdf_input = gr.File(
                label="Upload PDF", file_types=[".pdf"], type="filepath"
            )

            gr.Markdown("### Classification Method")
            # The selected string is passed to classify_document as `method`.
            # The default matches the method marked "recommended" in the help
            # text above.
            method_dropdown = gr.Dropdown(
                choices=["hybrid", "smart_semantic", "semantic", "keyword"],
                value="smart_semantic",
                label="Select classification method",
            )

            classify_btn = gr.Button("Classify Document", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### Product Definitions")
            # Starts as an empty JSON object; users type definitions here or
            # load the example at the bottom of the page.
            products_input = gr.Textbox(
                label="Product definitions (JSON format)",
                value="{}",
                # NOTE(review): multi-line height is currently disabled —
                # confirm whether it should be restored or removed.
                # lines=19,
                placeholder="Enter product definitions in JSON format or use examples below...",
            )

    # Output row: ranked matches on the left, optional summary on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Classification Results")
            # Receives the first element returned by classify_document:
            # a {name: score} dict on success or a message string on failure.
            results_output = gr.Label(label="Top 3 matches with confidence scores")
        
        with gr.Column():
            gr.Markdown("### Document Summary")
            # Read-only (interactive=False); receives the second element
            # returned by classify_document.
            summary_output = gr.Textbox(
                label="LLM-generated summary (smart_semantic method only)",
                lines=3,
                interactive=False
            )

    # One example row whose three values line up positionally with `inputs`:
    # PDF path, product definitions serialized as indented JSON, and method.
    # NOTE(review): the PDF path is relative — presumably resolved from the
    # directory the app is started in; verify for deployments.
    gr.Examples(
        examples=[
            [
                "./examples/CV_KevinSerrano.pdf",
                json.dumps(employment_products, indent=2),
                "smart_semantic",
            ],
        ],
        inputs=[pdf_input, products_input, method_dropdown],
        label="Product Definition Examples",
    )

    # Set up event handlers
    # Clicking the button calls classify_document with the three input values;
    # its (results, summary) return pair maps positionally onto `outputs`.
    classify_btn.click(
        fn=classify_document,
        inputs=[pdf_input, products_input, method_dropdown],
        outputs=[results_output, summary_output],
    )

# Launch the app only when this file is executed as a script, so importing
# the module (e.g. to reuse `demo`) does not start a server.
if __name__ == "__main__":
    demo.launch()