#!/usr/bin/env python3
"""
Mizan Leaderboard - Enhanced Version with Submit Functionality

Includes leaderboard display, model submission, and evaluation tracking.
"""

import logging

import gradio as gr

from ui_components import (
    create_leaderboard_tab,
    create_dataset_tab,
    create_submit_evaluation_tab,
)
from data_processor import load_leaderboard_from_csv
from evaluation_service import submit_evaluation

logger = logging.getLogger(__name__)

# Global data storage: leaderboard rows loaded from CSV at startup.
current_data = None


def create_leaderboard_demo():
    """Create the enhanced leaderboard demo interface with submit functionality.

    Returns:
        gr.Blocks: the assembled Gradio application, ready to ``launch()``.
    """
    global current_data

    # Load data from the CSV file once, when the UI is built.
    current_data = load_leaderboard_from_csv()

    with gr.Blocks(title="Mizan", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Mizan Leaderboard
        Performance comparison for Turkish embedding models
        """)

        with gr.Tabs():
            # Tab 1: Leaderboard
            with gr.Tab("📊 Leaderboard"):
                leaderboard_table = create_leaderboard_tab(current_data)

            # Tab 2: Submit
            with gr.Tab("🚀 Submit"):
                (model_input, email_input, submit_btn,
                 login_button, result_output) = create_submit_evaluation_tab()

                # Submit evaluation functionality with authentication.
                def handle_submit_evaluation(model_name, email, profile,
                                             progress=gr.Progress()):
                    """Validate authentication and input, then forward the submission.

                    Args:
                        model_name: model identifier entered by the user.
                        email: address that will receive the benchmark results.
                        profile: value injected by the ``login_button`` input —
                            ``None`` when nobody is logged in; in local dev
                            Gradio passes the literal button label string
                            instead of a real profile.
                        progress: Gradio progress tracker.

                    Returns:
                        str: status message rendered in ``result_output``.
                    """
                    # Authentication check: no profile means not logged in.
                    # NOTE(review): the original messages carried HTML markup
                    # that was lost in transit; plain-text equivalents are
                    # used here — confirm against the deployed styling.
                    if profile is None:
                        logger.warning(
                            "Unauthorized submission attempt with no profile")
                        return ("Authentication required. Please log in with "
                                "your Hugging Face account.")

                    # IMPORTANT: in local development Gradio returns the
                    # "Sign in with Hugging Face" string — a placeholder, NOT
                    # a real authentication. Block submission in that case.
                    if isinstance(profile, str) and profile == "Sign in with Hugging Face":
                        return "⚠️ HF authentication required."

                    # Email is required so results can be delivered.
                    if not email or not email.strip():
                        return ("Email address is required to receive "
                                "benchmark results.")

                    batch_size = 32  # Always use the default batch size.
                    result_msg, _updated_data = submit_evaluation(
                        model_name, email, batch_size, current_data, progress)

                    # Note: evaluation is async, so the leaderboard is not
                    # refreshed here; results are published manually later.
                    logger.info("Submission processed for model: %s by user: %s",
                                model_name, profile)
                    return result_msg

                # Passing the LoginButton as an input makes Gradio inject the
                # OAuth profile as the third handler argument.
                submit_btn.click(
                    fn=handle_submit_evaluation,
                    inputs=[model_input, email_input, login_button],
                    outputs=[result_output],
                )

            # Tab 3: Dataset Information
            with gr.Tab("📊 Dataset Information"):
                dataset_table = create_dataset_tab()

                gr.Markdown("""
                ---
                ### 📊 Metrics Explanation:
                - **Mean (Task)**: Average performance across all individual tasks
                - **Mean (TaskType)**: Average performance by task categories
                - **Classification**: Performance on Turkish classification tasks
                - **Clustering**: Performance on Turkish clustering tasks
                - **Pair Classification**: Performance on pair classification tasks (like NLI)
                - **Retrieval**: Performance on information retrieval tasks
                - **STS**: Performance on Semantic Textual Similarity tasks
                - **Correlation**: Weighted average of correlation metrics for NLI and STSB datasets
                - **Parameters**: Number of model parameters
                - **Embed Dim**: Embedding dimension size
                - **Max Seq Length**: Maximum sequence length the model can process (0 = infinite/unlimited)
                - **Vocab Size**: Size of the model's vocabulary

                ### 📖 About Mizan:
                This leaderboard presents results from the **Mizan** benchmark, which evaluates
                embedding models on Turkish language tasks across multiple domains including:
                - Text classification and sentiment analysis
                - Information retrieval and search
                - Semantic textual similarity
                - Text clustering and pair classification

                ### 🚀 Submit Your Model:
                Use the **Submit** tab to submit your Turkish embedding model for evaluation.
                Your request will be reviewed by administrators and you'll receive email
                notifications about the progress.

                ### Contact:
                For any questions or feedback, please contact info@newmind.ai

                ### Links:
                - **GitHub**: [mteb/mteb v1.38.51](https://github.com/embeddings-benchmark/mteb/tree/1.38.51) - Mizan is currently based on MTEB v1.38.51 (MTEB v2.0.0 support coming soon)
                """)

    return demo


def main():
    """Main entry point: build the demo and serve it on all interfaces."""
    print("🚀 Starting Mizan Leaderboard...")
    demo = create_leaderboard_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()