#!/usr/bin/env python3
"""
Mizan Leaderboard - Enhanced Version with Submit Functionality

Includes leaderboard display, model submission, and evaluation tracking.
"""

import logging

import gradio as gr

from ui_components import (
    create_leaderboard_tab,
    create_dataset_tab,
    create_submit_evaluation_tab,
)
from data_processor import load_leaderboard_from_csv
from evaluation_service import submit_evaluation

logger = logging.getLogger(__name__)

# Global data storage: leaderboard rows loaded from CSV at startup.
current_data = None


def create_leaderboard_demo():
    """Create the enhanced leaderboard demo interface with submit functionality.

    Returns:
        gr.Blocks: the assembled Gradio application, ready to ``launch()``.
    """
    global current_data

    # Load data from the CSV file once, when the UI is built.
    current_data = load_leaderboard_from_csv()

    with gr.Blocks(title="Mizan", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Mizan Leaderboard
        Performance comparison for Turkish embedding models
        """)

        with gr.Tabs():
            # Tab 1: Leaderboard
            with gr.Tab("📊 Leaderboard"):
                leaderboard_table = create_leaderboard_tab(current_data)

            # Tab 2: Submit
            with gr.Tab("🚀 Submit"):
                (model_input, email_input, submit_btn,
                 login_button, result_output) = create_submit_evaluation_tab()

                # Submit evaluation functionality with authentication.
                def handle_submit_evaluation(model_name, email, profile,
                                             progress=gr.Progress()):
                    """Validate authentication and input, then forward the submission.

                    Args:
                        model_name: model identifier entered by the user.
                        email: address that will receive the benchmark results.
                        profile: value injected by the ``login_button`` input —
                            ``None`` when nobody is logged in; in local dev
                            Gradio passes the literal button label string
                            instead of a real profile.
                        progress: Gradio progress tracker.

                    Returns:
                        str: status message rendered in ``result_output``.
                    """
                    # Authentication check: no profile means not logged in.
                    # NOTE(review): the original messages carried HTML markup
                    # that was lost in transit; plain-text equivalents are
                    # used here — confirm against the deployed styling.
                    if profile is None:
                        logger.warning(
                            "Unauthorized submission attempt with no profile")
                        return ("Authentication required. Please log in with "
                                "your Hugging Face account.")

                    # IMPORTANT: in local development Gradio returns the
                    # "Sign in with Hugging Face" string — a placeholder, NOT
                    # a real authentication. Block submission in that case.
                    if isinstance(profile, str) and profile == "Sign in with Hugging Face":
                        return "⚠️ HF authentication required."

                    # Email is required so results can be delivered.
                    if not email or not email.strip():
                        return ("Email address is required to receive "
                                "benchmark results.")

                    batch_size = 32  # Always use the default batch size.
                    result_msg, _updated_data = submit_evaluation(
                        model_name, email, batch_size, current_data, progress)

                    # Note: evaluation is async, so the leaderboard is not
                    # refreshed here; results are published manually later.
                    logger.info("Submission processed for model: %s by user: %s",
                                model_name, profile)
                    return result_msg

                # Passing the LoginButton as an input makes Gradio inject the
                # OAuth profile as the third handler argument.
                submit_btn.click(
                    fn=handle_submit_evaluation,
                    inputs=[model_input, email_input, login_button],
                    outputs=[result_output],
                )

            # Tab 3: Dataset Information
            with gr.Tab("📊 Dataset Information"):
                dataset_table = create_dataset_tab()

                gr.Markdown("""
                ---
                ### 📊 Metrics Explanation:
                - **Mean (Task)**: Average performance across all individual tasks
                - **Mean (TaskType)**: Average performance by task categories
                - **Classification**: Performance on Turkish classification tasks
                - **Clustering**: Performance on Turkish clustering tasks
                - **Pair Classification**: Performance on pair classification tasks (like NLI)
                - **Retrieval**: Performance on information retrieval tasks
                - **STS**: Performance on Semantic Textual Similarity tasks
                - **Correlation**: Weighted average of correlation metrics for NLI and STSB datasets
                - **Parameters**: Number of model parameters
                - **Embed Dim**: Embedding dimension size
                - **Max Seq Length**: Maximum sequence length the model can process (0 = infinite/unlimited)
                - **Vocab Size**: Size of the model's vocabulary

                ### 📖 About Mizan:
                This leaderboard presents results from the **Mizan** benchmark, which evaluates
                embedding models on Turkish language tasks across multiple domains including:
                - Text classification and sentiment analysis
                - Information retrieval and search
                - Semantic textual similarity
                - Text clustering and pair classification

                ### 🚀 Submit Your Model:
                Use the **Submit** tab to submit your Turkish embedding model for evaluation.
                Your request will be reviewed by administrators and you'll receive email
                notifications about the progress.

                ### Contact:
                For any questions or feedback, please contact info@newmind.ai

                ### Links:
                - **GitHub**: [mteb/mteb v1.38.51](https://github.com/embeddings-benchmark/mteb/tree/1.38.51) - Mizan is currently based on MTEB v1.38.51 (MTEB v2.0.0 support coming soon)
                """)

    return demo


def main():
    """Main entry point: build the demo and serve it on all interfaces."""
    print("🚀 Starting Mizan Leaderboard...")
    demo = create_leaderboard_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()