import os
import json

import numpy as np
import pandas as pd
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download

OWNER = "inceptionai"
DATASET_REPO_ID = f"{OWNER}/requests-dataset"

HEADER = """
"""

# NOTE: ABOUT_SECTION and BOTTOM_LOGO are referenced below; their full
# markup lives with the Space's assets, so minimal placeholders are
# defined here to keep the module self-contained.
ABOUT_SECTION = """
## About
"""

BOTTOM_LOGO = """
"""
CITATION_BUTTON_TEXT = """
@misc{Arabic-Leaderboards,
  author = {El Filali, Ali and Albarri, Sarah and Abouelseoud, Arwa and Kamboj, Samta and Sengupta, Neha and Nakov, Preslav},
  title = {Arabic-Leaderboards: Comprehensive Evaluation of Arabic Large Language Models},
  year = {2025},
  publisher = {Inception},
  howpublished = "\\url{https://huggingface.co/spaces/inceptionai/Arabic-Leaderboards}"
}
"""
CITATION_BUTTON_LABEL = """
Copy the following snippet to cite the results from all Arabic Leaderboards in this Space.
"""
def load_results():
    """
    Loads the AraGen v2 results from aragen_v2_results.json and returns two dataframes:
    1) df_3c3h with columns for 3C3H scores
    2) df_tasks with columns for tasks scores
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    results_file = os.path.join(current_dir, "assets", "results", "aragen_v2_results.json")
    
    with open(results_file, 'r') as f:
        data = json.load(f)
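
    # Assumed shape of each results entry (inferred from the lookups below):
    # {
    #   "Meta": {"Model Name": ..., "Revision": ..., "Precision": ...,
    #            "Params": ..., "License": ...},
    #   "claude-3.5-sonnet Scores": {
    #     "3C3H Scores": {"3C3H Score": 0.81, "Correctness": 0.9, ...},
    #     "Tasks Scores": {"Question Answering (QA)": 0.8, ...}
    #   }
    # }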
    
    # Filter out any entries that only contain '_last_sync_timestamp'
    filtered_data = []
    for entry in data:
        if len(entry.keys()) == 1 and "_last_sync_timestamp" in entry:
            continue
        filtered_data.append(entry)
    
    data = filtered_data
    
    data_3c3h = []
    data_tasks = []
    
    for model_data in data:
        meta = model_data.get('Meta', {})
        model_name = meta.get('Model Name', 'UNK')
        revision = meta.get('Revision', 'UNK')
        precision = meta.get('Precision', 'UNK')
        params = meta.get('Params', 'UNK')
        
        # Non-numeric Params (e.g. 'UNK') fall back to np.inf so unknown sizes sort last
        try:
            model_size_numeric = float(params)
        except (ValueError, TypeError):
            model_size_numeric = np.inf
        
        scores_data = model_data.get('claude-3.5-sonnet Scores', {})
        scores_3c3h = scores_data.get('3C3H Scores', {})
        scores_tasks = scores_data.get('Tasks Scores', {})
        
        formatted_scores_3c3h = {k: v*100 for k, v in scores_3c3h.items()}
        formatted_scores_tasks = {k: v*100 for k, v in scores_tasks.items()}
        
        data_entry_3c3h = {
            'Model Name': model_name,
            'Revision': revision,
            'License': meta.get('License', 'UNK'),
            'Precision': precision,
            'Model Size': model_size_numeric,
            '3C3H Score': formatted_scores_3c3h.get("3C3H Score", np.nan),
            'Correctness': formatted_scores_3c3h.get("Correctness", np.nan),
            'Completeness': formatted_scores_3c3h.get("Completeness", np.nan),
            'Conciseness': formatted_scores_3c3h.get("Conciseness", np.nan),
            'Helpfulness': formatted_scores_3c3h.get("Helpfulness", np.nan),
            'Honesty': formatted_scores_3c3h.get("Honesty", np.nan),
            'Harmlessness': formatted_scores_3c3h.get("Harmlessness", np.nan),
        }
        data_3c3h.append(data_entry_3c3h)
        
        data_entry_tasks = {
            'Model Name': model_name,
            'Revision': revision,
            'License': meta.get('License', 'UNK'),
            'Precision': precision,
            'Model Size': model_size_numeric,
            **formatted_scores_tasks
        }
        data_tasks.append(data_entry_tasks)
    
    df_3c3h = pd.DataFrame(data_3c3h)
    df_tasks = pd.DataFrame(data_tasks)
    
    score_columns_3c3h = ['3C3H Score', 'Correctness', 'Completeness', 'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness']
    df_3c3h[score_columns_3c3h] = df_3c3h[score_columns_3c3h].round(4)
    
    # Map unknown sizes (np.inf) to a finite cap so the size-filter sliders get a bounded range
    max_model_size_value = 1000
    df_3c3h['Model Size Filter'] = df_3c3h['Model Size'].replace(np.inf, max_model_size_value)
    
    if '3C3H Score' in df_3c3h.columns:
        df_3c3h = df_3c3h.sort_values(by='3C3H Score', ascending=False)
    df_3c3h.insert(0, 'Rank', range(1, len(df_3c3h) + 1))
    
    task_columns = [col for col in df_tasks.columns if col not in ['Model Name', 'Revision', 'License', 'Precision', 'Model Size', 'Model Size Filter']]
    if task_columns:
        df_tasks[task_columns] = df_tasks[task_columns].round(4)
    
    df_tasks['Model Size Filter'] = df_tasks['Model Size'].replace(np.inf, max_model_size_value)
    
    if task_columns:
        df_tasks = df_tasks.sort_values(by=task_columns[0], ascending=False)
    else:
        df_tasks = df_tasks.sort_values(by='Model Name', ascending=True)
    df_tasks.insert(0, 'Rank', range(1, len(df_tasks) + 1))
    
    return df_3c3h, df_tasks, task_columns
def load_if_data():
    """
    Loads the instruction-following data from ifeval_results.jsonl 
    and returns a dataframe with relevant columns, 
    converting decimal values to percentage format.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    results_file = os.path.join(current_dir, "assets", "results", "ifeval_results.jsonl")
    
    data = []
    with open(results_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
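
    # Each JSONL line is one model record; assumed fields (inferred from the
    # column accesses below):
    # {"Model Name": ..., "Creator": ..., "Family": ..., "Type": ...,
    #  "En Prompt-lvl": 0.87, "En Instruction-lvl": 0.85,
    #  "Ar Prompt-lvl": 0.8, "Ar Instruction-lvl": 0.78,
    #  "Size (B)": "7", "Base Model": ..., "Context Window": ..., "Lang.": ...}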
    
    df = pd.DataFrame(data)
    
    # Convert numeric columns
    numeric_cols = ["En Prompt-lvl", "En Instruction-lvl", "Ar Prompt-lvl", "Ar Instruction-lvl"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Compute average accuracy for En and Ar
    df["Average Accuracy (En)"] = (df["En Prompt-lvl"] + df["En Instruction-lvl"]) / 2
    df["Average Accuracy (Ar)"] = (df["Ar Prompt-lvl"] + df["Ar Instruction-lvl"]) / 2
    
    # Convert them to percentage format (e.g., 0.871 -> 87.1)
    for col in numeric_cols:
        df[col] = (df[col] * 100).round(1)
    df["Average Accuracy (En)"] = (df["Average Accuracy (En)"] * 100).round(1)
    df["Average Accuracy (Ar)"] = (df["Average Accuracy (Ar)"] * 100).round(1)
    
    # Handle size as numeric
    def parse_size(x):
        try:
            return float(x)
        except (ValueError, TypeError):
            return np.inf
    
    df["Model Size"] = df["Size (B)"].apply(parse_size)
    
    # Add a filter column for size
    max_model_size_value = 1000
    df["Model Size Filter"] = df["Model Size"].replace(np.inf, max_model_size_value)
    
    # Default ordering: Arabic average accuracy, best first
    df = df.sort_values(by="Average Accuracy (Ar)", ascending=False)
    df = df.reset_index(drop=True)
    df.insert(0, "Rank", range(1, len(df) + 1))
    
    return df
def submit_model(model_name, revision, precision, params, license, modality):
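    """
    Validate a model submission and, if it is new, upload a PENDING request
    file to the requests dataset repo. Returns a Markdown status message.
    """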
    df_3c3h, df_tasks, _ = load_results()
    existing_models_results = df_3c3h[['Model Name', 'Revision', 'Precision']]
    if precision == 'Missing':
        precision = None
    else:
        precision = precision.strip().lower()
    df_pending = load_requests('pending')
    df_finished = load_requests('finished')
    model_exists_in_results = (
        (existing_models_results['Model Name'] == model_name) &
        (existing_models_results['Revision'] == revision) &
        (existing_models_results['Precision'] == precision)
    ).any()
    if model_exists_in_results:
        return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
    if not df_pending.empty:
        existing_models_pending = df_pending[['model_name', 'revision', 'precision']]
        model_exists_in_pending = (
            (existing_models_pending['model_name'] == model_name) &
            (existing_models_pending['revision'] == revision) &
            (existing_models_pending['precision'] == precision)
        ).any()
        if model_exists_in_pending:
            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' is already in the pending evaluations.**"
    if not df_finished.empty:
        existing_models_finished = df_finished[['model_name', 'revision', 'precision']]
        model_exists_in_finished = (
            (existing_models_finished['model_name'] == model_name) &
            (existing_models_finished['revision'] == revision) &
            (existing_models_finished['precision'] == precision)
        ).any()
        if model_exists_in_finished:
            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
    api = HfApi()
    try:
        _ = api.model_info(model_name)
    except Exception:
        return f"**Error: Could not find model '{model_name}' on HuggingFace Hub. Please ensure the model name is correct and the model is public.**"
    status = "PENDING"
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "params": params,
        "status": status,
        "modality": modality
    }
    submission_json = json.dumps(submission, indent=2)
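    # Example serialized request (illustrative values):
    # {
    #   "model_name": "inceptionai/jais-family-30b-8k",
    #   "license": "Open",
    #   "revision": "main",
    #   "precision": "float16",
    #   "params": "30",
    #   "status": "PENDING",
    #   "modality": "Text"
    # }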
    org_model = model_name.split('/')
    if len(org_model) != 2:
        return "**Please enter the full model name including the organization or username, e.g., 'inceptionai/jais-family-30b-8k'**"
    org, model_id = org_model
    precision_str = precision if precision else 'Missing'
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}.json"
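    # e.g. pending/inceptionai/jais-family-30b-8k_eval_request_main_float16.json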
    try:
        hf_api_token = os.environ.get('HF_API_TOKEN', None)
        api.upload_file(
            path_or_fileobj=submission_json.encode('utf-8'),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        return f"**Error: Could not submit the model. {str(e)}**"
    return f"**Model '{model_name}' has been submitted for evaluation.**"
def load_requests(status_folder):
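    """
    Download every JSON request file under `status_folder/` ('pending',
    'finished', or 'failed') in the requests dataset repo and return them
    as a dataframe; returns an empty dataframe if the repo is unreachable.
    """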
    api = HfApi()
    requests_data = []
    folder_path_in_repo = status_folder
    hf_api_token = os.environ.get('HF_API_TOKEN', None)
    try:
        files_info = api.list_repo_files(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error accessing dataset repository: {e}")
        return pd.DataFrame()
    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
    for file_path in files_in_folder:
        try:
            local_file_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=file_path,
                repo_type="dataset",
                token=hf_api_token
            )
            with open(local_file_path, 'r') as f:
                request = json.load(f)
            requests_data.append(request)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            continue
    df = pd.DataFrame(requests_data)
    return df
def filter_df_3c3h(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
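    """
    Filter the 3C3H leaderboard by search query, precision, license, and
    model-size range, then re-rank and return only the selected columns.
    """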
    df_ = load_results()[0].copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    if precision_filters:
        include_missing = 'Missing' in precision_filters
        selected_precisions = [p for p in precision_filters if p != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['Precision'].isin(selected_precisions)) |
                (df_['Precision'] == 'UNK') | 
                (df_['Precision'].isna())
            ]
        else:
            df_ = df_[df_['Precision'].isin(selected_precisions)]
    if license_filters:
        include_missing = 'Missing' in license_filters
        selected_licenses = [l for l in license_filters if l != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['License'].isin(selected_licenses)) |
                (df_['License'] == 'UNK') |
                (df_['License'].isna())
            ]
        else:
            df_ = df_[df_['License'].isin(selected_licenses)]
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    fixed_column_order = [
        "Rank",
        "Model Name",
        "3C3H Score",
        "Correctness",
        "Completeness",
        "Conciseness",
        "Helpfulness",
        "Honesty",
        "Harmlessness",
        "Revision",
        "License",
        "Precision",
        "Model Size"
    ]
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size, task_columns):
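    """
    Same filtering as filter_df_3c3h, but for the per-task leaderboard;
    rows are re-sorted by the first task column before re-ranking.
    """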
    df_ = load_results()[1].copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    if precision_filters:
        include_missing = 'Missing' in precision_filters
        selected_precisions = [p for p in precision_filters if p != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['Precision'].isin(selected_precisions)) |
                (df_['Precision'] == 'UNK') |
                (df_['Precision'].isna())
            ]
        else:
            df_ = df_[df_['Precision'].isin(selected_precisions)]
    if license_filters:
        include_missing = 'Missing' in license_filters
        selected_licenses = [l for l in license_filters if l != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['License'].isin(selected_licenses)) |
                (df_['License'] == 'UNK') |
                (df_['License'].isna())
            ]
        else:
            df_ = df_[df_['License'].isin(selected_licenses)]
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    if task_columns:
        first_task = task_columns[0]
        df_ = df_.sort_values(by=first_task, ascending=False)
    else:
        df_ = df_.sort_values(by='Model Name', ascending=True)
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    fixed_column_order = [
        "Rank",
        "Model Name",
        "Question Answering (QA)",
        "Orthographic and Grammatical Analysis",
        "Safety",
        "Reasoning",
        "Revision",
        "License",
        "Precision",
        "Model Size"
    ]
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def filter_if_df(search_query, selected_cols, family_filters, min_size, max_size):
    """
    Filters the instruction-following dataframe based on various criteria.
    We have removed 'Filter by Type' and 'Filter by Creator'.
    """
    df_ = load_if_data().copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    
    # Search by model name
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    
    # Filter by Family only (Creator and Type filters removed)
    if family_filters:
        df_ = df_[df_['Family'].isin(family_filters)]
    
    # Filter by Model Size
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    
    # Re-rank
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    
    fixed_column_order = [
        "Rank",
        "Model Name",
        "Creator",
        "Family",
        "Type",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)",
        "En Prompt-lvl",
        "En Instruction-lvl",
        "Size (B)",
        "Base Model",
        "Context Window",
        "Lang."
    ]
    
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def main():
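    """Build and launch the Gradio app for all Arabic leaderboards."""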
    df_3c3h, df_tasks, task_columns = load_results()
    df_if = load_if_data()  # Instruction Following DF
    # Setup precision/license options for the 3C3H scoreboard
    precision_options_3c3h = sorted(df_3c3h['Precision'].dropna().unique().tolist())
    precision_options_3c3h = [p for p in precision_options_3c3h if p != 'UNK']
    precision_options_3c3h.append('Missing')
    license_options_3c3h = sorted(df_3c3h['License'].dropna().unique().tolist())
    license_options_3c3h = [l for l in license_options_3c3h if l != 'UNK']
    license_options_3c3h.append('Missing')
    # Setup precision/license options for tasks scoreboard
    precision_options_tasks = sorted(df_tasks['Precision'].dropna().unique().tolist())
    precision_options_tasks = [p for p in precision_options_tasks if p != 'UNK']
    precision_options_tasks.append('Missing')
    license_options_tasks = sorted(df_tasks['License'].dropna().unique().tolist())
    license_options_tasks = [l for l in license_options_tasks if l != 'UNK']
    license_options_tasks.append('Missing')
    # Model size range for 3C3H scoreboard
    min_model_size_3c3h = int(df_3c3h['Model Size Filter'].min())
    max_model_size_3c3h = int(df_3c3h['Model Size Filter'].max())
    # Model size range for tasks scoreboard
    min_model_size_tasks = int(df_tasks['Model Size Filter'].min())
    max_model_size_tasks = int(df_tasks['Model Size Filter'].max())
    # Column choices for 3C3H
    column_choices_3c3h = [col for col in df_3c3h.columns.tolist() if col != 'Model Size Filter']
    # Column choices for tasks
    column_choices_tasks = [col for col in df_tasks.columns.tolist() if col != 'Model Size Filter']
    # Now for instruction-following
    family_options_if = sorted(df_if['Family'].dropna().unique().tolist())
    min_model_size_if = int(df_if['Model Size Filter'].min())
    max_model_size_if = int(df_if['Model Size Filter'].max())
    #
    # IMPORTANT: Reorder the columns for the Instruction-Following leaderboard
    # Define the full order and the default visible columns separately.
    #
    all_if_columns = [
        "Rank",
        "Model Name",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)",
        "En Prompt-lvl",
        "En Instruction-lvl",
        "Type",
        "Creator",
        "Family",
        "Size (B)",
        "Base Model",
        "Context Window",
        "Lang."
    ]
    default_if_columns = [
        "Rank",
        "Model Name",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)"
    ]
    
    with gr.Blocks() as demo:
        gr.HTML(HEADER)
        with gr.Tabs():
            #
            # AL Leaderboards Tab
            #
            with gr.Tab("AL Leaderboards 🏅"):
                # -------------------------
                # Sub-Tab: AraGen Leaderboards
                # -------------------------
                with gr.Tab("🐪 AraGen Leaderboards"):
                    with gr.Tabs():
                        # 3C3H Scores
                        with gr.Tab("3C3H Scores"):
                            with gr.Accordion("⚙️ Filters", open=False):
                                with gr.Row():
                                    search_box_3c3h = gr.Textbox(
                                        placeholder="Search for models...", 
                                        label="Search", 
                                        interactive=True
                                    )
                                with gr.Row():
                                    column_selector_3c3h = gr.CheckboxGroup(
                                        choices=column_choices_3c3h,
                                        value=[
                                            'Rank', 'Model Name', '3C3H Score', 'Correctness', 'Completeness',
                                            'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness'
                                        ],
                                        label="Select columns to display"
                                    )
                                with gr.Row():
                                    license_filter_3c3h = gr.CheckboxGroup(
                                        choices=license_options_3c3h,
                                        value=license_options_3c3h.copy(),
                                        label="Filter by License"
                                    )
                                    precision_filter_3c3h = gr.CheckboxGroup(
                                        choices=precision_options_3c3h,
                                        value=precision_options_3c3h.copy(),
                                        label="Filter by Precision"
                                    )
                                with gr.Row():
                                    model_size_min_filter_3c3h = gr.Slider(
                                        minimum=min_model_size_3c3h,
                                        maximum=max_model_size_3c3h,
                                        value=min_model_size_3c3h,
                                        step=1,
                                        label="Minimum Model Size",
                                        interactive=True
                                    )
                                    model_size_max_filter_3c3h = gr.Slider(
                                        minimum=min_model_size_3c3h,
                                        maximum=max_model_size_3c3h,
                                        value=max_model_size_3c3h,
                                        step=1,
                                        label="Maximum Model Size",
                                        interactive=True
                                    )
                            leaderboard_3c3h = gr.Dataframe(
                                df_3c3h[[
                                    'Rank', 'Model Name', '3C3H Score', 'Correctness', 'Completeness',
                                    'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness'
                                ]],
                                interactive=False
                            )
                            filter_inputs_3c3h = [
                                search_box_3c3h, column_selector_3c3h,
                                precision_filter_3c3h, license_filter_3c3h,
                                model_size_min_filter_3c3h, model_size_max_filter_3c3h
                            ]
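                            # Re-render the table whenever a filter changes or Enter is pressed in the search box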
                            search_box_3c3h.submit(filter_df_3c3h, inputs=filter_inputs_3c3h, outputs=leaderboard_3c3h)
                            for component in filter_inputs_3c3h:
                                component.change(filter_df_3c3h, inputs=filter_inputs_3c3h, outputs=leaderboard_3c3h)
                        # Tasks Scores
                        with gr.Tab("Tasks Scores"):
                            gr.Markdown("This Table is sorted based on the First Task (Question Answering)")
                            with gr.Accordion("⚙️ Filters", open=False):
                                with gr.Row():
                                    search_box_tasks = gr.Textbox(
                                        placeholder="Search for models...", 
                                        label="Search", 
                                        interactive=True
                                    )
                                with gr.Row():
                                    column_selector_tasks = gr.CheckboxGroup(
                                        choices=column_choices_tasks,
                                        value=['Rank', 'Model Name'] + task_columns,
                                        label="Select columns to display"
                                    )
                                with gr.Row():
                                    license_filter_tasks = gr.CheckboxGroup(
                                        choices=license_options_tasks,
                                        value=license_options_tasks.copy(),
                                        label="Filter by License"
                                    )
                                    precision_filter_tasks = gr.CheckboxGroup(
                                        choices=precision_options_tasks,
                                        value=precision_options_tasks.copy(),
                                        label="Filter by Precision"
                                    )
                                with gr.Row():
                                    model_size_min_filter_tasks = gr.Slider(
                                        minimum=min_model_size_tasks,
                                        maximum=max_model_size_tasks,
                                        value=min_model_size_tasks,
                                        step=1,
                                        label="Minimum Model Size",
                                        interactive=True
                                    )
                                    model_size_max_filter_tasks = gr.Slider(
                                        minimum=min_model_size_tasks,
                                        maximum=max_model_size_tasks,
                                        value=max_model_size_tasks,
                                        step=1,
                                        label="Maximum Model Size",
                                        interactive=True
                                    )
                            leaderboard_tasks = gr.Dataframe(
                                df_tasks[['Rank', 'Model Name'] + task_columns],
                                interactive=False
                            )
                            filter_inputs_tasks = [
                                search_box_tasks, column_selector_tasks,
                                precision_filter_tasks, license_filter_tasks,
                                model_size_min_filter_tasks, model_size_max_filter_tasks
                            ]
                            search_box_tasks.submit(
                                lambda sq, cols, pf, lf, min_val, max_val: filter_df_tasks(sq, cols, pf, lf, min_val, max_val, task_columns),
                                inputs=filter_inputs_tasks,
                                outputs=leaderboard_tasks
                            )
                            for component in filter_inputs_tasks:
                                component.change(
                                    lambda sq, cols, pf, lf, min_val, max_val: filter_df_tasks(sq, cols, pf, lf, min_val, max_val, task_columns),
                                    inputs=filter_inputs_tasks,
                                    outputs=leaderboard_tasks
                                )
                # -------------------------
                # Sub-Tab: Instruction Following Leaderboard
                # -------------------------
                with gr.Tab("🗡️ Instruction Following Leaderboard"):
                    with gr.Accordion("⚙️ Filters", open=False):
                        with gr.Row():
                            search_box_if = gr.Textbox(
                                placeholder="Search for models...", 
                                label="Search", 
                                interactive=True
                            )
                        with gr.Row():
                            column_selector_if = gr.CheckboxGroup(
                                choices=all_if_columns,
                                value=default_if_columns,
                                label="Select columns to display"
                            )
                        with gr.Row():
                            family_filter_if = gr.CheckboxGroup(
                                choices=family_options_if,
                                value=family_options_if.copy(),
                                label="Filter by Family"
                            )
                        with gr.Row():
                            model_size_min_filter_if = gr.Slider(
                                minimum=min_model_size_if,
                                maximum=max_model_size_if,
                                value=min_model_size_if,
                                step=1,
                                label="Minimum Model Size",
                                interactive=True
                            )
                            model_size_max_filter_if = gr.Slider(
                                minimum=min_model_size_if,
                                maximum=max_model_size_if,
                                value=max_model_size_if,
                                step=1,
                                label="Maximum Model Size",
                                interactive=True
                            )
                    leaderboard_if = gr.Dataframe(
                        df_if[default_if_columns],
                        interactive=False
                    )
                    filter_inputs_if = [
                        search_box_if, column_selector_if,
                        family_filter_if, 
                        model_size_min_filter_if, model_size_max_filter_if
                    ]
                    search_box_if.submit(filter_if_df, inputs=filter_inputs_if, outputs=leaderboard_if)
                    for component in filter_inputs_if:
                        component.change(filter_if_df, inputs=filter_inputs_if, outputs=leaderboard_if)
            #
            # Submit Tab
            #
            with gr.Tab("Submit Here 📝"):
                df_pending = load_requests('pending')
                df_finished = load_requests('finished')
                df_failed = load_requests('failed')
                
                gr.Markdown(ABOUT_SECTION)
                
                gr.Markdown("## Submit Your Model for Evaluation")
                with gr.Column():
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter the full model name from HuggingFace Hub (e.g., inceptionai/jais-family-30b-8k)"
                    )
                    revision_input = gr.Textbox(label="Revision", placeholder="main", value="main")
                    precision_input = gr.Dropdown(
                        choices=["float16", "float32", "bfloat16", "8bit", "4bit"],
                        label="Precision",
                        value="float16"
                    )
                    params_input = gr.Textbox(
                        label="Params",
                        placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)"
                    )
                    license_input = gr.Textbox(
                        label="License",
                        placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
                        value="Open"
                    )
                    modality_input = gr.Radio(
                        choices=["Text"],
                        label="Modality",
                        value="Text"
                    )
                    submit_button = gr.Button("Submit Model")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        submit_model,
                        inputs=[
                            model_name_input, revision_input, precision_input,
                            params_input, license_input, modality_input
                        ],
                        outputs=submission_result
                    )
                gr.Markdown("## Evaluation Status")
                with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
                    if not df_pending.empty:
                        gr.Dataframe(df_pending)
                    else:
                        gr.Markdown("No pending evaluations.")
                with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
                    if not df_finished.empty:
                        gr.Dataframe(df_finished)
                    else:
                        gr.Markdown("No finished evaluations.")
                with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
                    if not df_failed.empty:
                        gr.Dataframe(df_failed)
                    else:
                        gr.Markdown("No failed evaluations.")
            # Citation Section
            with gr.Row():
                with gr.Accordion("📙 Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        lines=8,
                        elem_id="citation-button",
                        show_copy_button=True
                    )
        
        gr.HTML(BOTTOM_LOGO)
        
    demo.launch()
if __name__ == "__main__":
    main()
"""
CITATION_BUTTON_TEXT = """
@misc{Arabic-Leaderboards,
  author = {El Filali, Ali and Albarri, Sarah and Abouelseoud, Arwa and Kamboj, Samta and Sengupta, Neha and Nakov, Preslav},
  title = {Arabic-Leaderboards: Comprehensive Evaluation of Arabic Large Language Models},
  year = {2025},
  publisher = {Inception},
  howpublished = "url{https://huggingface.co/spaces/inceptionai/Arabic-Leaderboards}"
}
"""
CITATION_BUTTON_LABEL = """
Copy the following snippet to cite the results from all Arabic Leaderboards in this Space.
"""
def load_results():
    """
    Loads the AraGen v2 results from aragen_v2_results.json and returns two dataframes:
    1) df_3c3h with columns for 3C3H scores
    2) df_tasks with columns for tasks scores
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    results_file = os.path.join(current_dir, "assets", "results", "aragen_v2_results.json")
    
    with open(results_file, 'r') as f:
        data = json.load(f)
    
    # Filter out any entries that only contain '_last_sync_timestamp'
    filtered_data = []
    for entry in data:
        if len(entry.keys()) == 1 and "_last_sync_timestamp" in entry:
            continue
        filtered_data.append(entry)
    
    data = filtered_data
    
    data_3c3h = []
    data_tasks = []
    
    for model_data in data:
        meta = model_data.get('Meta', {})
        model_name = meta.get('Model Name', 'UNK')
        revision = meta.get('Revision', 'UNK')
        precision = meta.get('Precision', 'UNK')
        params = meta.get('Params', 'UNK')
        
        try:
            model_size_numeric = float(params)
        except (ValueError, TypeError):
            model_size_numeric = np.inf
        
        scores_data = model_data.get('claude-3.5-sonnet Scores', {})
        scores_3c3h = scores_data.get('3C3H Scores', {})
        scores_tasks = scores_data.get('Tasks Scores', {})
        
        formatted_scores_3c3h = {k: v*100 for k, v in scores_3c3h.items()}
        formatted_scores_tasks = {k: v*100 for k, v in scores_tasks.items()}
        
        data_entry_3c3h = {
            'Model Name': model_name,
            'Revision': revision,
            'License': meta.get('License', 'UNK'),
            'Precision': precision,
            'Model Size': model_size_numeric,
            '3C3H Score': formatted_scores_3c3h.get("3C3H Score", np.nan),
            'Correctness': formatted_scores_3c3h.get("Correctness", np.nan),
            'Completeness': formatted_scores_3c3h.get("Completeness", np.nan),
            'Conciseness': formatted_scores_3c3h.get("Conciseness", np.nan),
            'Helpfulness': formatted_scores_3c3h.get("Helpfulness", np.nan),
            'Honesty': formatted_scores_3c3h.get("Honesty", np.nan),
            'Harmlessness': formatted_scores_3c3h.get("Harmlessness", np.nan),
        }
        data_3c3h.append(data_entry_3c3h)
        
        data_entry_tasks = {
            'Model Name': model_name,
            'Revision': revision,
            'License': meta.get('License', 'UNK'),
            'Precision': precision,
            'Model Size': model_size_numeric,
            **formatted_scores_tasks
        }
        data_tasks.append(data_entry_tasks)
    
    df_3c3h = pd.DataFrame(data_3c3h)
    df_tasks = pd.DataFrame(data_tasks)
    
    score_columns_3c3h = ['3C3H Score', 'Correctness', 'Completeness', 'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness']
    df_3c3h[score_columns_3c3h] = df_3c3h[score_columns_3c3h].round(4)
    
    max_model_size_value = 1000
    df_3c3h['Model Size Filter'] = df_3c3h['Model Size'].replace(np.inf, max_model_size_value)
    
    if '3C3H Score' in df_3c3h.columns:
        df_3c3h = df_3c3h.sort_values(by='3C3H Score', ascending=False)
        df_3c3h.insert(0, 'Rank', range(1, len(df_3c3h) + 1))
    else:
        df_3c3h.insert(0, 'Rank', range(1, len(df_3c3h) + 1))
    
    task_columns = [col for col in df_tasks.columns if col not in ['Model Name', 'Revision', 'License', 'Precision', 'Model Size', 'Model Size Filter']]
    if task_columns:
        df_tasks[task_columns] = df_tasks[task_columns].round(4)
    
    df_tasks['Model Size Filter'] = df_tasks['Model Size'].replace(np.inf, max_model_size_value)
    
    if task_columns:
        first_task = task_columns[0]
        df_tasks = df_tasks.sort_values(by=first_task, ascending=False)
        df_tasks.insert(0, 'Rank', range(1, len(df_tasks) + 1))
    else:
        df_tasks = df_tasks.sort_values(by='Model Name', ascending=True)
        df_tasks.insert(0, 'Rank', range(1, len(df_tasks) + 1))
    
    return df_3c3h, df_tasks, task_columns
def load_if_data():
    """
    Loads the instruction-following data from ifeval_results.jsonl 
    and returns a dataframe with relevant columns, 
    converting decimal values to percentage format.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    results_file = os.path.join(current_dir, "assets", "results", "ifeval_results.jsonl")
    
    data = []
    with open(results_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
    
    df = pd.DataFrame(data)
    
    # Convert numeric columns
    numeric_cols = ["En Prompt-lvl", "En Instruction-lvl", "Ar Prompt-lvl", "Ar Instruction-lvl"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Compute average accuracy for En and Ar
    df["Average Accuracy (En)"] = (df["En Prompt-lvl"] + df["En Instruction-lvl"]) / 2
    df["Average Accuracy (Ar)"] = (df["Ar Prompt-lvl"] + df["Ar Instruction-lvl"]) / 2
    
    # Convert them to percentage format (e.g., 0.871 -> 87.1)
    for col in numeric_cols:
        df[col] = (df[col] * 100).round(1)
    df["Average Accuracy (En)"] = (df["Average Accuracy (En)"] * 100).round(1)
    df["Average Accuracy (Ar)"] = (df["Average Accuracy (Ar)"] * 100).round(1)
    
    # Handle size as numeric
    def parse_size(x):
        try:
            return float(x)
        except:
            return np.inf
    
    df["Model Size"] = df["Size (B)"].apply(parse_size)
    
    # Add a filter column for size
    max_model_size_value = 1000
    df["Model Size Filter"] = df["Model Size"].replace(np.inf, max_model_size_value)
    
    # Sort by "Average Accuracy (Ar)" as an example
    df = df.sort_values(by="Average Accuracy (Ar)", ascending=False)
    df = df.reset_index(drop=True)
    df.insert(0, "Rank", range(1, len(df) + 1))
    
    return df
def submit_model(model_name, revision, precision, params, license, modality):
    df_3c3h, df_tasks, _ = load_results()
    existing_models_results = df_3c3h[['Model Name', 'Revision', 'Precision']]
    if precision == 'Missing':
        precision = None
    else:
        precision = precision.strip().lower()
    df_pending = load_requests('pending')
    df_finished = load_requests('finished')
    model_exists_in_results = (
        (existing_models_results['Model Name'] == model_name) &
        (existing_models_results['Revision'] == revision) &
        (existing_models_results['Precision'] == precision)
    ).any()
    if model_exists_in_results:
        return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
    if not df_pending.empty:
        existing_models_pending = df_pending[['model_name', 'revision', 'precision']]
        model_exists_in_pending = (
            (existing_models_pending['model_name'] == model_name) &
            (existing_models_pending['revision'] == revision) &
            (existing_models_pending['precision'] == precision)
        ).any()
        if model_exists_in_pending:
            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' is already in the pending evaluations.**"
    if not df_finished.empty:
        existing_models_finished = df_finished[['model_name', 'revision', 'precision']]
        model_exists_in_finished = (
            (existing_models_finished['model_name'] == model_name) &
            (existing_models_finished['revision'] == revision) &
            (existing_models_finished['precision'] == precision)
        ).any()
        if model_exists_in_finished:
            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
    api = HfApi()
    try:
        _ = api.model_info(model_name)
    except Exception:
        return f"**Error: Could not find model '{model_name}' on HuggingFace Hub. Please ensure the model name is correct and the model is public.**"
    status = "PENDING"
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "params": params,
        "status": status,
        "modality": modality
    }
    submission_json = json.dumps(submission, indent=2)
    org_model = model_name.split('/')
    if len(org_model) != 2:
        return "**Please enter the full model name including the organization or username, e.g., 'inceptionai/jais-family-30b-8k'**"
    org, model_id = org_model
    precision_str = precision if precision else 'Missing'
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}.json"
    try:
        hf_api_token = os.environ.get('HF_API_TOKEN', None)
        api.upload_file(
            path_or_fileobj=submission_json.encode('utf-8'),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        return f"**Error: Could not submit the model. {str(e)}**"
    return f"**Model '{model_name}' has been submitted for evaluation.**"
def load_requests(status_folder):
    api = HfApi()
    requests_data = []
    folder_path_in_repo = status_folder
    hf_api_token = os.environ.get('HF_API_TOKEN', None)
    try:
        files_info = api.list_repo_files(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error accessing dataset repository: {e}")
        return pd.DataFrame()
    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
    for file_path in files_in_folder:
        try:
            local_file_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=file_path,
                repo_type="dataset",
                token=hf_api_token
            )
            with open(local_file_path, 'r') as f:
                request = json.load(f)
            requests_data.append(request)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            continue
    df = pd.DataFrame(requests_data)
    return df
def filter_df_3c3h(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
    df_ = load_results()[0].copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    if precision_filters:
        include_missing = 'Missing' in precision_filters
        selected_precisions = [p for p in precision_filters if p != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['Precision'].isin(selected_precisions)) |
                (df_['Precision'] == 'UNK') | 
                (df_['Precision'].isna())
            ]
        else:
            df_ = df_[df_['Precision'].isin(selected_precisions)]
    if license_filters:
        include_missing = 'Missing' in license_filters
        selected_licenses = [l for l in license_filters if l != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['License'].isin(selected_licenses)) |
                (df_['License'] == 'UNK') |
                (df_['License'].isna())
            ]
        else:
            df_ = df_[df_['License'].isin(selected_licenses)]
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    fixed_column_order = [
        "Rank",
        "Model Name",
        "3C3H Score",
        "Correctness",
        "Completeness",
        "Conciseness",
        "Helpfulness",
        "Honesty",
        "Harmlessness",
        "Revision",
        "License",
        "Precision",
        "Model Size"
    ]
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size, task_columns):
    df_ = load_results()[1].copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    if precision_filters:
        include_missing = 'Missing' in precision_filters
        selected_precisions = [p for p in precision_filters if p != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['Precision'].isin(selected_precisions)) |
                (df_['Precision'] == 'UNK') |
                (df_['Precision'].isna())
            ]
        else:
            df_ = df_[df_['Precision'].isin(selected_precisions)]
    if license_filters:
        include_missing = 'Missing' in license_filters
        selected_licenses = [l for l in license_filters if l != 'Missing']
        if include_missing:
            df_ = df_[
                (df_['License'].isin(selected_licenses)) |
                (df_['License'] == 'UNK') |
                (df_['License'].isna())
            ]
        else:
            df_ = df_[df_['License'].isin(selected_licenses)]
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    if task_columns:
        first_task = task_columns[0]
        df_ = df_.sort_values(by=first_task, ascending=False)
    else:
        df_ = df_.sort_values(by='Model Name', ascending=True)
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    fixed_column_order = [
        "Rank",
        "Model Name",
        "Question Answering (QA)",
        "Orthographic and Grammatical Analysis",
        "Safety",
        "Reasoning",
        "Revision",
        "License",
        "Precision",
        "Model Size"
    ]
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def filter_if_df(search_query, selected_cols, family_filters, min_size, max_size):
    """
    Filters the instruction-following dataframe based on various criteria.
    We have removed 'Filter by Type' and 'Filter by Creator'.
    """
    df_ = load_if_data().copy()
    if min_size > max_size:
        min_size, max_size = max_size, min_size
    
    # Search by model name
    if search_query:
        df_ = df_[df_['Model Name'].str.contains(search_query, case=False, na=False)]
    
    # Filter by Family only (Creator and Type filters removed)
    if family_filters:
        df_ = df_[df_['Family'].isin(family_filters)]
    
    # Filter by Model Size
    df_ = df_[(df_['Model Size Filter'] >= min_size) & (df_['Model Size Filter'] <= max_size)]
    
    # Re-rank
    if 'Rank' in df_.columns:
        df_ = df_.drop(columns=['Rank'])
    df_ = df_.reset_index(drop=True)
    df_.insert(0, 'Rank', range(1, len(df_)+1))
    
    fixed_column_order = [
        "Rank",
        "Model Name",
        "Creator",
        "Family",
        "Type",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)",
        "En Prompt-lvl",
        "En Instruction-lvl",
        "Size (B)",
        "Base Model",
        "Context Window",
        "Lang."
    ]
    
    selected_cols = [col for col in fixed_column_order if col in selected_cols and col in df_.columns]
    return df_[selected_cols]
def main():
    df_3c3h, df_tasks, task_columns = load_results()
    df_if = load_if_data()  # Instruction Following DF
    # Setup precision/license options for the 3C3H scoreboard
    precision_options_3c3h = sorted(df_3c3h['Precision'].dropna().unique().tolist())
    precision_options_3c3h = [p for p in precision_options_3c3h if p != 'UNK']
    precision_options_3c3h.append('Missing')
    license_options_3c3h = sorted(df_3c3h['License'].dropna().unique().tolist())
    license_options_3c3h = [l for l in license_options_3c3h if l != 'UNK']
    license_options_3c3h.append('Missing')
    # Setup precision/license options for tasks scoreboard
    precision_options_tasks = sorted(df_tasks['Precision'].dropna().unique().tolist())
    precision_options_tasks = [p for p in precision_options_tasks if p != 'UNK']
    precision_options_tasks.append('Missing')
    license_options_tasks = sorted(df_tasks['License'].dropna().unique().tolist())
    license_options_tasks = [l for l in license_options_tasks if l != 'UNK']
    license_options_tasks.append('Missing')
    # Model size range for 3C3H scoreboard
    min_model_size_3c3h = int(df_3c3h['Model Size Filter'].min())
    max_model_size_3c3h = int(df_3c3h['Model Size Filter'].max())
    # Model size range for tasks scoreboard
    min_model_size_tasks = int(df_tasks['Model Size Filter'].min())
    max_model_size_tasks = int(df_tasks['Model Size Filter'].max())
    # Column choices for 3C3H
    column_choices_3c3h = [col for col in df_3c3h.columns.tolist() if col != 'Model Size Filter']
    # Column choices for tasks
    column_choices_tasks = [col for col in df_tasks.columns.tolist() if col != 'Model Size Filter']
    # Now for instruction-following
    family_options_if = sorted(df_if['Family'].dropna().unique().tolist())
    min_model_size_if = int(df_if['Model Size Filter'].min())
    max_model_size_if = int(df_if['Model Size Filter'].max())
    #
    # IMPORTANT: Reorder the columns for the Instruction-Following leaderboard
    # Define the full order and the default visible columns separately.
    #
    all_if_columns = [
        "Rank",
        "Model Name",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)",
        "En Prompt-lvl",
        "En Instruction-lvl",
        "Type",
        "Creator",
        "Family",
        "Size (B)",
        "Base Model",
        "Context Window",
        "Lang."
    ]
    default_if_columns = [
        "Rank",
        "Model Name",
        "Average Accuracy (Ar)",
        "Ar Prompt-lvl",
        "Ar Instruction-lvl",
        "Average Accuracy (En)"
    ]
    
    with gr.Blocks() as demo:
        gr.HTML(HEADER)
        with gr.Tabs():
            #
            # AL Leaderboards Tab
            #
            with gr.Tab("AL Leaderboards 🏅"):
                # -------------------------
                # Sub-Tab: AraGen Leaderboards
                # -------------------------
                with gr.Tab("🐪 AraGen Leaderboards"):
                    with gr.Tabs():
                        # 3C3H Scores
                        with gr.Tab("3C3H Scores"):
                            with gr.Accordion("⚙️ Filters", open=False):
                                with gr.Row():
                                    search_box_3c3h = gr.Textbox(
                                        placeholder="Search for models...", 
                                        label="Search", 
                                        interactive=True
                                    )
                                with gr.Row():
                                    column_selector_3c3h = gr.CheckboxGroup(
                                        choices=column_choices_3c3h,
                                        value=[
                                            'Rank', 'Model Name', '3C3H Score', 'Correctness', 'Completeness',
                                            'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness'
                                        ],
                                        label="Select columns to display"
                                    )
                                with gr.Row():
                                    license_filter_3c3h = gr.CheckboxGroup(
                                        choices=license_options_3c3h,
                                        value=license_options_3c3h.copy(),
                                        label="Filter by License"
                                    )
                                    precision_filter_3c3h = gr.CheckboxGroup(
                                        choices=precision_options_3c3h,
                                        value=precision_options_3c3h.copy(),
                                        label="Filter by Precision"
                                    )
                                with gr.Row():
                                    model_size_min_filter_3c3h = gr.Slider(
                                        minimum=min_model_size_3c3h,
                                        maximum=max_model_size_3c3h,
                                        value=min_model_size_3c3h,
                                        step=1,
                                        label="Minimum Model Size",
                                        interactive=True
                                    )
                                    model_size_max_filter_3c3h = gr.Slider(
                                        minimum=min_model_size_3c3h,
                                        maximum=max_model_size_3c3h,
                                        value=max_model_size_3c3h,
                                        step=1,
                                        label="Maximum Model Size",
                                        interactive=True
                                    )
                            leaderboard_3c3h = gr.Dataframe(
                                df_3c3h[[
                                    'Rank', 'Model Name', '3C3H Score', 'Correctness', 'Completeness',
                                    'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness'
                                ]],
                                interactive=False
                            )
                            filter_inputs_3c3h = [
                                search_box_3c3h, column_selector_3c3h,
                                precision_filter_3c3h, license_filter_3c3h,
                                model_size_min_filter_3c3h, model_size_max_filter_3c3h
                            ]
                            search_box_3c3h.submit(filter_df_3c3h, inputs=filter_inputs_3c3h, outputs=leaderboard_3c3h)
                            for component in filter_inputs_3c3h:
                                component.change(filter_df_3c3h, inputs=filter_inputs_3c3h, outputs=leaderboard_3c3h)
                        # Tasks Scores
                        with gr.Tab("Tasks Scores"):
                            gr.Markdown("This Table is sorted based on the First Task (Question Answering)")
                            with gr.Accordion("⚙️ Filters", open=False):
                                with gr.Row():
                                    search_box_tasks = gr.Textbox(
                                        placeholder="Search for models...", 
                                        label="Search", 
                                        interactive=True
                                    )
                                with gr.Row():
                                    column_selector_tasks = gr.CheckboxGroup(
                                        choices=column_choices_tasks,
                                        value=['Rank', 'Model Name'] + task_columns,
                                        label="Select columns to display"
                                    )
                                with gr.Row():
                                    license_filter_tasks = gr.CheckboxGroup(
                                        choices=license_options_tasks,
                                        value=license_options_tasks.copy(),
                                        label="Filter by License"
                                    )
                                    precision_filter_tasks = gr.CheckboxGroup(
                                        choices=precision_options_tasks,
                                        value=precision_options_tasks.copy(),
                                        label="Filter by Precision"
                                    )
                                with gr.Row():
                                    model_size_min_filter_tasks = gr.Slider(
                                        minimum=min_model_size_tasks,
                                        maximum=max_model_size_tasks,
                                        value=min_model_size_tasks,
                                        step=1,
                                        label="Minimum Model Size",
                                        interactive=True
                                    )
                                    model_size_max_filter_tasks = gr.Slider(
                                        minimum=min_model_size_tasks,
                                        maximum=max_model_size_tasks,
                                        value=max_model_size_tasks,
                                        step=1,
                                        label="Maximum Model Size",
                                        interactive=True
                                    )
                            leaderboard_tasks = gr.Dataframe(
                                df_tasks[['Rank', 'Model Name'] + task_columns],
                                interactive=False
                            )
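                            # Same wiring pattern as the 3C3H tab, except the callback also
                            # forwards the task column list to filter_df_tasks.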
                            filter_inputs_tasks = [
                                search_box_tasks, column_selector_tasks,
                                precision_filter_tasks, license_filter_tasks,
                                model_size_min_filter_tasks, model_size_max_filter_tasks
                            ]
                            def filter_tasks_event(sq, cols, pf, lf, min_val, max_val):
                                # Shared callback: forwards to filter_df_tasks with the task
                                # column list bound from the enclosing scope.
                                return filter_df_tasks(sq, cols, pf, lf, min_val, max_val, task_columns)

                            search_box_tasks.submit(
                                filter_tasks_event,
                                inputs=filter_inputs_tasks,
                                outputs=leaderboard_tasks
                            )
                            for component in filter_inputs_tasks:
                                component.change(
                                    filter_tasks_event,
                                    inputs=filter_inputs_tasks,
                                    outputs=leaderboard_tasks
                                )
                # -------------------------
                # Sub-Tab: Instruction Following Leaderboard
                # -------------------------
                with gr.Tab("🗡️ Instruction Following Leaderboard"):
                    with gr.Accordion("⚙️ Filters", open=False):
                        with gr.Row():
                            search_box_if = gr.Textbox(
                                placeholder="Search for models...", 
                                label="Search", 
                                interactive=True
                            )
                        with gr.Row():
                            column_selector_if = gr.CheckboxGroup(
                                choices=all_if_columns,
                                value=default_if_columns,
                                label="Select columns to display"
                            )
                        with gr.Row():
                            family_filter_if = gr.CheckboxGroup(
                                choices=family_options_if,
                                value=family_options_if.copy(),
                                label="Filter by Family"
                            )
                        with gr.Row():
                            model_size_min_filter_if = gr.Slider(
                                minimum=min_model_size_if,
                                maximum=max_model_size_if,
                                value=min_model_size_if,
                                step=1,
                                label="Minimum Model Size",
                                interactive=True
                            )
                            model_size_max_filter_if = gr.Slider(
                                minimum=min_model_size_if,
                                maximum=max_model_size_if,
                                value=max_model_size_if,
                                step=1,
                                label="Maximum Model Size",
                                interactive=True
                            )
                    leaderboard_if = gr.Dataframe(
                        df_if[default_if_columns],
                        interactive=False
                    )
                    filter_inputs_if = [
                        search_box_if, column_selector_if,
                        family_filter_if, 
                        model_size_min_filter_if, model_size_max_filter_if
                    ]
                    search_box_if.submit(filter_if_df, inputs=filter_inputs_if, outputs=leaderboard_if)
                    for component in filter_inputs_if:
                        component.change(filter_if_df, inputs=filter_inputs_if, outputs=leaderboard_if)
            # -------------------------
            # Submit Tab
            # -------------------------
            with gr.Tab("Submit Here 📝"):
                df_pending = load_requests('pending')
                df_finished = load_requests('finished')
                df_failed = load_requests('failed')
                
                gr.Markdown(ABOUT_SECTION)
                
                gr.Markdown("## Submit Your Model for Evaluation")
                with gr.Column():
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter the full model name from HuggingFace Hub (e.g., inceptionai/jais-family-30b-8k)"
                    )
                    revision_input = gr.Textbox(label="Revision", placeholder="main", value="main")
                    precision_input = gr.Dropdown(
                        choices=["float16", "float32", "bfloat16", "8bit", "4bit"],
                        label="Precision",
                        value="float16"
                    )
                    params_input = gr.Textbox(
                        label="Params",
                        placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)"
                    )
                    license_input = gr.Textbox(
                        label="License",
                        placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
                        value="Open"
                    )
                    modality_input = gr.Radio(
                        choices=["Text"],
                        label="Modality",
                        value="Text"
                    )
                    submit_button = gr.Button("Submit Model")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        submit_model,
                        inputs=[
                            model_name_input, revision_input, precision_input,
                            params_input, license_input, modality_input
                        ],
                        outputs=submission_result
                    )
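                # Read-only views of the request queues loaded above; the counts in
                # the accordion titles are computed at build time.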
                gr.Markdown("## Evaluation Status")
                with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
                    if not df_pending.empty:
                        gr.Dataframe(df_pending)
                    else:
                        gr.Markdown("No pending evaluations.")
                with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
                    if not df_finished.empty:
                        gr.Dataframe(df_finished)
                    else:
                        gr.Markdown("No finished evaluations.")
                with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
                    if not df_failed.empty:
                        gr.Dataframe(df_failed)
                    else:
                        gr.Markdown("No failed evaluations.")
            # Citation Section
            with gr.Row():
                with gr.Accordion("📙 Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        lines=8,
                        elem_id="citation-button",
                        show_copy_button=True
                    )
        
        gr.HTML(BOTTOM_LOGO)

    # Launch after the Blocks context exits so the app is fully built first.
    demo.launch()


if __name__ == "__main__":
    main()