import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
import json
import os
import datetime
import urllib.parse

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df


def restart_space():
    API.restart_space(repo_id=REPO_ID)


def save_submission_and_notify(model_name, contact_email, weight_link, json_results, paper_link, description):
    """Save submission to file and provide instructions for email."""
    try:
        # Validate JSON format if provided
        if json_results.strip():
            try:
                json.loads(json_results)
            except json.JSONDecodeError:
                return "❌ Invalid JSON format in results field"

        # Create submission data
        submission_data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "model_name": model_name,
            "contact_email": contact_email,
            "weight_link": weight_link,
            "paper_link": paper_link,
            "description": description,
            "json_results": json_results,
        }

        # Save to submissions directory
        os.makedirs("submissions", exist_ok=True)
        filename = (
            f"submissions/{model_name.replace('/', '_')}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        )
        with open(filename, "w") as f:
            json.dump(submission_data, f, indent=2)

        # Create a mailto link for the user
        subject = f"SearchAgent Leaderboard Submission: {model_name}"
        body = f"""New model submission for SearchAgent Leaderboard:

Model Name: {model_name}
Contact Email: {contact_email}
Weight Link: {weight_link}
Paper Link: {paper_link}
Description: {description}

JSON Results:
{json_results}"""

        # URL-encode the email content (body truncated so the link stays a sane length)
        mailto_link = (
            f"mailto:shyuli@tencent.com?subject={urllib.parse.quote(subject)}&body={urllib.parse.quote(body[:500])}"
        )

        return f"""✅ Submission saved successfully!

📧 **Please send your submission to: shyuli@tencent.com**

You can either:
1. Click here to open your email client: [Send Email]({mailto_link})
2. Or copy the submission details above and send manually

Your submission has been saved to: {filename}

We'll review your model and get back to you at {contact_email}."""

    except Exception as e:
        return f"❌ Failed to save submission: {str(e)}"
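
# A minimal, illustrative invocation of the handler above (all values below are
# hypothetical placeholders, not real submissions). On success it writes a
# timestamped JSON file under submissions/ and returns the ✅ message with a
# prefilled mailto link; on malformed JSON it returns the ❌ message instead.
#
#   msg = save_submission_and_notify(
#       model_name="demo-org/search-agent-7b",
#       contact_email="author@example.com",
#       weight_link="https://huggingface.co/demo-org/search-agent-7b",
#       json_results='{"benchmark": "SearchAgent", "score": 0.0}',
#       paper_link="",
#       description="smoke test",
#   )
#   print(msg)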

### Space initialisation
# Use local data for demo purposes
try:
    print(EVAL_REQUESTS_PATH)
    # For demo, use local eval-queue directory if it exists
    if not os.path.exists(EVAL_REQUESTS_PATH):
        os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
    # snapshot_download(
    #     repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    # )
except Exception as e:
    print(f"Could not setup eval requests path: {e}")

try:
    print(EVAL_RESULTS_PATH)
    # For demo, use local eval-results directory if it exists
    if not os.path.exists(EVAL_RESULTS_PATH):
        os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
    # snapshot_download(
    #     repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    # )
except Exception as e:
    print(f"Could not setup eval results path: {e}")


def _debug_print_dataframe(name: str, dataframe: pd.DataFrame) -> None:
    if dataframe is None:
        print(f"[debug] {name}: DataFrame is None")
        return
    print(f"[debug] {name}: shape={dataframe.shape}, columns={list(dataframe.columns)}")
    if not dataframe.empty:
        preview = dataframe.head().to_dict(orient="records")
        print(f"[debug] {name}: head={preview}")
    else:
        print(f"[debug] {name}: DataFrame is empty")


def _debug_list_dir(label: str, path: str, limit: int = 10) -> None:
    try:
        entries = os.listdir(path)
        print(f"[debug] {label}: path={path}, count={len(entries)}, preview={entries[:limit]}")
    except FileNotFoundError:
        print(f"[debug] {label}: path={path} not found")
    except Exception as exc:
        print(f"[debug] {label}: path={path} error={exc}")


_debug_list_dir("EVAL_RESULTS", EVAL_RESULTS_PATH)
_debug_list_dir("EVAL_QUEUE", EVAL_REQUESTS_PATH)

LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
_debug_print_dataframe("LEADERBOARD", LEADERBOARD_DF)

(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
_debug_print_dataframe("EVAL_QUEUE_FINISHED", finished_eval_queue_df)
_debug_print_dataframe("EVAL_QUEUE_RUNNING", running_eval_queue_df)
_debug_print_dataframe("EVAL_QUEUE_PENDING", pending_eval_queue_df)


def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            ColumnFilter(AutoEvalColumn.model_size.name, type="checkboxgroup", label="Model Size"),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
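
# Sketch of a gentler fallback (an assumption, not the Space's current behavior):
# rather than raising when no results have been ingested yet, the Space could
# render an empty table with the expected schema so it still starts. The helper
# name below is hypothetical.
#
#   def init_leaderboard_or_placeholder(dataframe):
#       if dataframe is None or dataframe.empty:
#           empty = pd.DataFrame(columns=[c.name for c in fields(AutoEvalColumn)])
#           return Leaderboard(value=empty, datatype=[c.type for c in fields(AutoEvalColumn)], interactive=False)
#       return init_leaderboard(dataframe)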

def create_demo():
    """Create the Gradio interface."""
    with gr.Blocks(css=custom_css) as demo:
        gr.HTML(TITLE)

        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            print("[debug] Rendering leaderboard tab start")
            with gr.TabItem("🏅 SearchAgent Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                leaderboard = init_leaderboard(LEADERBOARD_DF)
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering leaderboard tab done")

            print("[debug] Rendering about tab start")
            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering about tab done")

            print("[debug] Rendering submit tab start")
            with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Column():
                    with gr.Row():
                        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering submit tab done")

        with gr.Row():
            print("[debug] Rendering citation start")
            with gr.Accordion("📙 Citation", open=False):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )
            print("[debug] Rendering citation done")

    return demo


demo = create_demo()

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.launch(show_error=True)
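
# Note: the scheduler (a restart every 1800 s, i.e. 30 minutes) and the launch
# above run at import time, which is what a Hugging Face Space expects for
# app.py. If this module were ever imported elsewhere (e.g. by tests), a guarded
# variant would avoid those side effects:
#
#   if __name__ == "__main__":
#       scheduler = BackgroundScheduler()
#       scheduler.add_job(restart_space, "interval", seconds=1800)
#       scheduler.start()
#       demo.launch(show_error=True)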