import glob
import json
import logging
import os
import shutil
import subprocess
from io import BytesIO

# Create and export the temporary directory before the heavier imports, in
# case downstream modules read TMP_DIR at import time.
os.makedirs("tmp", exist_ok=True)
os.environ["TMP_DIR"] = "tmp"
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

import gradio as gr
import huggingface_hub
import numpy as np
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci

from src.display.about import INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT, TITLE
from src.display.css_html_js import custom_css
from src.display.utils import AutoEvalColumn, fields
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_dataset, download_openbench
from src.radial.radial import create_plot

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

enable_space_ci()

def handle_file_upload(file):
    # gr.File(type="filepath") may hand the callback a plain path string or a
    # tempfile wrapper exposing .name, depending on the Gradio version.
    path = file if isinstance(file, str) else file.name
    file_path = os.path.basename(path)
    logging.info("File uploaded: %s", file_path)
    with open(path, "r") as f:
        v = json.load(f)
    return v, file_path

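# Expected shape of an uploaded results file, inferred from the parsing in
# submit_file below (a sketch, not an authoritative schema):
#
#   {
#     "config": {"model_dtype": "float16"},
#     "results": {
#       "mmlu_translated_kk": {"acc,none": 0.0},
#       ...one entry per benchmark column listed in submit_file...
#     }
#   }
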
def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
    logging.info("START SUBMITTING!")
    if profile is None:
        return "Hub Login Required"
    logging.info("PROFILE: %s", profile.__dict__)

    # Prefix external submissions with the submitter's username; entries from
    # the kz-transformers org keep the bare model name.
    new_file = v['results']
    if profile.username == 'kz-transformers':
        new_file['model'] = mn
    else:
        new_file['model'] = profile.username + "/" + mn

    columns = [
        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc', 'kazakh_and_literature_unt_mc',
        'kk_geography_unt_mc', 'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
        'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc',
    ]

    # Flatten each benchmark entry down to its accuracy score.
    for column in columns:
        new_file[column] = new_file[column]['acc,none']

    new_file['model_dtype'] = v['config']["model_dtype"]
    new_file['ppl'] = 0

    logging.info("Parsed submission: %s", new_file)
    buf = BytesIO()
    buf.write(json.dumps(new_file).encode('utf-8'))
    buf.seek(0)  # rewind so upload_file reads the payload from the start
    API.upload_file(
        path_or_fileobj=buf,
        path_in_repo="model_data/external/" + profile.username + mn + ".json",
        repo_id="kz-transformers/s-openbench-eval",
        repo_type="dataset",
    )
    # Signal the scheduled job to rebuild the leaderboard.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"

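# Refresh flow: a successful submission sets RESET_JUDGEMENT_ENV to "1"; the
# scheduled update_board() job then rebuilds leaderboard.json, uploads it,
# and restarts the Space so the new entry shows up.
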
def restart_space():
    API.restart_space(repo_id=REPO_ID)
    download_openbench()


def update_plot(selected_models):
    return create_plot(selected_models)

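# The UI has three tabs: the leaderboard table, the submission form, and an
# analytics tab with a per-model plot.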
def build_demo():
    download_openbench()
    demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[AutoEvalColumn.model.name],
                )

            with gr.TabItem("🚀 Submit", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    file_output = gr.File(label="Drag and drop your JSON judgment file here", type="filepath")
                    uploaded_file = gr.State()
                    file_path = gr.State()

                with gr.Row():
                    with gr.Column():
                        out = gr.Textbox("Submission status")
                    with gr.Column():
                        login_button = gr.LoginButton(elem_id="oauth-button")

                submit_button = gr.Button("Submit File", elem_id="submit_button", variant="primary")

                # Parse the uploaded JSON once and stash the payload and file
                # name in State components for the submit handler.
                file_output.upload(
                    handle_file_upload,
                    file_output,
                    [uploaded_file, file_path],
                )

                # Gradio injects the gr.OAuthProfile argument automatically
                # because submit_file declares it in its signature.
                submit_button.click(
                    submit_file,
                    [uploaded_file, file_path, model_name_textbox],
                    [out],
                )

            with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                with gr.Column():
                    model_dropdown = gr.Dropdown(
                        choices=leaderboard_df["model"].tolist(),
                        label="Models",
                        value=leaderboard_df["model"].tolist(),
                        multiselect=True,
                        info="Select models",
                    )
                with gr.Column():
                    plot = gr.Plot(update_plot(model_dropdown.value))

                model_dropdown.change(
                    fn=update_plot,
                    inputs=[model_dropdown],
                    outputs=[plot],
                )
    return demo

def update_board(restart: bool = True):
    """Rebuild leaderboard.json from external submissions and upload it to the Hub."""
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    if need_reset != "1":
        # Nothing new was submitted since the last rebuild; skip this run.
        return
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    download_dataset("kz-transformers/s-openbench-eval", "m_data")
    data_list = []
    for file in glob.glob("./m_data/model_data/external/*.json"):
        with open(file) as f:
            try:
                data_list.append(json.load(f))
            except Exception as e:
                logging.error("Skipping malformed submission %s: %s", file, e)
    logging.info("DATALIST: %s", data_list)

    with open("genned.json", "w") as f:
        json.dump(data_list, f)

    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
    if restart:
        restart_space()

def update_board_():
    # One-shot variant used at startup: same rebuild as update_board(), but
    # without restarting the Space, since the app is about to launch anyway.
    update_board(restart=False)

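# Entrypoint: prime the refresh flag, rebuild the leaderboard once, schedule
# periodic refreshes, then launch the Gradio app.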
if __name__ == "__main__":
    os.environ[RESET_JUDGEMENT_ENV] = "1"

    scheduler = BackgroundScheduler()
    update_board_()
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True, share=True)