Spaces:
Runtime error
Runtime error
updates
Browse files
- app.py +37 -10
- src/assets/css_html_js.py +2 -2
- src/display_models/get_model_metadata.py +5 -8
- src/display_models/read_results.py +1 -1
- src/load_from_hub.py +3 -2
- src/rate_limiting.py +1 -4
app.py
CHANGED
|
@@ -51,9 +51,8 @@ api = HfApi(token=H4_TOKEN)
|
|
| 51 |
|
| 52 |
|
| 53 |
def restart_space():
|
| 54 |
-
api.restart_space(
|
| 55 |
-
|
| 56 |
-
)
|
| 57 |
|
| 58 |
# Rate limit variables
|
| 59 |
RATE_LIMIT_PERIOD = 7
|
|
@@ -98,7 +97,7 @@ else:
|
|
| 98 |
eval_queue_private, eval_results_private = None, None
|
| 99 |
|
| 100 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
| 101 |
-
models = original_df["model_name_for_query"].tolist()
|
| 102 |
|
| 103 |
to_be_dumped = f"models = {repr(models)}\n"
|
| 104 |
|
|
@@ -130,7 +129,9 @@ def add_new_eval(
|
|
| 130 |
error_msg = f"Organisation or user `{model.split('/')[0]}`"
|
| 131 |
error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
|
| 132 |
error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
|
| 133 |
-
error_msg +=
|
|
|
|
|
|
|
| 134 |
return styled_error(error_msg)
|
| 135 |
|
| 136 |
if model_type is None or model_type == "":
|
|
@@ -213,9 +214,29 @@ def change_tab(query_param: str):
|
|
| 213 |
|
| 214 |
|
| 215 |
# Searching and filtering
|
| 216 |
-
def update_table(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
| 220 |
if query != "":
|
| 221 |
filtered_df = search_table(filtered_df, query)
|
|
@@ -223,9 +244,11 @@ def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, colu
|
|
| 223 |
|
| 224 |
return df
|
| 225 |
|
|
|
|
| 226 |
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
| 227 |
return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
|
| 228 |
|
|
|
|
| 229 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
| 230 |
always_here_cols = [
|
| 231 |
AutoEvalColumn.model_type_symbol.name,
|
|
@@ -237,8 +260,9 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
|
| 237 |
]
|
| 238 |
return filtered_df
|
| 239 |
|
|
|
|
| 240 |
NUMERIC_INTERVALS = {
|
| 241 |
-
"Unknown": pd.Interval(-1, 0, closed="right"),
|
| 242 |
"< 1.5B": pd.Interval(0, 1.5, closed="right"),
|
| 243 |
"~3B": pd.Interval(1.5, 5, closed="right"),
|
| 244 |
"~7B": pd.Interval(6, 11, closed="right"),
|
|
@@ -247,9 +271,9 @@ NUMERIC_INTERVALS = {
|
|
| 247 |
"60B+": pd.Interval(55, 10000, closed="right"),
|
| 248 |
}
|
| 249 |
|
|
|
|
| 250 |
def filter_models(
|
| 251 |
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
| 252 |
-
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
| 253 |
) -> pd.DataFrame:
|
| 254 |
# Show all models
|
| 255 |
if show_deleted:
|
|
@@ -545,7 +569,10 @@ with demo:
|
|
| 545 |
max_rows=5,
|
| 546 |
)
|
| 547 |
with gr.Row():
|
| 548 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
| 549 |
|
| 550 |
with gr.Row():
|
| 551 |
with gr.Accordion("📙 Citation", open=False):
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
def restart_space():
|
| 54 |
+
api.restart_space(repo_id="gsaivinay/open_llm_leaderboard", token=H4_TOKEN)
|
| 55 |
+
|
|
|
|
| 56 |
|
| 57 |
# Rate limit variables
|
| 58 |
RATE_LIMIT_PERIOD = 7
|
|
|
|
| 97 |
eval_queue_private, eval_results_private = None, None
|
| 98 |
|
| 99 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
| 100 |
+
models = original_df["model_name_for_query"].tolist() # needed for model backlinks to the leaderboard
|
| 101 |
|
| 102 |
to_be_dumped = f"models = {repr(models)}\n"
|
| 103 |
|
|
|
|
| 129 |
error_msg = f"Organisation or user `{model.split('/')[0]}`"
|
| 130 |
error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
|
| 131 |
error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
|
| 132 |
+
error_msg += (
|
| 133 |
+
"Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
|
| 134 |
+
)
|
| 135 |
return styled_error(error_msg)
|
| 136 |
|
| 137 |
if model_type is None or model_type == "":
|
|
|
|
| 214 |
|
| 215 |
|
| 216 |
# Searching and filtering
|
| 217 |
+
def update_table(
|
| 218 |
+
hidden_df: pd.DataFrame,
|
| 219 |
+
current_columns_df: pd.DataFrame,
|
| 220 |
+
columns: list,
|
| 221 |
+
type_query: list,
|
| 222 |
+
precision_query: str,
|
| 223 |
+
size_query: list,
|
| 224 |
+
show_deleted: bool,
|
| 225 |
+
query: str,
|
| 226 |
+
):
|
| 227 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def update_table(
|
| 231 |
+
hidden_df: pd.DataFrame,
|
| 232 |
+
current_columns_df: pd.DataFrame,
|
| 233 |
+
columns: list,
|
| 234 |
+
type_query: list,
|
| 235 |
+
precision_query: str,
|
| 236 |
+
size_query: list,
|
| 237 |
+
show_deleted: bool,
|
| 238 |
+
query: str,
|
| 239 |
+
):
|
| 240 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
| 241 |
if query != "":
|
| 242 |
filtered_df = search_table(filtered_df, query)
|
|
|
|
| 244 |
|
| 245 |
return df
|
| 246 |
|
| 247 |
+
|
| 248 |
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
| 249 |
return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
|
| 250 |
|
| 251 |
+
|
| 252 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
| 253 |
always_here_cols = [
|
| 254 |
AutoEvalColumn.model_type_symbol.name,
|
|
|
|
| 260 |
]
|
| 261 |
return filtered_df
|
| 262 |
|
| 263 |
+
|
| 264 |
NUMERIC_INTERVALS = {
|
| 265 |
+
"Unknown": pd.Interval(-1, 0, closed="right"),
|
| 266 |
"< 1.5B": pd.Interval(0, 1.5, closed="right"),
|
| 267 |
"~3B": pd.Interval(1.5, 5, closed="right"),
|
| 268 |
"~7B": pd.Interval(6, 11, closed="right"),
|
|
|
|
| 271 |
"60B+": pd.Interval(55, 10000, closed="right"),
|
| 272 |
}
|
| 273 |
|
| 274 |
+
|
| 275 |
def filter_models(
|
| 276 |
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
|
|
|
| 277 |
) -> pd.DataFrame:
|
| 278 |
# Show all models
|
| 279 |
if show_deleted:
|
|
|
|
| 569 |
max_rows=5,
|
| 570 |
)
|
| 571 |
with gr.Row():
|
| 572 |
+
gr.Markdown(
|
| 573 |
+
"# ✉️✨ Submit your model [here!](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)",
|
| 574 |
+
elem_classes="markdown-text",
|
| 575 |
+
)
|
| 576 |
|
| 577 |
with gr.Row():
|
| 578 |
with gr.Accordion("📙 Citation", open=False):
|
src/assets/css_html_js.py
CHANGED
|
@@ -33,7 +33,7 @@ custom_css = """
|
|
| 33 |
background: none;
|
| 34 |
border: none;
|
| 35 |
}
|
| 36 |
-
|
| 37 |
#search-bar {
|
| 38 |
padding: 0px;
|
| 39 |
}
|
|
@@ -83,7 +83,7 @@ table th:first-child {
|
|
| 83 |
#filter_type label > .wrap{
|
| 84 |
width: 103px;
|
| 85 |
}
|
| 86 |
-
#filter_type label > .wrap .wrap-inner{
|
| 87 |
padding: 2px;
|
| 88 |
}
|
| 89 |
#filter_type label > .wrap .wrap-inner input{
|
|
|
|
| 33 |
background: none;
|
| 34 |
border: none;
|
| 35 |
}
|
| 36 |
+
|
| 37 |
#search-bar {
|
| 38 |
padding: 0px;
|
| 39 |
}
|
|
|
|
| 83 |
#filter_type label > .wrap{
|
| 84 |
width: 103px;
|
| 85 |
}
|
| 86 |
+
#filter_type label > .wrap .wrap-inner{
|
| 87 |
padding: 2px;
|
| 88 |
}
|
| 89 |
#filter_type label > .wrap .wrap-inner input{
|
src/display_models/get_model_metadata.py
CHANGED
|
@@ -1,17 +1,15 @@
|
|
| 1 |
import glob
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
-
import re
|
| 5 |
import pickle
|
|
|
|
| 6 |
from typing import List
|
| 7 |
|
| 8 |
import huggingface_hub
|
|
|
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
from tqdm import tqdm
|
| 11 |
-
from transformers import
|
| 12 |
-
from accelerate import init_empty_weights
|
| 13 |
-
from transformers import AutoModel, AutoConfig
|
| 14 |
-
from accelerate import init_empty_weights
|
| 15 |
|
| 16 |
from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
|
| 17 |
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
|
|
@@ -25,7 +23,6 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
|
|
| 25 |
try:
|
| 26 |
with open("model_info_cache.pkl", "rb") as f:
|
| 27 |
model_info_cache = pickle.load(f)
|
| 28 |
-
except (EOFError, FileNotFoundError):
|
| 29 |
except (EOFError, FileNotFoundError):
|
| 30 |
model_info_cache = {}
|
| 31 |
try:
|
|
@@ -67,7 +64,7 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
|
|
| 67 |
if model_name not in model_size_cache:
|
| 68 |
model_size_cache[model_name] = get_model_size(model_name, model_info)
|
| 69 |
model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
|
| 70 |
-
|
| 71 |
# save cache to disk in pickle format
|
| 72 |
with open("model_info_cache.pkl", "wb") as f:
|
| 73 |
pickle.dump(model_info_cache, f)
|
|
@@ -101,7 +98,7 @@ def get_model_size(model_name, model_info):
|
|
| 101 |
with init_empty_weights():
|
| 102 |
model = AutoModel.from_config(config, trust_remote_code=False)
|
| 103 |
return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
|
| 104 |
-
except (EnvironmentError, ValueError, KeyError):
|
| 105 |
try:
|
| 106 |
size_match = re.search(size_pattern, model_name.lower())
|
| 107 |
size = size_match.group(0)
|
|
|
|
| 1 |
import glob
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
import pickle
|
| 5 |
+
import re
|
| 6 |
from typing import List
|
| 7 |
|
| 8 |
import huggingface_hub
|
| 9 |
+
from accelerate import init_empty_weights
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
from tqdm import tqdm
|
| 12 |
+
from transformers import AutoConfig, AutoModel
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
|
| 15 |
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
|
|
|
|
| 23 |
try:
|
| 24 |
with open("model_info_cache.pkl", "rb") as f:
|
| 25 |
model_info_cache = pickle.load(f)
|
|
|
|
| 26 |
except (EOFError, FileNotFoundError):
|
| 27 |
model_info_cache = {}
|
| 28 |
try:
|
|
|
|
| 64 |
if model_name not in model_size_cache:
|
| 65 |
model_size_cache[model_name] = get_model_size(model_name, model_info)
|
| 66 |
model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
|
| 67 |
+
|
| 68 |
# save cache to disk in pickle format
|
| 69 |
with open("model_info_cache.pkl", "wb") as f:
|
| 70 |
pickle.dump(model_info_cache, f)
|
|
|
|
| 98 |
with init_empty_weights():
|
| 99 |
model = AutoModel.from_config(config, trust_remote_code=False)
|
| 100 |
return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
|
| 101 |
+
except (EnvironmentError, ValueError, KeyError): # model config not found, likely private
|
| 102 |
try:
|
| 103 |
size_match = re.search(size_pattern, model_name.lower())
|
| 104 |
size = size_match.group(0)
|
src/display_models/read_results.py
CHANGED
|
@@ -107,7 +107,7 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
|
|
| 107 |
revision=model_sha,
|
| 108 |
results={benchmark: mean_acc},
|
| 109 |
precision=precision, # todo model_type=, weight_type=
|
| 110 |
-
date=config.get("submission_date")
|
| 111 |
)
|
| 112 |
)
|
| 113 |
|
|
|
|
| 107 |
revision=model_sha,
|
| 108 |
results={benchmark: mean_acc},
|
| 109 |
precision=precision, # todo model_type=, weight_type=
|
| 110 |
+
date=config.get("submission_date"),
|
| 111 |
)
|
| 112 |
)
|
| 113 |
|
src/load_from_hub.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
from huggingface_hub import Repository
|
| 6 |
from transformers import AutoConfig
|
| 7 |
-
from collections import defaultdict
|
| 8 |
|
| 9 |
from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
|
| 10 |
from src.display_models.get_model_metadata import apply_metadata
|
|
@@ -23,7 +23,8 @@ def get_all_requested_models(requested_models_dir: str) -> set[str]:
|
|
| 23 |
current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
|
| 24 |
if current_depth == depth:
|
| 25 |
for file in files:
|
| 26 |
-
if not file.endswith(".json"):
|
|
|
|
| 27 |
with open(os.path.join(root, file), "r") as f:
|
| 28 |
info = json.load(f)
|
| 29 |
file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
+
from collections import defaultdict
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
from huggingface_hub import Repository
|
| 7 |
from transformers import AutoConfig
|
|
|
|
| 8 |
|
| 9 |
from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
|
| 10 |
from src.display_models.get_model_metadata import apply_metadata
|
|
|
|
| 23 |
current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
|
| 24 |
if current_depth == depth:
|
| 25 |
for file in files:
|
| 26 |
+
if not file.endswith(".json"):
|
| 27 |
+
continue
|
| 28 |
with open(os.path.join(root, file), "r") as f:
|
| 29 |
info = json.load(f)
|
| 30 |
file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
|
src/rate_limiting.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
from datetime import datetime, timezone, timedelta
|
| 3 |
|
| 4 |
|
| 5 |
def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):
|
|
@@ -12,5 +11,3 @@ def user_submission_permission(submission_name, users_to_submission_dates, rate_
|
|
| 12 |
submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
|
| 13 |
|
| 14 |
return len(submissions_after_timelimit)
|
| 15 |
-
|
| 16 |
-
|
|
|
|
| 1 |
+
from datetime import datetime, timedelta, timezone
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):
|
|
|
|
| 11 |
submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
|
| 12 |
|
| 13 |
return len(submissions_after_timelimit)
|
|
|
|
|
|