Tom Aarsen committed · Commit ab565ba · Parent(s): e2b41c8

Add search bar/filtering; always show Model Size
app.py CHANGED
@@ -1,6 +1,7 @@
-from functools import partial
+from functools import partial, reduce
 import json
 import os
+import re
 
 from datasets import load_dataset
 import gradio as gr
@@ -1098,7 +1099,7 @@ def add_rank(df):
     if len(cols_to_rank) == 1:
         df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
     else:
-        df.insert(…
+        df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
         df.sort_values("Average", ascending=False, inplace=True)
     df.insert(0, "Rank", list(range(1, len(df) + 1)))
     df = df.round(2)
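Context for the `add_rank` change above (a sketch, not part of the commit): with `skipna=False`, any missing task score makes the whole "Average" NaN, so partially evaluated models sink to the bottom of the descending sort. Illustrated with made-up scores:

```python
import pandas as pd

# Hypothetical leaderboard slice: model "b" is missing a task score.
df = pd.DataFrame({"Model": ["a", "b"], "T1": [80.0, 90.0], "T2": [70.0, None]})
cols_to_rank = ["T1", "T2"]

# Same insertion as the new line in add_rank: "Average" lands right
# before the score columns.
df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
print(df)
#   Model  Average    T1    T2
# 0     a     75.0  80.0  70.0
# 1     b      NaN  90.0   NaN   <- NaN Average: sorts last, descending
```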
@@ -1106,7 +1107,7 @@ def add_rank(df):
     df.fillna("", inplace=True)
     return df
 
-def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=…
+def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
     api = HfApi()
     models = api.list_models(filter="mteb")
     # Initialize list to models that we cannot fetch metadata from
@@ -1169,6 +1170,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
         except:
             pass
         df_list.append(out)
+        if len(df_list) >= 1:
+            break
     df = pd.DataFrame(df_list)
     # If there are any models that are the same, merge them
     # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
@@ -1217,26 +1220,26 @@ def get_mteb_average():
 
     DATA_OVERALL = DATA_OVERALL.round(2)
 
-    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION])
+    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING])
-    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING])
+    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION])
-    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION])
+    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RERANKING])
-    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING])
+    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL])
-    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL])
+    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_STS])
-    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS])
+    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION])
+    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION])
     DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
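A sketch (made-up data, not the commit's) of the row filter used throughout these hunks: after the frames are filled with "", `iloc[:, 3:]` skips the Rank, Model, and new Model Size columns, and `.ne("").any(axis=1)` keeps only rows with at least one real score:

```python
import pandas as pd

df = pd.DataFrame({
    "Rank": [1, 2],
    "Model": ["a", "b"],
    "Model Size (Million Parameters)": [110, ""],
    "T1": [80.0, ""],  # model "b" has no scores at all
})
kept = df[df.iloc[:, 3:].ne("").any(axis=1)]
print(kept["Model"].tolist())  # ['a']
```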
@@ -1279,24 +1282,24 @@ def get_mteb_average_zh():
 
     DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)
 
-    DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLASSIFICATION_ZH])
+    DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_ZH])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLUSTERING_ZH])
-    DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_ZH])
+    DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
-    DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
+    DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RERANKING_ZH])
-    DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_ZH])
+    DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RETRIEVAL_ZH])
-    DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_ZH])
+    DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_STS_ZH])
-    DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_ZH])
+    DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
     DATA_OVERALL_ZH.fillna("", inplace=True)
@@ -1339,25 +1342,25 @@ def get_mteb_average_fr():
     DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
     DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)
 
-    DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLASSIFICATION_FR])
-    DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_FR])
+    DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLUSTERING_FR])
-    DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_FR])
+    DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
-    DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
+    DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RERANKING_FR])
-    DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_FR])
+    DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RETRIEVAL_FR])
-    DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_FR])
+    DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_STS_FR])
-    DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_FR])
+    DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_SUMMARIZATION_FR])
+    DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION_FR])
     DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
@@ -1398,21 +1401,21 @@ def get_mteb_average_pl():
 
     DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
 
-    DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLASSIFICATION_PL])
+    DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_PL])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLUSTERING_PL])
-    DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_PL])
+    DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
-    DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
+    DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_RETRIEVAL_PL])
-    DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_PL])
+    DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_STS_PL])
-    DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_PL])
+    DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 3:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
     DATA_OVERALL_PL.fillna("", inplace=True)
@@ -1426,14 +1429,14 @@ get_mteb_average()
 get_mteb_average_fr()
 get_mteb_average_pl()
 get_mteb_average_zh()
-DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
-DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)
-DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
-DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)
-DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
-DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
-DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
-DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
+DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_BITEXT_MINING]
+DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[["Rank", "Model", "Model Size (Million Parameters)"] + TASK_LIST_BITEXT_MINING_DA]
+DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_DA]
+DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_NB]
+DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_SV]
+DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_OTHER]
+DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLUSTERING_DE]
+DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_STS_OTHER]
 
 # Exact, add all non-nan integer values for every dataset
 NUM_SCORES = 0
@@ -1476,7 +1479,7 @@ for d in [
     DATA_SUMMARIZATION_FR,
 ]:
     # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
-    cols_to_ignore = 3 if "Average" in d.columns else 2
+    cols_to_ignore = 4 if "Average" in d.columns else 3
     # Count number of scores including only non-nan floats & excluding the rank column
     NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
     # Exclude rank & model name column (first two); Do not count different language versions as different datasets
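A sketch of the adjusted score count (illustrative frame, not the app's data): the leading metadata columns are skipped — now 4 when an "Average" column is present (Rank, Model, Model Size, Average), else 3 — and `notna()` counts only the remaining real scores:

```python
import numpy as np
import pandas as pd

d = pd.DataFrame({
    "Rank": [1, 2],
    "Model": ["a", "b"],
    "Model Size (Million Parameters)": [110.0, np.nan],
    "Average": [75.0, np.nan],
    "T1": [80.0, np.nan],
    "T2": [70.0, 60.0],
})
cols_to_ignore = 4 if "Average" in d.columns else 3
print(d.iloc[:, cols_to_ignore:].notna().sum().sum())  # 3 (T1 for "a", T2 for both)
```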
@@ -1491,6 +1494,7 @@ NUM_MODELS = len(set(MODELS))
 # 1. Force headers to wrap
 # 2. Force model column (maximum) width
 # 3. Prevent model column from overflowing, scroll instead
+# 4. Prevent checkbox groups from taking up too much space
 css = """
 table > thead {
     white-space: normal
@@ -1503,6 +1507,10 @@ table {
 table > tbody > tr > td:nth-child(2) > div {
     overflow-x: auto
 }
+
+.filter-checkbox-group {
+    max-width: max-content;
+}
 """
 
 """
@@ -1822,6 +1830,7 @@ data = {
 }
 
 dataframes = []
+full_dataframes = []
 tabs = []
 
 # The following JavaScript function updates the URL parameters based on the selected task and language
@@ -1854,6 +1863,57 @@ def update_url_language(event: gr.SelectData, current_task_language: dict, langu
     language_per_task[current_task_language["task"]] = event.target.id
     return current_task_language, language_per_task
 
+NUMERIC_INTERVALS = {
+    "<100M": pd.Interval(0, 100, closed="right"),
+    ">100M, <500M": pd.Interval(100, 500, closed="right"),
+    ">500M, <1B": pd.Interval(500, 1000, closed="right"),
+    ">1B": pd.Interval(1000, 1_000_000, closed="right"),
+}
+
+MODEL_TYPES = [
+    "Open",
+    "API",
+]
+
+def filter_data(search_query, model_types, model_sizes, *full_dataframes):
+    output_dataframes = []
+    for df in full_dataframes:
+        # df = pd.DataFrame(data=dataframe.value["data"], columns=dataframe.value["headers"])
+
+        # Apply the search query
+        if search_query:
+            names = df["Model"].map(lambda x: re.match("<a .+?>(.+)</a>", x).group(1))
+            masks = []
+            for query in search_query.split(";"):
+                masks.append(names.str.contains(query))
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        # Apply the model type filtering
+        if model_types != MODEL_TYPES:
+            masks = []
+            for model_type in model_types:
+                if model_type == "Open":
+                    masks.append(df["Model Size (Million Parameters)"] != "")
+                elif model_type == "API":
+                    masks.append(df["Model Size (Million Parameters)"] == "")
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        # Apply the model size filtering
+        if model_sizes != ["?", *NUMERIC_INTERVALS.keys()]:
+            masks = []
+            # Handle the ? only
+            if "?" in model_sizes:
+                masks.append(df["Model Size (Million Parameters)"] == "")
+                model_sizes.remove("?")
+            # Handle the numeric intervals only
+            numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[model_size] for model_size in model_sizes]))
+            sizes = df["Model Size (Million Parameters)"].replace('', 0)
+            masks.append(sizes.apply(lambda size: any(numeric_interval.contains(size))))
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        output_dataframes.append(df)
+    return output_dataframes
+
 with gr.Blocks(css=css) as block:
 
     # Store the current task and language for updating the URL. This is a bit hacky, but it works
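Two pieces of the new `filter_data` are worth unpacking (a standalone sketch with made-up sizes, not part of the commit). Sizes are in millions of parameters; API models with unknown size carry "" and are mapped to 0, which no interval contains since every interval is left-open; per-criterion masks are OR-combined with `functools.reduce`:

```python
from functools import reduce
import pandas as pd

NUMERIC_INTERVALS = {
    "<100M": pd.Interval(0, 100, closed="right"),
    ">100M, <500M": pd.Interval(100, 500, closed="right"),
}

# Hypothetical sizes: two small open models, one large model, one API model ("").
sizes = pd.Series([33, 110, 7000, ""]).replace("", 0)

selected = ["<100M", ">100M, <500M"]
intervals = pd.IntervalIndex(sorted(NUMERIC_INTERVALS[s] for s in selected))

# One mask per criterion; reduce ORs them together, as in filter_data.
masks = [sizes.apply(lambda size: any(intervals.contains(size)))]
mask = reduce(lambda a, b: a | b, masks)
print(mask.tolist())  # [True, True, False, False] — 0 falls outside (0, 100]
```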
@@ -1865,6 +1925,26 @@ with gr.Blocks(css=css) as block:
     Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
     """)
 
+    with gr.Row():
+        search_bar = gr.Textbox(
+            label="Search Bar",
+            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press enter...",
+        )
+        filter_model_type = gr.CheckboxGroup(
+            label="Model types",
+            choices=MODEL_TYPES,
+            value=MODEL_TYPES,
+            interactive=True,
+            elem_classes=["filter-checkbox-group"]
+        )
+        filter_model_sizes = gr.CheckboxGroup(
+            label="Model sizes (in number of parameters)",
+            choices=["?"] + list(NUMERIC_INTERVALS.keys()),
+            value=["?"] + list(NUMERIC_INTERVALS.keys()),
+            interactive=True,
+            elem_classes=["filter-checkbox-group"]
+        )
+
 with gr.Tabs() as outer_tabs:
     # Store the tabs for updating them on load based on URL parameters
     tabs.append(outer_tabs)
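The search wired to this bar matches against display names, not raw cells: the "Model" column stores HTML anchor tags, so `filter_data` first strips the markup with a regex. A minimal sketch (hypothetical cell value):

```python
import re

cell = '<a target="_blank" href="https://huggingface.co/intfloat/e5-large">e5-large</a>'
name = re.match("<a .+?>(.+)</a>", cell).group(1)  # lazy .+? stops at the first ">"
print(name)  # e5-large
```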
@@ -1901,9 +1981,12 @@ with gr.Blocks(css=css) as block:
 
             with gr.Row():
                 datatype = ["number", "markdown"] + ["number"] * len(item["data"])
-                dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=…
+                dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=500)
                 dataframes.append(dataframe)
 
+                full_dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", visible=False)
+                full_dataframes.append(full_dataframe)
+
             with gr.Row():
                 refresh_button = gr.Button("Refresh")
                 refresh_button.click(item["refresh"], inputs=None, outputs=dataframe)
@@ -1950,6 +2033,10 @@ with gr.Blocks(css=css) as block:
 
     block.load(set_tabs_on_load, inputs=[], outputs=tabs + [current_task_language, language_per_task])
 
+    search_bar.submit(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+    filter_model_type.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+    filter_model_sizes.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+
 block.queue(max_size=10)
 block.launch()
 
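The wiring pattern in the last hunk: each visible table has a hidden, unfiltered twin, and every control re-runs `filter_data` against the full copies, so filters never compound on an already filtered view. A minimal standalone sketch of the same pattern (hypothetical data, not the leaderboard's):

```python
import gradio as gr
import pandas as pd

FULL = pd.DataFrame({"Model": ["bert", "e5", "gte"], "Score": [60.0, 75.0, 74.0]})

def apply_search(query: str, df: pd.DataFrame) -> pd.DataFrame:
    # Always filter the hidden full copy, never the visible table.
    return df[df["Model"].str.contains(query)] if query else df

with gr.Blocks() as demo:
    search = gr.Textbox(label="Search")
    shown = gr.Dataframe(FULL, type="pandas")                  # what the user sees
    full = gr.Dataframe(FULL, type="pandas", visible=False)    # unfiltered source
    search.submit(apply_search, inputs=[search, full], outputs=shown)

demo.launch()
```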