Commit 10e69e7
Parent(s): 7498e81

Leaderboard updates, no more anonymous (tracking via huggingface

Files changed:
- about.py +44 -2
- app.py +17 -31
- constants.py +3 -1
- submit.py +47 -31
- utils.py +9 -5
about.py CHANGED

@@ -22,9 +22,51 @@ TODO
 We'd like to add some more existing models to the leaderboard. Some examples of models we'd like to add:
 - TODO
 
-
+### FAQs
 
 """
+# Note(Lood): Let's track these FAQs in the main Google Doc and have that remain the source of truth.
 FAQS = {
-    "
+    "Is there a fee to enter?": "No. Participation is free of charge.",
+    "Who can participate?": "Anyone. We encourage academic labs, individuals, and especially industry teams who use developability models in production.",
+    "Where can I find more information about the methods used to generate the data?": (
+        "Our [PROPHET-Ab preprint](https://www.biorxiv.org/content/10.1101/2025.05.01.651684v1) describes in detail the methods used to generate the training dataset. "
+        "Note: these assays may differ from previously published methods, and the correlations between literature data and our experimental data are also described in the preprint. "
+        "These same methods are used to generate the held-out test data."
+    ),
+    "How were the held-out sequences designed?": (
+        "We sampled 80 paired antibody sequences from [OAS](https://opig.stats.ox.ac.uk/webapps/oas/). We tried to represent the range of germline variants, sequence identities to germline, and CDR3 lengths. "
+        "The sequences in the dataset are quite diverse as measured by pairwise sequence identity."
+    ),
+    "Do I need to design new proteins?": (
+        "No. This is purely a predictive competition, which will be judged according to the correlation between predictions and experimental values. "
+        "There may be a generative round in the future."
+    ),
+    "Can I participate anonymously?": (
+        "Yes! Please create an anonymous Hugging Face account so that we can uniquely associate submissions. "
+        "Note that top participants will be contacted to identify themselves at the end of the tournament."
+    ),
+    "How is intellectual property handled?": (
+        "Participants retain IP rights to the methods they use and develop during the tournament. Read more details in our terms here [link]."
+    ),
+    "Do I need to submit my code / methods in order to participate?": (
+        "No, there is no requirement to submit code / methods, and submitted predictions remain private. "
+        "We also have an optional field for including a short model description. "
+        "Top-performing participants will be asked to identify themselves at the end of the tournament. "
+        "There will be one prize for the best open-source model, which will require code / methods to be available."
+    ),
+    "How are winners determined?": (
+        "There will be 6 prizes (one for each of the assay properties plus an “open-source” prize). "
+        "For the property-specific prizes, winners will be determined by the submission with the highest Spearman rank correlation coefficient on the private holdout set. "
+        "The “open-source” prize will be determined by the highest average Spearman correlation across all properties. "
+        "We reserve the right to award the open-source prize to a predictor with competitive results for a subset of properties (e.g. a top polyreactivity model)."
+    ),
+    "How does the open-source prize work?": (
+        "Participants who open-source their code and methods will be eligible for the open-source prize (as well as the other prizes)."
+    ),
+    "What do I need to submit?": (
+        "There is a '✉️ Submit' tab on the Hugging Face competition page to upload predictions for datasets. For each dataset, participants need to submit a CSV containing a column for each property they would like to predict (e.g. called “HIC”), "
+        "and a row for each sequence, matching the sequences in the input file. These predictions are then evaluated in the backend using the Spearman rank correlation between predictions and experimental values, "
+        "and the metrics are added to the leaderboard. Predictions remain private and are not seen by other contestants."
+    ),
 }
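The new "What do I need to submit?" and "How are winners determined?" answers describe a CSV with one column per predicted property and scoring by Spearman rank correlation. A minimal sketch of that shape from the participant side; the column names, toy sequences, and toy values below are illustrative assumptions, not the official schema:

    # Illustrative only: toy sequences and values, not real competition data.
    import pandas as pd
    from scipy.stats import spearmanr

    submission = pd.DataFrame({
        "sequence": ["EVQLVESGGG...", "QVQLQQSGAE...", "DIQMTQSPSS..."],  # should match the provided input file
        "HIC": [7.2, 9.8, 8.1],  # one column per property you choose to predict
    })
    submission.to_csv("my_submission.csv", index=False)

    # The backend scores each property column by Spearman rank correlation
    # against the private experimental values (higher is better).
    experimental = [7.5, 10.1, 7.9]  # toy stand-in for the held-out measurements
    rho, _ = spearmanr(submission["HIC"], experimental)
    print(f"Spearman rho: {rho:.2f}")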
app.py CHANGED

@@ -10,6 +10,7 @@ from constants import (
     ASSAY_EMOJIS,
     ASSAY_DESCRIPTION,
     EXAMPLE_FILE_DICT,
+    LEADERBOARD_DISPLAY_COLUMNS,
 )
 from about import ABOUT_TEXT, FAQS
 from submit import make_submission
@@ -20,36 +21,25 @@ def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None)
     # Having a submission time column, and a user column where the username is clickable (this is a pro for usability but con for anonymity)
     # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
     # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
-    column_order = ["model", "property", "spearman", "spearman_cross_val"]
     df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
     if assay is not None:
         df = df[df["assay"] == assay]
-    df = df[
+    df = df[LEADERBOARD_DISPLAY_COLUMNS]
     return df.sort_values(by="spearman", ascending=False)
 
 
-# Cache the results to avoid multiple downloads
-_cached_results = None
-
-
-def get_cached_results():
-    global _cached_results
-    if _cached_results is None:
-        _cached_results = fetch_hf_results()
-    return _cached_results
-
-
 def get_leaderboard_object(assay: str | None = None):
-    filter_columns = ["
+    filter_columns = ["dataset"]
     if assay is None:
         filter_columns.append("property")
     # TODO how to sort filter columns alphabetically?
+    # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
     Leaderboard(
         # TODO(Lood) check that this actually refreshes using the function
-        value=format_leaderboard_table(df_results=
+        value=format_leaderboard_table(df_results=fetch_hf_results(), assay=assay),
         datatype=["str", "str", "str", "number"],
-        select_columns=["model", "property", "spearman", "
-        search_columns=["model"],
+        select_columns=["model", "property", "spearman", "dataset"],
+        search_columns=["model"],  # Note(Lood): Would be nice to make this clear it's searching on model name
         filter_columns=filter_columns,
        every=60,
         render=True,
@@ -57,6 +47,7 @@ def get_leaderboard_object(assay: str | None = None):
 
 
 with gr.Blocks() as demo:
+    # TODO: Add Ginkgo logo here on the top right
     gr.Markdown("""
     ## Welcome to the Ginkgo Antibody Developability Benchmark!
 
@@ -75,9 +66,11 @@ with gr.Blocks() as demo:
         width="50vw",  # 50% of the "viewport width"
     )
     gr.Markdown(ABOUT_TEXT)
-    for question, answer in FAQS.items():
-
-
+    for i, (question, answer) in enumerate(FAQS.items()):
+        # Would love to make questions bold but accordion doesn't support it
+        question = f"{i+1}. {question}"
+        with gr.Accordion(question, open=False):
+            gr.Markdown(f"*{answer}*")  # Italics for answers
 
     # Procedurally make these 5 tabs
     for assay in ASSAY_LIST:
@@ -100,7 +93,7 @@ with gr.Blocks() as demo:
             # Antibody Developability Submission
             Upload a CSV to get a score!
 
-            Please use your Hugging Face account name to submit your model - we use this to track separate submissions,
+            Please use your Hugging Face account name to submit your model - we use this to track separate submissions. If you would like to remain anonymous, please set up an anonymous Hugging Face account.
             Your submission will be evaluated and added to the leaderboard.
             """
         )
@@ -114,7 +107,7 @@ with gr.Blocks() as demo:
             with gr.Column():
                 username_input = gr.Textbox(
                     label="Username",
-                    placeholder="Enter your Hugging Face username
+                    placeholder="Enter your Hugging Face username",
                     info="This will be used to track your submissions, and to update your results if you submit again.",
                 )
                 model_name_input = gr.Textbox(
@@ -123,7 +116,7 @@ with gr.Blocks() as demo:
                     info="This will be displayed on the leaderboard.",
                 )
                 model_description_input = gr.Textbox(
-                    label="Model Description",
+                    label="Model Description (optional)",
                     placeholder="Brief description of your model and approach",
                     info="Describe your model, training data, or methodology.",
                     lines=3,
@@ -141,13 +134,6 @@ with gr.Blocks() as demo:
                 )
                 submission_file = gr.File(label="Submission CSV")
 
-        # If username is empty, set to anonymous submission
-        username_input.change(
-            fn=lambda x: x if x and x.strip() else None,
-            inputs=username_input,
-            outputs=user_state,
-        )
-
         def update_submission_type_and_file(submission_type):
             """
             Based on the submission type selected in the dropdown,
@@ -203,7 +189,7 @@ with gr.Blocks() as demo:
     gr.Markdown(
         """
         <div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
-        📬 For questions or feedback, contact <a href="mailto:
+        📬 For questions or feedback, contact <a href="mailto:antibodycompetition@ginkgobioworks.com">antibodycompetition@ginkgobioworks.com</a> or visit the Community tab at the top of this page.
         </div>
         """,
         elem_id="contact-footer",
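The `# TODO(Lood) check that this actually refreshes using the function` comment sits next to `value=format_leaderboard_table(df_results=fetch_hf_results(), assay=assay)` and `every=60`. A hedged sketch of the pattern Gradio's `every=` polling generally expects, assuming the gradio_leaderboard `Leaderboard` follows standard Gradio component semantics (a callable `value` is re-evaluated on each tick, while a precomputed DataFrame is rendered once at startup); verifying that assumption is exactly what the TODO asks for:

    # Sketch only: pass a zero-argument callable so every=60 has something to re-run.
    # Whether gradio_leaderboard re-evaluates callables this way still needs to be confirmed.
    # format_leaderboard_table and fetch_hf_results are the functions defined in app.py / utils.py above.
    from functools import partial
    from gradio_leaderboard import Leaderboard

    assay = None  # or one of the per-assay tab values

    def leaderboard_value(assay=None):
        # Re-computed on each refresh tick; pulls the latest results from the Hub.
        return format_leaderboard_table(df_results=fetch_hf_results(), assay=assay)

    Leaderboard(
        value=partial(leaderboard_value, assay=assay),  # callable, not a static DataFrame
        datatype=["str", "str", "str", "number"],
        every=60,  # seconds between re-evaluations of value
        render=True,
    )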
constants.py CHANGED

@@ -59,4 +59,6 @@ ORGANIZATION = "ginkgo-datapoints"
 SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
 RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 
-
+# Leaderboard dataframes
+LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user"]  # The columns expected from the results dataset
+LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user"]  # After changing assay to property (pretty formatting)
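The two new lists differ only in that the raw `assay` column is surfaced as `property` (the pretty name) on the leaderboard. A toy illustration of that relationship; the `ASSAY_RENAME` values here are made up for the example, and the real mapping is defined elsewhere in constants.py:

    import pandas as pd

    LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user"]
    LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user"]

    # Hypothetical pretty names; the real ASSAY_RENAME lives in constants.py.
    ASSAY_RENAME = {"HIC": "Hydrophobicity (HIC)"}

    raw = pd.DataFrame(
        [["my-model", "HIC", 0.61, "GDPa1", "some-user"]],
        columns=LEADERBOARD_RESULTS_COLUMNS,
    )
    raw["property"] = raw["assay"].map(ASSAY_RENAME)  # as done in utils.fetch_hf_results
    print(raw[LEADERBOARD_DISPLAY_COLUMNS])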
submit.py CHANGED

@@ -3,48 +3,26 @@ import tempfile
 from typing import BinaryIO
 import json
 
+from click import pass_obj
 import gradio as gr
 from datetime import datetime
 import uuid
 
-from constants import API, SUBMISSIONS_REPO
+from constants import API, SUBMISSIONS_REPO
 from validation import validate_csv_file, validate_username
 
 
-def
-
+def upload_submission(
+    file_path: Path,
     user_state,
-    submission_type: str
-    model_name: str
-    model_description: str
+    submission_type: str,
+    model_name: str,
+    model_description: str,
 ):
-    user_state = user_state or ANONYMOUS_SUBMISSION_USERNAME
-    validate_username(user_state)
-
-    model_name = model_name.strip()
-    model_description = model_description.strip()
-
-    if not model_name:
-        raise gr.Error("Please provide a model name.")
-    if not model_description:
-        raise gr.Error("Please provide a model description.")
-    if submitted_file is None:
-        raise gr.Error("Please upload a CSV file before submitting.")
-
-    file_path = submitted_file.name
-
-    if not file_path:
-        raise gr.Error("Uploaded file object does not have a valid file path.")
-
-    path_obj = Path(file_path)
-
-    if path_obj.suffix.lower() != ".csv":
-        raise gr.Error("File must be a CSV file. Please upload a .csv file.")
-
     timestamp = datetime.utcnow().isoformat()
     submission_id = str(uuid.uuid4())
 
-    with
+    with file_path.open("rb") as f_in:
         file_content = f_in.read().decode("utf-8")
 
     validate_csv_file(file_content, submission_type)
@@ -58,7 +36,7 @@ def make_submission(
         "submission_time": timestamp,
         "evaluated": False,
         "user": user_state,
-        "
+        "model_name": model_name,
         "model_description": model_description,
         "csv_content": file_content,
         "dataset": submission_type,
@@ -77,4 +55,42 @@ def make_submission(
     )
     Path(tmp_name).unlink()
 
+def make_submission(
+    submitted_file: BinaryIO,
+    user_state,
+    submission_type: str = "GDPa1",
+    model_name: str = "",
+    model_description: str = "",
+):
+    user_state = user_state
+    validate_username(user_state)
+
+    model_name = model_name.strip()
+    model_description = model_description.strip()
+
+    if not model_name:
+        raise gr.Error("Please provide a model name.")
+    if not model_description:
+        raise gr.Error("Please provide a model description.")
+    if submitted_file is None:
+        raise gr.Error("Please upload a CSV file before submitting.")
+
+    file_path = submitted_file.name
+
+    if not file_path:
+        raise gr.Error("Uploaded file object does not have a valid file path.")
+
+    path_obj = Path(file_path)
+
+    if path_obj.suffix.lower() != ".csv":
+        raise gr.Error("File must be a CSV file. Please upload a .csv file.")
+
+    upload_submission(
+        file_path=path_obj,
+        user_state=user_state,
+        submission_type=submission_type,
+        model_name=model_name,
+        model_description=model_description,
+    )
+
     return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
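The middle of `upload_submission` is elided by this hunk; from the record fields shown above, the `API` / `SUBMISSIONS_REPO` imports, and the `Path(tmp_name).unlink()` cleanup, it plausibly serializes the record to a temporary JSON file and pushes it to the submissions dataset repo. A hedged sketch of that step only (the `path_in_repo` layout is an assumption, and `API` is taken to be a `huggingface_hub.HfApi` instance):

    # Sketch, not the app's actual upload code.
    import json
    import tempfile
    from pathlib import Path
    from huggingface_hub import HfApi

    API = HfApi()  # in the app this comes from constants
    SUBMISSIONS_REPO = "ginkgo-datapoints/abdev-bench-submissions"

    def push_submission_record(record: dict, submission_id: str) -> None:
        """Write the submission record to a temp JSON file and upload it to the dataset repo."""
        with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
            json.dump(record, tmp)
            tmp_name = tmp.name
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=f"submissions/{submission_id}.json",  # assumed layout
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
        )
        Path(tmp_name).unlink()  # mirrors the cleanup at the end of upload_submission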
utils.py CHANGED

@@ -7,7 +7,9 @@ import pandas as pd
 from datasets import load_dataset
 from huggingface_hub import hf_hub_download
 
-from constants import API, SUBMISSIONS_REPO, RESULTS_REPO, ASSAY_RENAME
+from constants import API, SUBMISSIONS_REPO, RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
+
+pd.set_option('display.max_columns', None)
 
 # def make_user_clickable(name):
 #     link =f'https://huggingface.co/{name}'
@@ -23,10 +25,12 @@ def show_output_box(message):
 
 
 def fetch_hf_results():
-
-
-
-
+    # Should cache by default if not using force_redownload
+    df = load_dataset(
+        RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
+    )["train"].to_pandas()
+    assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
+    df = df.drop_duplicates(subset=["model", "assay"])
     df["property"] = df["assay"].map(ASSAY_RENAME)
     print(df.head())
     return df
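The `# Should cache by default if not using force_redownload` comment refers to the `datasets` library's cache; if stale leaderboard rows ever become an issue, the cache can be bypassed explicitly. A small sketch of that knob, using the same repo and `data_files` as the hunk above:

    from datasets import load_dataset

    RESULTS_REPO = "ginkgo-datapoints/abdev-bench-results"

    # Default behaviour: reuse the locally cached copy when the remote file is unchanged.
    df = load_dataset(
        RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()

    # Explicitly skip the cache and always re-download (slower, but never stale).
    df_fresh = load_dataset(
        RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
        download_mode="force_redownload",
    )["train"].to_pandas()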