Spaces:
Runtime error
Runtime error
Commit
·
61885ca
1
Parent(s):
7d20cd0
update submit mechanism
Browse files
- app.py +18 -12
- src/display/utils.py +14 -14
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +1 -1
- src/submission/submit.py +26 -12
app.py
CHANGED
|
@@ -154,15 +154,17 @@ with demo:
|
|
| 154 |
|
| 155 |
with gr.Row():
|
| 156 |
with gr.Column():
|
| 157 |
-
|
|
|
|
| 158 |
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
| 166 |
|
| 167 |
# with gr.Column():
|
| 168 |
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
|
|
@@ -182,17 +184,21 @@ with demo:
|
|
| 182 |
# )
|
| 183 |
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
| 184 |
|
| 185 |
-
logger.info("
|
| 186 |
submit_button = gr.Button("Submit")
|
| 187 |
submission_result = gr.Markdown()
|
| 188 |
|
| 189 |
-
def add_solution_cbk(
|
| 190 |
-
|
|
|
|
|
|
|
| 191 |
|
| 192 |
submit_button.click(
|
| 193 |
add_solution_cbk,
|
| 194 |
[
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
submission_file,
|
| 197 |
],
|
| 198 |
submission_result,
|
|
|
|
| 154 |
|
| 155 |
with gr.Row():
|
| 156 |
with gr.Column():
|
| 157 |
+
system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
|
| 158 |
+
org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
|
| 159 |
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
| 160 |
+
sys_type_dropdown = gr.Dropdown(
|
| 161 |
+
choices=[t.to_str(" : ") for t in ModelType],
|
| 162 |
+
label=AutoEvalColumn.system_type.name,
|
| 163 |
+
multiselect=False,
|
| 164 |
+
value=ModelType.LLM.to_str(" : "),
|
| 165 |
+
interactive=True,
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
|
| 169 |
# with gr.Column():
|
| 170 |
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
|
|
|
|
| 184 |
# )
|
| 185 |
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
| 186 |
|
| 187 |
+
logger.info("Submit button")
|
| 188 |
submit_button = gr.Button("Submit")
|
| 189 |
submission_result = gr.Markdown()
|
| 190 |
|
| 191 |
+
def add_solution_cbk(system_name,
|
| 192 |
+
org,
|
| 193 |
+
sys_type, submission_path):
|
| 194 |
+
return add_new_solutions(lbdb, system_name, org, sys_type, submission_path)
|
| 195 |
|
| 196 |
submit_button.click(
|
| 197 |
add_solution_cbk,
|
| 198 |
[
|
| 199 |
+
system_name_textbox,
|
| 200 |
+
org_textbox,
|
| 201 |
+
sys_type_dropdown,
|
| 202 |
submission_file,
|
| 203 |
],
|
| 204 |
submission_result,
|
src/display/utils.py
CHANGED
|
@@ -86,26 +86,26 @@ class ModelDetails:
|
|
| 86 |
|
| 87 |
|
| 88 |
class ModelType(Enum):
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
| 92 |
-
RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
| 93 |
-
|
| 94 |
|
| 95 |
def to_str(self, separator=" "):
|
| 96 |
return f"{self.value.symbol}{separator}{self.value.name}"
|
| 97 |
|
| 98 |
@staticmethod
|
| 99 |
def from_str(type):
|
| 100 |
-
if "
|
| 101 |
-
return ModelType.
|
| 102 |
-
if "
|
| 103 |
-
return ModelType.
|
| 104 |
-
if "RL-tuned" in type or "🟦" in type:
|
| 105 |
-
|
| 106 |
-
if "instruction-tuned" in type or "⭕" in type:
|
| 107 |
-
|
| 108 |
-
return ModelType.
|
| 109 |
|
| 110 |
|
| 111 |
class WeightType(Enum):
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
class ModelType(Enum):
|
| 89 |
+
LLM = ModelDetails(name="LLM", symbol="🟢")
|
| 90 |
+
AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
|
| 91 |
+
# IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
| 92 |
+
# RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
| 93 |
+
Other = ModelDetails(name="Other", symbol="?")
|
| 94 |
|
| 95 |
def to_str(self, separator=" "):
|
| 96 |
return f"{self.value.symbol}{separator}{self.value.name}"
|
| 97 |
|
| 98 |
@staticmethod
|
| 99 |
def from_str(type):
|
| 100 |
+
if "AgenticLLM" in type or "🔶" in type:
|
| 101 |
+
return ModelType.AgenticLLM
|
| 102 |
+
if "LLM" in type or "🟢" in type:
|
| 103 |
+
return ModelType.LLM
|
| 104 |
+
# if "RL-tuned" in type or "🟦" in type:
|
| 105 |
+
# return ModelType.RL
|
| 106 |
+
# if "instruction-tuned" in type or "⭕" in type:
|
| 107 |
+
# return ModelType.IFT
|
| 108 |
+
return ModelType.Other
|
| 109 |
|
| 110 |
|
| 111 |
class WeightType(Enum):
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -23,7 +23,7 @@ class EvalResult:
|
|
| 23 |
revision: str # commit hash, "" if main
|
| 24 |
results: dict
|
| 25 |
precision: Precision = Precision.Unknown
|
| 26 |
-
model_type: ModelType = ModelType.
|
| 27 |
weight_type: WeightType = WeightType.Original # Original or Adapter
|
| 28 |
architecture: str = "Unknown"
|
| 29 |
license: str = "?"
|
|
|
|
| 23 |
revision: str # commit hash, "" if main
|
| 24 |
results: dict
|
| 25 |
precision: Precision = Precision.Unknown
|
| 26 |
+
model_type: ModelType = ModelType.LLM # Pretrained, fine tuned, ...
|
| 27 |
weight_type: WeightType = WeightType.Original # Original or Adapter
|
| 28 |
architecture: str = "Unknown"
|
| 29 |
license: str = "?"
|
src/populate.py
CHANGED
|
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 14 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 15 |
|
| 16 |
df = pd.DataFrame.from_records(all_data_json)
|
| 17 |
-
df = df.sort_values(by=[AutoEvalColumn.
|
| 18 |
df = df[cols].round(decimals=2)
|
| 19 |
|
| 20 |
# filter out if any of the benchmarks have not been produced
|
|
|
|
| 14 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 15 |
|
| 16 |
df = pd.DataFrame.from_records(all_data_json)
|
| 17 |
+
df = df.sort_values(by=[AutoEvalColumn.success_rate.name], ascending=False)
|
| 18 |
df = df[cols].round(decimals=2)
|
| 19 |
|
| 20 |
# filter out if any of the benchmarks have not been produced
|
src/submission/submit.py
CHANGED
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
| 8 |
|
| 9 |
from src.datamodel.data import F1Data
|
| 10 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
|
|
|
| 11 |
from src.envs import API, SUBMISSIONS_REPO, TOKEN
|
| 12 |
from src.logger import get_logger
|
| 13 |
# from src.submission.check_validity import (
|
|
@@ -21,8 +22,9 @@ logger = get_logger(__name__)
|
|
| 21 |
|
| 22 |
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
| 23 |
logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
if any(type(v) != str for v in pd_ds["formula_name"]):
|
| 27 |
return "Not all formula_name values are of type str"
|
| 28 |
if any(type(v) != str for v in pd_ds["solution"]):
|
|
@@ -38,37 +40,49 @@ def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
|
| 38 |
|
| 39 |
def add_new_solutions(
|
| 40 |
lbdb: F1Data,
|
| 41 |
-
|
|
|
|
|
|
|
| 42 |
submission_path: str,
|
| 43 |
):
|
| 44 |
-
logger.info("ADD SUBMISSION!
|
| 45 |
-
if not
|
| 46 |
-
return styled_error("Please fill
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
if not submission_path:
|
| 49 |
return styled_error("Please upload JSONL solutions file")
|
| 50 |
|
| 51 |
try:
|
| 52 |
-
|
| 53 |
except Exception as e:
|
| 54 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
| 55 |
|
| 56 |
-
validation_error = validate_submission(lbdb,
|
| 57 |
if validation_error:
|
| 58 |
return styled_error(validation_error)
|
| 59 |
|
| 60 |
-
|
|
|
|
| 61 |
|
| 62 |
# Seems good, creating the eval
|
| 63 |
-
print(f"Adding new submission {submission_id}
|
| 64 |
submission_ts = time.time_ns()
|
| 65 |
|
| 66 |
def add_info(row):
|
| 67 |
-
row["
|
|
|
|
|
|
|
| 68 |
row["submission_id"] = submission_id
|
| 69 |
row["submission_ts"] = submission_ts
|
| 70 |
|
| 71 |
-
ds = Dataset.from_pandas(
|
| 72 |
|
| 73 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
| 74 |
# print("Creating eval file")
|
|
|
|
| 8 |
|
| 9 |
from src.datamodel.data import F1Data
|
| 10 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
| 11 |
+
from src.display.utils import ModelType
|
| 12 |
from src.envs import API, SUBMISSIONS_REPO, TOKEN
|
| 13 |
from src.logger import get_logger
|
| 14 |
# from src.submission.check_validity import (
|
|
|
|
| 22 |
|
| 23 |
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
| 24 |
logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
|
| 25 |
+
expected_cols = ["formula_name", "solution"]
|
| 26 |
+
if set(pd_ds.columns) != set(expected_cols):
|
| 27 |
+
return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
|
| 28 |
if any(type(v) != str for v in pd_ds["formula_name"]):
|
| 29 |
return "Not all formula_name values are of type str"
|
| 30 |
if any(type(v) != str for v in pd_ds["solution"]):
|
|
|
|
| 40 |
|
| 41 |
def add_new_solutions(
|
| 42 |
lbdb: F1Data,
|
| 43 |
+
system_name : str,
|
| 44 |
+
org: str,
|
| 45 |
+
sys_type: str,
|
| 46 |
submission_path: str,
|
| 47 |
):
|
| 48 |
+
logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
|
| 49 |
+
if not system_name:
|
| 50 |
+
return styled_error("Please fill system name")
|
| 51 |
+
|
| 52 |
+
if not org:
|
| 53 |
+
return styled_error("Please fill organization name")
|
| 54 |
+
|
| 55 |
+
if not sys_type:
|
| 56 |
+
return styled_error("Please select system type")
|
| 57 |
+
sys_type = ModelType.from_str(sys_type).name
|
| 58 |
|
| 59 |
if not submission_path:
|
| 60 |
return styled_error("Please upload JSONL solutions file")
|
| 61 |
|
| 62 |
try:
|
| 63 |
+
submission_df = pd.read_json(submission_path, lines=True)
|
| 64 |
except Exception as e:
|
| 65 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
| 66 |
|
| 67 |
+
validation_error = validate_submission(lbdb, submission_df)
|
| 68 |
if validation_error:
|
| 69 |
return styled_error(validation_error)
|
| 70 |
|
| 71 |
+
|
| 72 |
+
submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
|
| 73 |
|
| 74 |
# Seems good, creating the eval
|
| 75 |
+
print(f"Adding new submission: {submission_id}")
|
| 76 |
submission_ts = time.time_ns()
|
| 77 |
|
| 78 |
def add_info(row):
|
| 79 |
+
row["system_name"] = system_name
|
| 80 |
+
row["organization"] = org
|
| 81 |
+
row["system_type"] = sys_type
|
| 82 |
row["submission_id"] = submission_id
|
| 83 |
row["submission_ts"] = submission_ts
|
| 84 |
|
| 85 |
+
ds = Dataset.from_pandas(submission_df).map(add_info)
|
| 86 |
|
| 87 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
| 88 |
# print("Creating eval file")
|