FormulaOne-Leaderboard

Runtime error

App Files Files Community

Alvinn-aai committed on Jul 28

Commit

61885ca

1 Parent(s): 7d20cd0

update submit mechanism

Browse files

Files changed (5) hide show

app.py +18 -12
src/display/utils.py +14 -14
src/leaderboard/read_evals.py +1 -1
src/populate.py +1 -1
src/submission/submit.py +26 -12

app.py CHANGED Viewed

@@ -154,15 +154,17 @@ with demo:
             with gr.Row():
                 with gr.Column():
-                    submitter_textbox = gr.Textbox(label="Submitter Name")
                     # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    # model_type = gr.Dropdown(
-                    #     choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                    #     label="Model type",
-                    #     multiselect=False,
-                    #     value=None,
-                    #     interactive=True,
-                    # )
                     # with gr.Column():
                     submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
@@ -182,17 +184,21 @@ with demo:
                     # )
                     # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            logger.info("Submut button")
             submit_button = gr.Button("Submit")
             submission_result = gr.Markdown()
-            def add_solution_cbk(submitter, submission_path):
-                return add_new_solutions(lbdb, submitter, submission_path)
             submit_button.click(
                 add_solution_cbk,
                 [
-                    submitter_textbox,
                     submission_file,
                 ],
                 submission_result,

             with gr.Row():
                 with gr.Column():
+                    system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
+                    org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                     # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    sys_type_dropdown = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType],
+                        label=AutoEvalColumn.system_type.name,
+                        multiselect=False,
+                        value=ModelType.LLM.to_str(" : "),
+                        interactive=True,
+                    )
                     # with gr.Column():
                     submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
                     # )
                     # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            logger.info("Submit button")
             submit_button = gr.Button("Submit")
             submission_result = gr.Markdown()
+            def add_solution_cbk(system_name,
+                    org,
+                    sys_type, submission_path):
+                return add_new_solutions(lbdb, system_name, org, sys_type, submission_path)
             submit_button.click(
                 add_solution_cbk,
                 [
+                    system_name_textbox,
+                    org_textbox,
+                    sys_type_dropdown,
                     submission_file,
                 ],
                 submission_result,

src/display/utils.py CHANGED Viewed

@@ -86,26 +86,26 @@ class ModelDetails:
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
     @staticmethod
     def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
 class WeightType(Enum):

 class ModelType(Enum):
+    LLM = ModelDetails(name="LLM", symbol="🟢")
+    AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
+    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    Other = ModelDetails(name="Other", symbol="?")
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
     @staticmethod
     def from_str(type):
+        if "AgenticLLM" in type or "🔶" in type:
+            return ModelType.AgenticLLM
+        if "LLM" in type or "🟢" in type:
+            return ModelType.LLM
+        # if "RL-tuned" in type or "🟦" in type:
+        #     return ModelType.RL
+        # if "instruction-tuned" in type or "⭕" in type:
+        #     return ModelType.IFT
+        return ModelType.Other
 class WeightType(Enum):

src/leaderboard/read_evals.py CHANGED Viewed

@@ -23,7 +23,7 @@ class EvalResult:
     revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original # Original or Adapter
     architecture: str = "Unknown"
     license: str = "?"

     revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
+    model_type: ModelType = ModelType.LLM # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original # Original or Adapter
     architecture: str = "Unknown"
     license: str = "?"

src/populate.py CHANGED Viewed

@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced

     all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(all_data_json)
+    df = df.sort_values(by=[AutoEvalColumn.success_rate.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced

src/submission/submit.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message, styled_warning
 from src.envs import API, SUBMISSIONS_REPO, TOKEN
 from src.logger import get_logger
 # from src.submission.check_validity import (
@@ -21,8 +22,9 @@ logger = get_logger(__name__)
 def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
     logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
-    if set(pd_ds.columns) != set(["formula_name", "solution"]):
-        return "Bad format of submission"
     if any(type(v) != str for v in pd_ds["formula_name"]):
         return "Not all formula_name values are of type str"
     if any(type(v) != str for v in pd_ds["solution"]):
@@ -38,37 +40,49 @@ def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
 def add_new_solutions(
     lbdb: F1Data,
-    submitter: str,
     submission_path: str,
 ):
-    logger.info("ADD SUBMISSION! submitter %s path %s", submitter, submission_path)
-    if not submitter:
-        return styled_error("Please fill submitter name")
     if not submission_path:
         return styled_error("Please upload JSONL solutions file")
     try:
-        pd_ds = pd.read_json(submission_path, lines=True)
     except Exception as e:
         return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
-    validation_error = validate_submission(lbdb, pd_ds)
     if validation_error:
         return styled_error(validation_error)
-    submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
     # Seems good, creating the eval
-    print(f"Adding new submission {submission_id} from {submitter}")
     submission_ts = time.time_ns()
     def add_info(row):
-        row["submitter"] = submitter
         row["submission_id"] = submission_id
         row["submission_ts"] = submission_ts
-    ds = Dataset.from_pandas(pd_ds).map(add_info)
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
     # print("Creating eval file")

 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message, styled_warning
+from src.display.utils import ModelType
 from src.envs import API, SUBMISSIONS_REPO, TOKEN
 from src.logger import get_logger
 # from src.submission.check_validity import (
 def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
     logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
+    expected_cols = ["formula_name", "solution"]
+    if set(pd_ds.columns) != set(expected_cols):
+        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
     if any(type(v) != str for v in pd_ds["formula_name"]):
         return "Not all formula_name values are of type str"
     if any(type(v) != str for v in pd_ds["solution"]):
 def add_new_solutions(
     lbdb: F1Data,
+    system_name : str,
+    org: str,
+    sys_type: str,
     submission_path: str,
 ):
+    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
+    if not system_name:
+        return styled_error("Please fill system name")
+    if not org:
+        return styled_error("Please fill organization name")
+    if not sys_type:
+        return styled_error("Please select system type")
+    sys_type = ModelType.from_str(sys_type).name
     if not submission_path:
         return styled_error("Please upload JSONL solutions file")
     try:
+        submission_df = pd.read_json(submission_path, lines=True)
     except Exception as e:
         return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
+    validation_error = validate_submission(lbdb, submission_df)
     if validation_error:
         return styled_error(validation_error)
+    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
     # Seems good, creating the eval
+    print(f"Adding new submission: {submission_id}")
     submission_ts = time.time_ns()
     def add_info(row):
+        row["system_name"] = system_name
+        row["organization"] = org
+        row["system_type"] = sys_type
         row["submission_id"] = submission_id
         row["submission_ts"] = submission_ts
+    ds = Dataset.from_pandas(submission_df).map(add_info)
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
     # print("Creating eval file")