Spaces:
Runtime error
Runtime error
Commit
·
b74992f
1
Parent(s):
80fb2c0
improve validation, testing wip
Browse files
- app.py +8 -6
- src/envs.py +1 -1
- src/submission/submit.py +19 -8
app.py
CHANGED
|
@@ -38,12 +38,15 @@ from src.submission.submit import add_new_solutions
|
|
| 38 |
|
| 39 |
logger = get_logger(__name__)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def restart_space():
|
| 43 |
API.restart_space(repo_id=REPO_ID)
|
| 44 |
|
| 45 |
|
| 46 |
-
lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO)
|
| 47 |
|
| 48 |
logger.info("Initialized LBDB")
|
| 49 |
|
|
@@ -165,7 +168,6 @@ with demo:
|
|
| 165 |
interactive=True,
|
| 166 |
)
|
| 167 |
|
| 168 |
-
|
| 169 |
# with gr.Column():
|
| 170 |
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
|
| 171 |
# precision = gr.Dropdown(
|
|
@@ -188,10 +190,10 @@ with demo:
|
|
| 188 |
submit_button = gr.Button("Submit")
|
| 189 |
submission_result = gr.Markdown()
|
| 190 |
|
| 191 |
-
def add_solution_cbk(system_name,
|
| 192 |
-
|
| 193 |
-
sys_type, submission_path
|
| 194 |
-
|
| 195 |
|
| 196 |
submit_button.click(
|
| 197 |
add_solution_cbk,
|
|
|
|
| 38 |
|
| 39 |
logger = get_logger(__name__)
|
| 40 |
|
| 41 |
+
SPLIT = "warmup" # TODO temp
|
| 42 |
+
SKIP_VALIDATION = True # TODO temp
|
| 43 |
+
|
| 44 |
|
| 45 |
def restart_space():
|
| 46 |
API.restart_space(repo_id=REPO_ID)
|
| 47 |
|
| 48 |
|
| 49 |
+
lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)
|
| 50 |
|
| 51 |
logger.info("Initialized LBDB")
|
| 52 |
|
|
|
|
| 168 |
interactive=True,
|
| 169 |
)
|
| 170 |
|
|
|
|
| 171 |
# with gr.Column():
|
| 172 |
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
|
| 173 |
# precision = gr.Dropdown(
|
|
|
|
| 190 |
submit_button = gr.Button("Submit")
|
| 191 |
submission_result = gr.Markdown()
|
| 192 |
|
| 193 |
+
def add_solution_cbk(system_name, org, sys_type, submission_path):
|
| 194 |
+
return add_new_solutions(
|
| 195 |
+
lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
|
| 196 |
+
)
|
| 197 |
|
| 198 |
submit_button.click(
|
| 199 |
add_solution_cbk,
|
src/envs.py
CHANGED
|
@@ -14,7 +14,7 @@ SUBMISSIONS_REPO = f"{OWNER}/dev-f1-leaderboard-submissions"
|
|
| 14 |
RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"
|
| 15 |
|
| 16 |
# If you setup a cache later, just change HF_HOME
|
| 17 |
-
CACHE_PATH=os.getenv("HF_HOME", ".")
|
| 18 |
|
| 19 |
print(f"{TOKEN=}")
|
| 20 |
print(f"{REPO_ID=}")
|
|
|
|
| 14 |
RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"
|
| 15 |
|
| 16 |
# If you setup a cache later, just change HF_HOME
|
| 17 |
+
CACHE_PATH = os.getenv("HF_HOME", ".")
|
| 18 |
|
| 19 |
print(f"{TOKEN=}")
|
| 20 |
print(f"{REPO_ID=}")
|
src/submission/submit.py
CHANGED
|
@@ -3,8 +3,9 @@ import os
|
|
| 3 |
from datetime import datetime, timezone
|
| 4 |
import time
|
| 5 |
|
| 6 |
-
from datasets import Dataset
|
| 7 |
import pandas as pd
|
|
|
|
| 8 |
|
| 9 |
from src.datamodel.data import F1Data
|
| 10 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
|
@@ -25,19 +26,24 @@ logger = get_logger(__name__)
|
|
| 25 |
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
| 26 |
logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
|
| 27 |
expected_cols = ["problem_id", "solution"]
|
|
|
|
| 28 |
if set(pd_ds.columns) != set(expected_cols):
|
| 29 |
return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
if any(type(v) != str for v in pd_ds["solution"]):
|
| 33 |
return "solution must be of type str"
|
| 34 |
-
|
|
|
|
| 35 |
if submitted_ids != lbdb.code_problem_ids:
|
| 36 |
missing = lbdb.code_problem_ids - submitted_ids
|
| 37 |
unknown = submitted_ids - lbdb.code_problem_ids
|
| 38 |
return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
|
| 39 |
if len(pd_ds) > len(lbdb.code_problem_ids):
|
| 40 |
return "Duplicate problem IDs exist in uploaded file"
|
|
|
|
| 41 |
return None
|
| 42 |
|
| 43 |
|
|
@@ -47,6 +53,7 @@ def add_new_solutions(
|
|
| 47 |
org: str,
|
| 48 |
sys_type: str,
|
| 49 |
submission_path: str,
|
|
|
|
| 50 |
):
|
| 51 |
logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
|
| 52 |
if not system_name:
|
|
@@ -67,9 +74,10 @@ def add_new_solutions(
|
|
| 67 |
except Exception as e:
|
| 68 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
|
| 75 |
|
|
@@ -86,6 +94,9 @@ def add_new_solutions(
|
|
| 86 |
|
| 87 |
ds = Dataset.from_pandas(submission_df).map(add_info)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
| 90 |
# print("Creating eval file")
|
| 91 |
# OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
|
@@ -108,5 +119,5 @@ def add_new_solutions(
|
|
| 108 |
# os.remove(out_path)
|
| 109 |
|
| 110 |
return styled_message(
|
| 111 |
-
"Your request has been submitted to the evaluation queue!\
|
| 112 |
)
|
|
|
|
| 3 |
from datetime import datetime, timezone
|
| 4 |
import time
|
| 5 |
|
| 6 |
+
from datasets import Dataset, DatasetDict
|
| 7 |
import pandas as pd
|
| 8 |
+
from pandas.api.types import is_integer_dtype, is_string_dtype
|
| 9 |
|
| 10 |
from src.datamodel.data import F1Data
|
| 11 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
|
|
|
| 26 |
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
| 27 |
logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
|
| 28 |
expected_cols = ["problem_id", "solution"]
|
| 29 |
+
|
| 30 |
if set(pd_ds.columns) != set(expected_cols):
|
| 31 |
return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
|
| 32 |
+
|
| 33 |
+
if not is_integer_dtype(pd_ds["problem_id"]):
|
| 34 |
+
return "problem_id must be str convertible to int"
|
| 35 |
+
|
| 36 |
if any(type(v) != str for v in pd_ds["solution"]):
|
| 37 |
return "solution must be of type str"
|
| 38 |
+
|
| 39 |
+
submitted_ids = set(pd_ds.problem_id.astype(str))
|
| 40 |
if submitted_ids != lbdb.code_problem_ids:
|
| 41 |
missing = lbdb.code_problem_ids - submitted_ids
|
| 42 |
unknown = submitted_ids - lbdb.code_problem_ids
|
| 43 |
return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
|
| 44 |
if len(pd_ds) > len(lbdb.code_problem_ids):
|
| 45 |
return "Duplicate problem IDs exist in uploaded file"
|
| 46 |
+
|
| 47 |
return None
|
| 48 |
|
| 49 |
|
|
|
|
| 53 |
org: str,
|
| 54 |
sys_type: str,
|
| 55 |
submission_path: str,
|
| 56 |
+
skip_validation: bool = False,
|
| 57 |
):
|
| 58 |
logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
|
| 59 |
if not system_name:
|
|
|
|
| 74 |
except Exception as e:
|
| 75 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
| 76 |
|
| 77 |
+
if not skip_validation:
|
| 78 |
+
validation_error = validate_submission(lbdb, submission_df)
|
| 79 |
+
if validation_error:
|
| 80 |
+
return styled_error(validation_error)
|
| 81 |
|
| 82 |
submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
|
| 83 |
|
|
|
|
| 94 |
|
| 95 |
ds = Dataset.from_pandas(submission_df).map(add_info)
|
| 96 |
|
| 97 |
+
# dsdict = DatasetDict({submission_id: ds})
|
| 98 |
+
# dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
|
| 99 |
+
|
| 100 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
| 101 |
# print("Creating eval file")
|
| 102 |
# OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
|
|
|
| 119 |
# os.remove(out_path)
|
| 120 |
|
| 121 |
return styled_message(
|
| 122 |
+
"Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
|
| 123 |
)
|