Update app.py
app.py
CHANGED
@@ -25,16 +25,16 @@ TOKEN = os.environ.get("TOKEN", None)
 OWNER="osunlp"
 DATA_DATASET = f"{OWNER}/TravelBench"
 EVAL_DATASET = f"{OWNER}/TravelBenchEval"
+RESULTS_DATASET = f"{OWNER}/TravelBenchPublicResults"
 
 api = HfApi()
 
-
+# 'scores' = "2024"
 
 os.makedirs("scored", exist_ok=True)
 
 # # Display the results
-
-eval_results = load_dataset(EVAL_DATASET, 'scores', token=TOKEN)
+eval_results = load_dataset(RESULTS_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
 def get_dataframe_from_results(eval_results, split):
     local_df = eval_results[split]
     local_df = local_df.remove_columns(["Mail"])
@@ -84,7 +84,7 @@ def add_new_eval(
 
     # Save submitted file
     api.upload_file(
-        repo_id=
+        repo_id=RESULTS_DATASET,
         path_or_fileobj=path_to_file.name,
         path_in_repo=f"{organization}/{val_or_test}_{eval_mode}_{planning_strategy}_raw_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
@@ -99,7 +99,7 @@ def add_new_eval(
 
     # Save scored file
     api.upload_file(
-        repo_id=
+        repo_id=RESULTS_DATASET,
         path_or_fileobj=f"scored/{organization}_{val_or_test}_{eval_mode}_{planning_strategy}.jsonl",
         path_in_repo=f"{organization}/{model}/{val_or_test}_{eval_mode}_{planning_strategy}_scored_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
@@ -124,13 +124,13 @@ def add_new_eval(
 
     print(eval_results)
 
-    eval_results.push_to_hub(
+    eval_results.push_to_hub(RESULTS_DATASET, config_name = 'scores', token=TOKEN)
 
     return format_log(f"Model {model} submitted by {organization} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
 
 
 def refresh():
-    eval_results = load_dataset(
+    eval_results = load_dataset(RESULTS_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
     eval_dataframe_val = get_dataframe_from_results(eval_results=eval_results, split="validation")
     eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, split="test")
     return eval_dataframe_val, eval_dataframe_test
@@ -196,3 +196,4 @@ with demo:
 # scheduler.add_job(restart_space, "interval", seconds=3600)
 # scheduler.start()
 demo.launch(debug=True)
+
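
For reference, the new read path can be exercised on its own. A minimal sketch assuming the repo and config names from the diff above; note that recent datasets releases replace the deprecated ignore_verifications=True flag used here with verification_mode="no_checks":

# Standalone sketch of the read path introduced above (names from the diff).
import os
from datasets import load_dataset

TOKEN = os.environ.get("TOKEN", None)
RESULTS_DATASET = "osunlp/TravelBenchPublicResults"

# 'scores' selects the dataset config; force_redownload bypasses the local
# cache so freshly pushed submissions show up on the next load.
eval_results = load_dataset(
    RESULTS_DATASET,
    "scores",
    token=TOKEN,
    download_mode="force_redownload",
    verification_mode="no_checks",  # older datasets releases: ignore_verifications=True
)
print(eval_results)  # DatasetDict with the splits the leaderboard reads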
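Both upload sites follow the same HfApi.upload_file pattern; a hedged sketch in which the file names, organization, and token handling are illustrative placeholders, with only repo_id and repo_type mirroring the diff:

# Sketch of the upload pattern used for the raw and scored files.
import os
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    repo_id="osunlp/TravelBenchPublicResults",
    path_or_fileobj="scored/ExampleOrg_validation_two-stage_direct.jsonl",  # placeholder
    path_in_repo="ExampleOrg/example-model/validation_scored.jsonl",  # placeholder
    repo_type="dataset",
    token=os.environ.get("TOKEN"),  # needs write access to the dataset repo
)

One design note: embedding datetime.datetime.today() directly in path_in_repo, as the diff does, produces a timestamp containing spaces and colons; a strftime format such as %Y%m%d-%H%M%S may be a safer choice for repo paths.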
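Finally, the write-back and refresh form a round-trip: pushing under the same 'scores' config is what lets the force-redownload in refresh() see new rows. A sketch continuing the names above, with the Gradio wiring shown only as an assumption since it sits outside this diff:

# Sketch of the write path: push the updated DatasetDict back to the Hub
# under the same config so the next refresh() picks it up.
eval_results.push_to_hub(RESULTS_DATASET, config_name="scores", token=TOKEN)

# refresh() returns (val_df, test_df); a typical Blocks wiring (assumed
# here, not part of this diff) would look like:
#   refresh_button.click(refresh, inputs=[], outputs=[val_table, test_table])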