Update app.py
app.py
CHANGED
@@ -25,16 +25,16 @@ TOKEN = os.environ.get("TOKEN", None)
 OWNER="osunlp"
 DATA_DATASET = f"{OWNER}/TravelBench"
 EVAL_DATASET = f"{OWNER}/TravelBenchEval"
+RESULTS_DATASET = f"{OWNER}/TravelBenchPublicResults"
 
 api = HfApi()
 
-
+# 'scores' = "2024"
 
 os.makedirs("scored", exist_ok=True)
 
 # # Display the results
-
-eval_results = load_dataset(EVAL_DATASET, 'scores', token=TOKEN)
+eval_results = load_dataset(RESULTS_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
 def get_dataframe_from_results(eval_results, split):
     local_df = eval_results[split]
     local_df = local_df.remove_columns(["Mail"])
@@ -84,7 +84,7 @@ def add_new_eval(
 
     # Save submitted file
     api.upload_file(
-        repo_id=
+        repo_id=RESULTS_DATASET,
         path_or_fileobj=path_to_file.name,
         path_in_repo=f"{organization}/{val_or_test}_{eval_mode}_{planning_strategy}_raw_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
@@ -99,7 +99,7 @@ def add_new_eval(
 
     # Save scored file
     api.upload_file(
-        repo_id=
+        repo_id=RESULTS_DATASET,
         path_or_fileobj=f"scored/{organization}_{val_or_test}_{eval_mode}_{planning_strategy}.jsonl",
         path_in_repo=f"{organization}/{model}/{val_or_test}_{eval_mode}_{planning_strategy}_scored_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
@@ -124,13 +124,13 @@ def add_new_eval(
 
     print(eval_results)
 
-    eval_results.push_to_hub(
+    eval_results.push_to_hub(RESULTS_DATASET, config_name = 'scores', token=TOKEN)
 
     return format_log(f"Model {model} submitted by {organization} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
 
 
 def refresh():
-    eval_results = load_dataset(
+    eval_results = load_dataset(RESULTS_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
     eval_dataframe_val = get_dataframe_from_results(eval_results=eval_results, split="validation")
     eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, split="test")
     return eval_dataframe_val, eval_dataframe_test
@@ -196,3 +196,4 @@ with demo:
 # scheduler.add_job(restart_space, "interval", seconds=3600)
 # scheduler.start()
 demo.launch(debug=True)
+
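
For reference, the new read path can be exercised on its own. A minimal sketch assuming the repo and config names from the diff above; note that recent datasets releases replace the deprecated ignore_verifications=True flag used here with verification_mode="no_checks":

# Standalone sketch of the read path introduced above (names from the diff).
import os
from datasets import load_dataset

TOKEN = os.environ.get("TOKEN", None)
RESULTS_DATASET = "osunlp/TravelBenchPublicResults"

# 'scores' selects the dataset config; force_redownload bypasses the local
# cache so freshly pushed submissions show up on the next load.
eval_results = load_dataset(
    RESULTS_DATASET,
    "scores",
    token=TOKEN,
    download_mode="force_redownload",
    verification_mode="no_checks",  # older datasets releases: ignore_verifications=True
)
print(eval_results)  # DatasetDict with the splits the leaderboard reads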
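Both upload sites follow the same HfApi.upload_file pattern; a hedged sketch in which the file names, organization, and token handling are illustrative placeholders, with only repo_id and repo_type mirroring the diff:

# Sketch of the upload pattern used for the raw and scored files.
import os
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    repo_id="osunlp/TravelBenchPublicResults",
    path_or_fileobj="scored/ExampleOrg_validation_two-stage_direct.jsonl",  # placeholder
    path_in_repo="ExampleOrg/example-model/validation_scored.jsonl",  # placeholder
    repo_type="dataset",
    token=os.environ.get("TOKEN"),  # needs write access to the dataset repo
)

One design note: embedding datetime.datetime.today() directly in path_in_repo, as the diff does, produces a timestamp containing spaces and colons; a strftime format such as %Y%m%d-%H%M%S may be a safer choice for repo paths.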
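Finally, the write-back and refresh form a round-trip: pushing under the same 'scores' config is what lets the force-redownload in refresh() see new rows. A sketch continuing the names above, with the Gradio wiring shown only as an assumption since it sits outside this diff:

# Sketch of the write path: push the updated DatasetDict back to the Hub
# under the same config so the next refresh() picks it up.
eval_results.push_to_hub(RESULTS_DATASET, config_name="scores", token=TOKEN)

# refresh() returns (val_df, test_df); a typical Blocks wiring (assumed
# here, not part of this diff) would look like:
#   refresh_button.click(refresh, inputs=[], outputs=[val_table, test_table])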