remove sync
- app.py  +2 -9
- src/populate.py  +2 -2
app.py
CHANGED
@@ -56,21 +56,14 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
 
-def init_space(
+def init_space():
     dataset_df = get_dataset_summary_table(file_path='blog/Hallucination-Leaderboard-Summary.csv')
 
     if socket.gethostname() not in {'neuromancer'}:
         # sync model_type with open-llm-leaderboard
         ui_snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
         ui_snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
-
-    if update_model_type_with_open_llm:
-        from src.envs import EVAL_REQUESTS_PATH_OPEN_LLM, QUEUE_REPO_OPEN_LLM
-        ui_snapshot_download(repo_id=QUEUE_REPO_OPEN_LLM, local_dir=EVAL_REQUESTS_PATH_OPEN_LLM, repo_type="dataset", tqdm_class=None, etag_timeout=30)
-    else:
-        EVAL_REQUESTS_PATH_OPEN_LLM = ""
-
-    raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, EVAL_REQUESTS_PATH_OPEN_LLM, COLS, BENCHMARK_COLS)
+    raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, "", COLS, BENCHMARK_COLS)
 
     finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
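
The simplified init_space() keeps the hostname guard around the queue/results snapshot downloads and now hard-codes "" as the open-LLM requests path when calling get_leaderboard_df(). Below is a minimal sketch of that guard; it assumes ui_snapshot_download is a thin wrapper over huggingface_hub.snapshot_download, and the repo ids and directories are placeholders rather than the Space's real constants from src.envs.

import socket
from huggingface_hub import snapshot_download

def sync_eval_repos(queue_repo: str, results_repo: str,
                    requests_dir: str, results_dir: str) -> bool:
    # Mirror the queue/results datasets locally unless running on the
    # 'neuromancer' host, which is assumed to already have them checked out.
    if socket.gethostname() in {"neuromancer"}:
        return False
    snapshot_download(repo_id=queue_repo, local_dir=requests_dir,
                      repo_type="dataset", etag_timeout=30)
    snapshot_download(repo_id=results_repo, local_dir=results_dir,
                      repo_type="dataset", etag_timeout=30)
    return True

With the removed else branch gone, app.py no longer imports QUEUE_REPO_OPEN_LLM or EVAL_REQUESTS_PATH_OPEN_LLM; the empty-string argument is what tells get_leaderboard_df to skip the model-type sync (see src/populate.py below).
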
src/populate.py
CHANGED
@@ -21,8 +21,8 @@ def get_leaderboard_df(results_path: str,
                        is_backend: bool = False) -> tuple[list[EvalResult], pd.DataFrame]:
     # Returns a list of EvalResult
     raw_data: list[EvalResult] = get_raw_eval_results(results_path, requests_path, requests_path_open_llm)
-
-
+    if requests_path_open_llm != "":
+        for result_idx in tqdm(range(len(raw_data)), desc="updating model type with open llm leaderboard"):
             raw_data[result_idx] = update_model_type_with_open_llm_request_file(raw_data[result_idx], requests_path_open_llm)
 
     all_data_json_ = [v.to_dict() for v in raw_data if v.is_complete()]
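
With app.py now passing "" for requests_path_open_llm, the new guard turns the model-type update into a no-op, so results keep whatever model_type they already carry. The following self-contained sketch mirrors only that control flow; the dataclass and update function are illustrative stand-ins for EvalResult and update_model_type_with_open_llm_request_file, not the Space's real code.

from dataclasses import dataclass
from tqdm import tqdm

@dataclass
class FakeEvalResult:
    model: str
    model_type: str = "unknown"

def fake_update_model_type(result: FakeEvalResult, requests_path_open_llm: str) -> FakeEvalResult:
    # Stand-in for update_model_type_with_open_llm_request_file: the real helper
    # looks the model up in the downloaded open-llm-leaderboard request files.
    result.model_type = "pretrained"
    return result

def apply_open_llm_sync(raw_data: list[FakeEvalResult],
                        requests_path_open_llm: str) -> list[FakeEvalResult]:
    # Same guard as the new code in get_leaderboard_df: an empty path skips the sync.
    if requests_path_open_llm != "":
        for result_idx in tqdm(range(len(raw_data)), desc="updating model type with open llm leaderboard"):
            raw_data[result_idx] = fake_update_model_type(raw_data[result_idx], requests_path_open_llm)
    return raw_data

if __name__ == "__main__":
    rows = [FakeEvalResult("org/model-a"), FakeEvalResult("org/model-b")]
    print(apply_open_llm_sync(rows, "")[0].model_type)  # -> "unknown": sync skipped

Running it with an empty path leaves model_type untouched, which matches the intent of the commit title, "remove sync".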