Merge branch 'main' of https://huggingface.co/spaces/open-rl-leaderboard/backend
- README.md +2 -0
- src/backend.py +35 -30
- src/evaluation.py +1 -1
- texts/getting_my_agent_evaluated.md +3 -4
README.md
CHANGED
@@ -8,4 +8,6 @@ sdk_version: 4.20.0
 app_file: app.py
 pinned: true
 license: apache-2.0
+tags:
+- leaderboard
 ---
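The two added lines register a `tags` entry so the Space can be discovered as a leaderboard on the Hub. As a quick check, a minimal sketch of reading the resulting front matter from a local checkout of the Space; it assumes huggingface_hub's `metadata_load` helper (the counterpart of the `metadata_save` call used in the docs below) and is not part of this commit:

from huggingface_hub import metadata_load

# Parse the YAML front matter of the Space's README.md (assumes a local checkout).
metadata = metadata_load("README.md")

# Hypothetical check that the new tag is present.
assert "leaderboard" in metadata.get("tags", [])
print(metadata["license"], metadata["tags"])
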
src/backend.py
CHANGED
@@ -39,41 +39,43 @@ def _backend_routine():
         evaluated_models.add((report["config"]["model_id"], report["config"]["model_sha"]))
 
     # Find the models that are not associated with any results
-    pending_models = set(compatible_models) - evaluated_models
+    pending_models = list(set(compatible_models) - evaluated_models)
     logger.info(f"Found {len(pending_models)} pending models")
 
+    if len(pending_models) == 0:
+        return None
+
     # Run an evaluation on the models
     with tempfile.TemporaryDirectory() as tmp_dir:
         commits = []
-        ... (previous evaluation code, old lines 48-76, not captured in this rendering)
+        model_id, sha = pending_models[0]
+        logger.info(f"Running evaluation on {model_id}")
+        report = {"config": {"model_id": model_id, "model_sha": sha}}
+        try:
+            evaluations = evaluate(model_id, revision=sha)
+        except Exception as e:
+            logger.error(f"Error evaluating {model_id}: {e}")
+            evaluations = None
+
+        if evaluations is not None:
+            report["results"] = evaluations
+            report["status"] = "DONE"
+        else:
+            report["status"] = "FAILED"
+
+        # Update the results
+        dumped = json.dumps(report, indent=2)
+        path_in_repo = f"{model_id}/results_{sha}.json"
+        local_path = os.path.join(tmp_dir, path_in_repo)
+        os.makedirs(os.path.dirname(local_path), exist_ok=True)
+        with open(local_path, "w") as f:
+            f.write(dumped)
+
+        commits.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))
+
+        API.create_commit(
+            repo_id=RESULTS_REPO, commit_message="Add evaluation results", operations=commits, repo_type="dataset"
+        )
 
 
 def backend_routine():
@@ -81,3 +83,6 @@ def backend_routine():
         _backend_routine()
     except Exception as e:
         logger.error(f"{e.__class__.__name__}: {str(e)}")
+
+if __name__=="__main__":
+    backend_routine()
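The rewritten routine evaluates a single pending model per run: it builds a `report` with the model's `config`, fills in `results` and a `status` of `DONE` or `FAILED`, and commits the file to the results dataset as `{model_id}/results_{sha}.json`. A minimal sketch of reading such a report back with huggingface_hub; the repository name, model id, and sha below are hypothetical placeholders, not values taken from this commit:

import json

from huggingface_hub import hf_hub_download

RESULTS_REPO = "open-rl-leaderboard/results"  # assumed name of the results dataset
model_id = "user/ppo-CartPole-v1"  # hypothetical model id
sha = "0123abcd"  # hypothetical model revision

# Reports are committed as {model_id}/results_{sha}.json in the results dataset.
local_path = hf_hub_download(
    repo_id=RESULTS_REPO,
    filename=f"{model_id}/results_{sha}.json",
    repo_type="dataset",
)

with open(local_path) as f:
    report = json.load(f)

print(report["config"]["model_id"], report["status"])
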
src/evaluation.py
CHANGED
@@ -347,7 +347,7 @@ def evaluate(model_id, revision):
 
     # Evaluate the agent on the environments
     for env_id in env_ids:
-        envs = gym.vector.SyncVectorEnv([make(env_id) for _ in range(
+        envs = gym.vector.SyncVectorEnv([make(env_id) for _ in range(1)])
         observations, _ = envs.reset()
         episodic_returns = []
         while len(episodic_returns) < NUM_EPISODES:
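The fix completes the previously truncated constructor call. `gym.vector.SyncVectorEnv` expects a list of zero-argument callables, so `make(env_id)` must return a function that builds one environment, and `range(1)` wraps exactly one sub-environment, which gives the observations a leading batch dimension of 1. A short sketch of that thunk pattern, assuming the gymnasium API and a hypothetical `make` helper; the project's own helper may differ:

import gymnasium as gym

def make(env_id):
    # Return a zero-argument callable (thunk); SyncVectorEnv calls it to build the env.
    def thunk():
        return gym.make(env_id)
    return thunk

# With range(1) the vectorized env holds a single sub-environment.
envs = gym.vector.SyncVectorEnv([make("CartPole-v1") for _ in range(1)])
observations, _ = envs.reset()
print(observations.shape)  # (1, 4) for CartPole-v1
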
texts/getting_my_agent_evaluated.md
CHANGED
@@ -93,7 +93,7 @@ class Agent(nn.Module):
 agent = Agent(policy) # instantiate the agent
 
 # A few tests to check if the agent is working
-observations = torch.
+observations = torch.randn(env.observation_space.shape).unsqueeze(0) # dummy batch of observations
 actions = agent(observations)
 actions = actions.numpy()[0]
 assert env.action_space.contains(actions)
@@ -109,10 +109,9 @@ from huggingface_hub import metadata_save, HfApi
 
 # Save model along with its card
 metadata_save("model_card.md", {"tags": ["reinforcement-learning", env_id]})
-dummy_input = torch.
+dummy_input = torch.randn(env.observation_space.shape).unsqueeze(0) # dummy batch of observations
 agent = torch.jit.trace(agent.eval(), dummy_input)
-agent = torch.jit.freeze(agent) # required for
-agent = torch.jit.optimize_for_inference(agent)
+agent = torch.jit.freeze(agent) # required for the model not to depend on the training library
 torch.jit.save(agent, "agent.pt")
 
 # Upload model and card to the 🤗 Hub
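These doc changes fill in the dummy observation used for tracing and drop the `torch.jit.optimize_for_inference` step, keeping `torch.jit.freeze` so the saved module no longer depends on the training library. A sanity check one might run after the export, sketched below; it assumes a CartPole-like 4-dimensional observation space and is not part of the documented workflow:

import torch

# Load the exported agent in a fresh process, with no training code imported,
# to confirm the TorchScript file is self-contained.
agent = torch.jit.load("agent.pt")

dummy_observations = torch.randn(1, 4)  # batch of one observation; adapt to your env
with torch.inference_mode():
    actions = agent(dummy_observations)
print(actions.numpy()[0])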