{ "train_losses": [ 0.0 ], "test_losses": [ 0.0 ], "config": { "training_type": "online_ppo", "environment": "MiniHack-Room-5x5-v0", "total_timesteps": 50000, "training_time": 0.019638538360595703, "device": "cuda", "ppo_config": { "learning_rate": 0.0005, "batch_size": 32, "n_epochs": 10, "gamma": 0.99, "vf_coef": 0.5, "ent_coef": 0.01, "max_grad_norm": 0.5 }, "exploration_config": { "use_curiosity": true, "curiosity_lr": 0.0001, "curiosity_forward_coef": 0.2, "curiosity_inverse_coef": 0.8, "use_rnd": false, "rnd_lr": null, "rnd_coef": null }, "model_sources": { "vae_repo_id": "CatkinChen/nethack-vae-hmm", "hmm_repo_id": "CatkinChen/nethack-hmm" } }, "final_train_loss": 0.0, "final_test_loss": 0.0, "total_epochs": 1, "best_train_loss": 0.0, "best_test_loss": 0.0 }