nethack-ppo-ablation-baseline_rnd / training_data.json
CatkinChen's picture
Add training data
d744a97 verified
{
"train_losses": [
0.0003613280423451215,
0.0011523436987772584,
7.812489639036357e-05,
0.0008105469169095159,
-6.835948443040252e-05,
0.0010058593470603228,
-0.00010742194717749953,
-0.00013671873603016138,
-0.0021679687779396772,
0.0005371093284338713,
4.882807843387127e-05,
-0.002119140699505806,
0.0011035155039280653,
0.0008300781482830644,
-0.0007128907018341124,
5.859363591298461e-05,
0.00020507792942225933,
-0.002080078236758709,
-0.0001171876210719347,
-0.0001367187942378223,
-0.00033203139901161194,
-0.000898437574505806,
-0.0005957031971774995,
-0.0028808596543967724,
0.0001269530621357262,
0.0009277343051508069,
0.001748046837747097,
-0.00327148474752903,
-0.0008886720170266926,
-0.0009374999790452421,
0.00028320305864326656,
-0.0006445314502343535,
-0.001103515736758709,
-0.001679687760770321,
-0.0016015628352761269,
0.0008691406110301614,
-0.002128906548023224,
0.0008789062267169356,
0.000732421875,
0.0007812499534338713,
-0.001230468973517418,
-0.0017773439176380634,
0.0009277343051508069,
-0.002490234561264515,
-0.00019531260477378964,
-0.0016992189921438694,
0.00021484363242052495,
-0.00030273443553596735,
-0.00018554690177552402,
-0.0010156250791624188,
-0.0002148438652511686,
-0.00023437506752088666,
6.835925159975886e-05,
0.0003124999348074198,
-0.0009277344215661287,
-0.000859375053551048,
0.000849609321448952,
-3.9062462747097015e-05,
-0.0006054687546566129,
-0.0015429689083248377,
-0.0008496094960719347,
-0.0037011723034083843,
-3.9062550058588386e-05,
0.0005078123649582267,
0.00014648429350927472,
-0.0004101564409211278,
-0.0008300781482830644,
-0.0011914062779396772,
-0.001718750107102096,
0.0001269530621357262,
-8.789071580395103e-05,
0.00041992179467342794,
-0.0028417969588190317,
0.0008789062267169356,
0.0008593749371357262,
0.0001855467853602022,
-0.0018066407646983862,
-0.0006738282972946763,
0.00026367176906205714,
-0.0031542968936264515,
-0.0021386719308793545,
-0.0013183595146983862,
0.0006738281226716936,
-0.0010937501210719347,
0.0004980468656867743,
0.001416015555150807,
-0.00017578131519258022,
-0.0011425783159211278,
0.0005566406180150807,
-0.00024414071231149137,
-0.001914062537252903,
-0.0026953124906867743,
-0.000820312590803951,
0.00038085930282250047,
1.9531144062057137e-05,
-0.0012011720100417733,
0.001015624962747097,
-0.0006835939711891115,
0.0010058593470603228,
0.00031249987659975886,
-0.002109374850988388,
-1.9531260477378964e-05,
0.0009082030737772584,
0.0007617187220603228,
0.0008203124161809683,
0.00039062497671693563,
-0.0006250001024454832,
-0.0010058593470603228,
-0.00030273443553596735,
0.0006640624487772584,
-0.0010058593470603228,
-0.00046874998952262104,
-0.00016601569950580597,
-0.0010351561941206455,
-0.002392577938735485,
-0.00025390629889443517,
0.0002636718563735485,
0.0002929687616415322,
0.000283203087747097,
0.00016601562674622983,
0.0002734375011641532,
-0.000615234486758709,
-0.0010546875419095159,
0.00023437495110556483,
-0.00024414071231149137,
0.0006249999860301614,
-0.0004199219401925802,
-0.0003125000512227416,
-0.0006249999860301614,
0.00041992185288108885,
-0.002695312723517418,
-0.005576171912252903,
-0.0020605470053851604,
0.0007910155691206455,
-0.0032324218191206455,
-0.003095703199505806,
0.0006445312174037099,
0.00048828122089616954,
-0.001220703125,
-3.906257916241884e-05,
-0.0013183595146983862,
-0.0004003906506113708,
0.0004101562371943146,
-0.0011230469681322575,
0.0005371093284338713,
-0.00010742188896983862,
-0.005966797471046448,
-0.000244140625,
-0.0005664062337018549,
-0.0023828125558793545,
0.0005078124813735485,
-0.002197265625,
-0.0010742186568677425,
-0.0021386719308793545,
0.00044921872904524207,
-0.00036132821696810424,
-0.0004589843738358468,
-0.0002343750384170562,
-0.00020507816225290298,
-0.0001367187942378223,
0.0006738281226716936,
0.00048828122089616954,
0.00041992185288108885,
-0.0004199218819849193,
0.0005468750023283064,
-0.004150390625,
0.0004003905924037099,
9.765624417923391e-05,
0.00020507810404524207,
-0.002236328087747097,
-0.0004199218819849193,
-0.0005371095612645149,
4.8828194849193096e-05,
0.0002441405667923391,
-0.002246093936264515,
0.0003320312243886292,
-0.00012695312034338713,
-0.0020703128539025784,
-0.003310547210276127,
-0.0010449220426380634,
-0.0006250001024454832,
0.00034179684007540345,
0.0008789062267169356,
-2.929696347564459e-05,
0.00025390624068677425,
0.00032226560870185494,
-0.0021875002421438694,
-0.0004980469821020961,
0.0003320312243886292,
-0.0008984376909211278,
-0.0006738281808793545,
-0.0010156251955777407,
-0.00011718765017576516,
-0.003017578274011612,
-0.0013671875931322575
],
"test_losses": [
-0.33519999999999983,
-0.1731999999999999,
-0.0009999999999998404,
-0.07839999999999994,
-0.2763999999999999,
-0.2005999999999999,
-0.07279999999999989,
-0.10079999999999996,
-0.07419999999999993,
0.00020000000000011453,
-0.22099999999999997,
-0.15119999999999995,
-0.25819999999999993,
-0.2673999999999999,
-0.2437999999999999,
-0.22659999999999997,
-0.2699999999999999,
-0.24459999999999998,
-0.16359999999999997
],
"config": {
"training_type": "online_ppo",
"environment": "MiniHack-River-Narrow-v0",
"total_timesteps": 195,
"training_time": 5415.230568885803,
"device": "cuda",
"ppo_config": {
"learning_rate": 0.0003,
"n_epochs": 4,
"gamma": 0.99,
"vf_coef": 0.5,
"ent_coef": 0.01,
"max_grad_norm": 0.5
},
"exploration_config": {
"use_curiosity": false,
"curiosity_dyn": false,
"curiosity_skill_entropy": false,
"curiosity_skill_transition_novelty": false,
"curiosity_dyn_coef": 0.03,
"curiosity_hdp_coef": 0.2,
"curiosity_stn_coef": 0.05,
"use_rnd": true,
"rnd_lr": 0.001,
"rnd_coef": 0.002
},
"model_sources": {
"vae_repo_id": null,
"hmm_repo_id": null
}
},
"final_train_loss": -0.0013671875931322575,
"final_test_loss": -0.16359999999999997,
"total_epochs": 195,
"best_train_loss": -0.005966797471046448,
"best_test_loss": -0.33519999999999983
}