{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99467140319716, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_bleu": 0.6549, "eval_gen_len": 14.2911, "eval_loss": 3.51861834526062, "eval_runtime": 90.1677, "eval_samples_per_second": 62.672, "eval_steps_per_second": 1.575, "step": 28 }, { "epoch": 1.99, "eval_bleu": 0.7119, "eval_gen_len": 15.2173, "eval_loss": 3.422013998031616, "eval_runtime": 90.001, "eval_samples_per_second": 62.788, "eval_steps_per_second": 1.578, "step": 56 }, { "epoch": 2.99, "eval_bleu": 0.7933, "eval_gen_len": 15.77, "eval_loss": 3.3816401958465576, "eval_runtime": 90.2248, "eval_samples_per_second": 62.632, "eval_steps_per_second": 1.574, "step": 84 }, { "epoch": 3.99, "eval_bleu": 0.828, "eval_gen_len": 15.9515, "eval_loss": 3.357332944869995, "eval_runtime": 90.0431, "eval_samples_per_second": 62.759, "eval_steps_per_second": 1.577, "step": 112 }, { "epoch": 4.99, "eval_bleu": 0.8564, "eval_gen_len": 15.9135, "eval_loss": 3.3394410610198975, "eval_runtime": 90.2255, "eval_samples_per_second": 62.632, "eval_steps_per_second": 1.574, "step": 140 }, { "epoch": 5.99, "eval_bleu": 0.9038, "eval_gen_len": 15.9788, "eval_loss": 3.324686050415039, "eval_runtime": 90.177, "eval_samples_per_second": 62.666, "eval_steps_per_second": 1.575, "step": 168 }, { "epoch": 6.99, "eval_bleu": 0.9239, "eval_gen_len": 16.1274, "eval_loss": 3.312596082687378, "eval_runtime": 90.2242, "eval_samples_per_second": 62.633, "eval_steps_per_second": 1.574, "step": 196 }, { "epoch": 7.99, "eval_bleu": 0.9535, "eval_gen_len": 16.1568, "eval_loss": 3.3029773235321045, "eval_runtime": 89.9892, "eval_samples_per_second": 62.796, "eval_steps_per_second": 1.578, "step": 224 }, { "epoch": 8.99, "eval_bleu": 0.9392, "eval_gen_len": 16.2577, "eval_loss": 3.2943568229675293, "eval_runtime": 90.2103, "eval_samples_per_second": 62.643, "eval_steps_per_second": 1.574, "step": 252 }, { "epoch": 9.99, "eval_bleu": 1.0057, "eval_gen_len": 16.4258, "eval_loss": 3.286745548248291, "eval_runtime": 90.3866, "eval_samples_per_second": 62.52, "eval_steps_per_second": 1.571, "step": 280 }, { "epoch": 10.99, "eval_bleu": 1.0136, "eval_gen_len": 16.3022, "eval_loss": 3.2792539596557617, "eval_runtime": 90.1642, "eval_samples_per_second": 62.675, "eval_steps_per_second": 1.575, "step": 308 }, { "epoch": 11.99, "eval_bleu": 1.0151, "eval_gen_len": 16.3868, "eval_loss": 3.273854970932007, "eval_runtime": 90.0094, "eval_samples_per_second": 62.782, "eval_steps_per_second": 1.578, "step": 336 }, { "epoch": 12.99, "eval_bleu": 1.0137, "eval_gen_len": 16.2493, "eval_loss": 3.2679247856140137, "eval_runtime": 90.2543, "eval_samples_per_second": 62.612, "eval_steps_per_second": 1.573, "step": 364 }, { "epoch": 13.99, "eval_bleu": 1.0062, "eval_gen_len": 16.3263, "eval_loss": 3.263322353363037, "eval_runtime": 90.0803, "eval_samples_per_second": 62.733, "eval_steps_per_second": 1.576, "step": 392 }, { "epoch": 14.99, "eval_bleu": 1.0205, "eval_gen_len": 16.1936, "eval_loss": 3.258091449737549, "eval_runtime": 90.2141, "eval_samples_per_second": 62.64, "eval_steps_per_second": 1.574, "step": 420 }, { "epoch": 15.99, "eval_bleu": 1.0392, "eval_gen_len": 16.2488, "eval_loss": 3.254173755645752, "eval_runtime": 90.1523, "eval_samples_per_second": 62.683, "eval_steps_per_second": 1.575, "step": 448 }, { "epoch": 16.99, "eval_bleu": 1.0613, "eval_gen_len": 16.1219, "eval_loss": 3.249685764312744, "eval_runtime": 90.3649, "eval_samples_per_second": 62.535, "eval_steps_per_second": 1.571, "step": 476 }, { "epoch": 17.85, "learning_rate": 3.2142857142857144e-05, "loss": 3.4648, "step": 500 }, { "epoch": 17.99, "eval_bleu": 1.0755, "eval_gen_len": 16.2776, "eval_loss": 3.2462830543518066, "eval_runtime": 90.126, "eval_samples_per_second": 62.701, "eval_steps_per_second": 1.576, "step": 504 }, { "epoch": 18.99, "eval_bleu": 1.0772, "eval_gen_len": 16.3072, "eval_loss": 3.2422850131988525, "eval_runtime": 90.07, "eval_samples_per_second": 62.74, "eval_steps_per_second": 1.577, "step": 532 }, { "epoch": 19.99, "eval_bleu": 1.0893, "eval_gen_len": 16.3113, "eval_loss": 3.2396700382232666, "eval_runtime": 90.1336, "eval_samples_per_second": 62.696, "eval_steps_per_second": 1.575, "step": 560 }, { "epoch": 20.99, "eval_bleu": 1.0591, "eval_gen_len": 16.2219, "eval_loss": 3.236823797225952, "eval_runtime": 90.1085, "eval_samples_per_second": 62.713, "eval_steps_per_second": 1.576, "step": 588 }, { "epoch": 21.99, "eval_bleu": 1.0858, "eval_gen_len": 16.3468, "eval_loss": 3.23404598236084, "eval_runtime": 89.9968, "eval_samples_per_second": 62.791, "eval_steps_per_second": 1.578, "step": 616 }, { "epoch": 22.99, "eval_bleu": 1.1204, "eval_gen_len": 16.284, "eval_loss": 3.2314248085021973, "eval_runtime": 89.8557, "eval_samples_per_second": 62.89, "eval_steps_per_second": 1.58, "step": 644 }, { "epoch": 23.99, "eval_bleu": 1.1199, "eval_gen_len": 16.3371, "eval_loss": 3.2291789054870605, "eval_runtime": 89.9552, "eval_samples_per_second": 62.82, "eval_steps_per_second": 1.579, "step": 672 }, { "epoch": 24.99, "eval_bleu": 1.1413, "eval_gen_len": 16.3203, "eval_loss": 3.226853847503662, "eval_runtime": 90.1817, "eval_samples_per_second": 62.662, "eval_steps_per_second": 1.575, "step": 700 }, { "epoch": 25.99, "eval_bleu": 1.1415, "eval_gen_len": 16.3137, "eval_loss": 3.2246992588043213, "eval_runtime": 90.2118, "eval_samples_per_second": 62.642, "eval_steps_per_second": 1.574, "step": 728 }, { "epoch": 26.99, "eval_bleu": 1.1522, "eval_gen_len": 16.2957, "eval_loss": 3.2227494716644287, "eval_runtime": 90.192, "eval_samples_per_second": 62.655, "eval_steps_per_second": 1.574, "step": 756 }, { "epoch": 27.99, "eval_bleu": 1.1315, "eval_gen_len": 16.2869, "eval_loss": 3.2210450172424316, "eval_runtime": 90.0566, "eval_samples_per_second": 62.749, "eval_steps_per_second": 1.577, "step": 784 }, { "epoch": 28.99, "eval_bleu": 1.1366, "eval_gen_len": 16.2984, "eval_loss": 3.21929669380188, "eval_runtime": 90.1542, "eval_samples_per_second": 62.681, "eval_steps_per_second": 1.575, "step": 812 }, { "epoch": 29.99, "eval_bleu": 1.1557, "eval_gen_len": 16.3242, "eval_loss": 3.2176640033721924, "eval_runtime": 89.9951, "eval_samples_per_second": 62.792, "eval_steps_per_second": 1.578, "step": 840 }, { "epoch": 30.99, "eval_bleu": 1.1545, "eval_gen_len": 16.378, "eval_loss": 3.216632604598999, "eval_runtime": 90.1702, "eval_samples_per_second": 62.67, "eval_steps_per_second": 1.575, "step": 868 }, { "epoch": 31.99, "eval_bleu": 1.1612, "eval_gen_len": 16.3198, "eval_loss": 3.2149343490600586, "eval_runtime": 90.3354, "eval_samples_per_second": 62.556, "eval_steps_per_second": 1.572, "step": 896 }, { "epoch": 32.99, "eval_bleu": 1.1612, "eval_gen_len": 16.3831, "eval_loss": 3.2138538360595703, "eval_runtime": 89.9561, "eval_samples_per_second": 62.82, "eval_steps_per_second": 1.579, "step": 924 }, { "epoch": 33.99, "eval_bleu": 1.1849, "eval_gen_len": 16.3297, "eval_loss": 3.2125675678253174, "eval_runtime": 90.1068, "eval_samples_per_second": 62.714, "eval_steps_per_second": 1.576, "step": 952 }, { "epoch": 34.99, "eval_bleu": 1.1838, "eval_gen_len": 16.3698, "eval_loss": 3.211477279663086, "eval_runtime": 90.1237, "eval_samples_per_second": 62.703, "eval_steps_per_second": 1.576, "step": 980 }, { "epoch": 35.71, "learning_rate": 1.4285714285714285e-05, "loss": 3.3175, "step": 1000 }, { "epoch": 35.99, "eval_bleu": 1.1849, "eval_gen_len": 16.3953, "eval_loss": 3.2102482318878174, "eval_runtime": 89.7847, "eval_samples_per_second": 62.939, "eval_steps_per_second": 1.582, "step": 1008 }, { "epoch": 36.99, "eval_bleu": 1.2054, "eval_gen_len": 16.4488, "eval_loss": 3.2095632553100586, "eval_runtime": 90.0872, "eval_samples_per_second": 62.728, "eval_steps_per_second": 1.576, "step": 1036 }, { "epoch": 37.99, "eval_bleu": 1.1967, "eval_gen_len": 16.4178, "eval_loss": 3.208686113357544, "eval_runtime": 90.1581, "eval_samples_per_second": 62.679, "eval_steps_per_second": 1.575, "step": 1064 }, { "epoch": 38.99, "eval_bleu": 1.1766, "eval_gen_len": 16.3684, "eval_loss": 3.207756996154785, "eval_runtime": 89.9296, "eval_samples_per_second": 62.838, "eval_steps_per_second": 1.579, "step": 1092 }, { "epoch": 39.99, "eval_bleu": 1.1795, "eval_gen_len": 16.4084, "eval_loss": 3.2069926261901855, "eval_runtime": 90.2544, "eval_samples_per_second": 62.612, "eval_steps_per_second": 1.573, "step": 1120 }, { "epoch": 40.99, "eval_bleu": 1.1961, "eval_gen_len": 16.4006, "eval_loss": 3.206512928009033, "eval_runtime": 90.0766, "eval_samples_per_second": 62.735, "eval_steps_per_second": 1.576, "step": 1148 }, { "epoch": 41.99, "eval_bleu": 1.1815, "eval_gen_len": 16.421, "eval_loss": 3.205848455429077, "eval_runtime": 90.3074, "eval_samples_per_second": 62.575, "eval_steps_per_second": 1.572, "step": 1176 }, { "epoch": 42.99, "eval_bleu": 1.1988, "eval_gen_len": 16.4063, "eval_loss": 3.2054076194763184, "eval_runtime": 90.2177, "eval_samples_per_second": 62.637, "eval_steps_per_second": 1.574, "step": 1204 }, { "epoch": 43.99, "eval_bleu": 1.1869, "eval_gen_len": 16.3886, "eval_loss": 3.2050981521606445, "eval_runtime": 90.1457, "eval_samples_per_second": 62.687, "eval_steps_per_second": 1.575, "step": 1232 }, { "epoch": 44.99, "eval_bleu": 1.1861, "eval_gen_len": 16.4121, "eval_loss": 3.204746723175049, "eval_runtime": 90.0329, "eval_samples_per_second": 62.766, "eval_steps_per_second": 1.577, "step": 1260 }, { "epoch": 45.99, "eval_bleu": 1.1751, "eval_gen_len": 16.3983, "eval_loss": 3.204568386077881, "eval_runtime": 90.1067, "eval_samples_per_second": 62.715, "eval_steps_per_second": 1.576, "step": 1288 }, { "epoch": 46.99, "eval_bleu": 1.181, "eval_gen_len": 16.4228, "eval_loss": 3.2042646408081055, "eval_runtime": 90.0428, "eval_samples_per_second": 62.759, "eval_steps_per_second": 1.577, "step": 1316 }, { "epoch": 47.99, "eval_bleu": 1.1787, "eval_gen_len": 16.4403, "eval_loss": 3.204113245010376, "eval_runtime": 90.1802, "eval_samples_per_second": 62.663, "eval_steps_per_second": 1.575, "step": 1344 }, { "epoch": 48.99, "eval_bleu": 1.1932, "eval_gen_len": 16.4201, "eval_loss": 3.203984260559082, "eval_runtime": 90.324, "eval_samples_per_second": 62.564, "eval_steps_per_second": 1.572, "step": 1372 }, { "epoch": 49.99, "eval_bleu": 1.1935, "eval_gen_len": 16.4247, "eval_loss": 3.203944444656372, "eval_runtime": 90.1016, "eval_samples_per_second": 62.718, "eval_steps_per_second": 1.576, "step": 1400 }, { "epoch": 49.99, "step": 1400, "total_flos": 1.2293823395620454e+17, "train_loss": 3.3598651123046874, "train_runtime": 21589.3175, "train_samples_per_second": 62.573, "train_steps_per_second": 0.065 } ], "max_steps": 1400, "num_train_epochs": 50, "total_flos": 1.2293823395620454e+17, "trial_name": null, "trial_params": null }