{ "best_metric": 0.8618181818181818, "best_model_checkpoint": "swiftformer-xs-RD\\checkpoint-1167", "epoch": 39.75155279503105, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 1.875e-05, "loss": 1.6091, "step": 10 }, { "epoch": 0.5, "learning_rate": 3.75e-05, "loss": 1.6066, "step": 20 }, { "epoch": 0.75, "learning_rate": 5.625e-05, "loss": 1.5954, "step": 30 }, { "epoch": 0.99, "learning_rate": 7.5e-05, "loss": 1.5571, "step": 40 }, { "epoch": 0.99, "eval_accuracy": 0.5181818181818182, "eval_loss": 1.519059658050537, "eval_runtime": 2.8787, "eval_samples_per_second": 191.057, "eval_steps_per_second": 12.158, "step": 40 }, { "epoch": 1.24, "learning_rate": 9.374999999999999e-05, "loss": 1.4441, "step": 50 }, { "epoch": 1.49, "learning_rate": 0.0001125, "loss": 1.255, "step": 60 }, { "epoch": 1.74, "learning_rate": 0.00013125, "loss": 1.0283, "step": 70 }, { "epoch": 1.99, "learning_rate": 0.00015, "loss": 0.979, "step": 80 }, { "epoch": 1.99, "eval_accuracy": 0.7090909090909091, "eval_loss": 0.8952864408493042, "eval_runtime": 1.7304, "eval_samples_per_second": 317.84, "eval_steps_per_second": 20.226, "step": 80 }, { "epoch": 2.24, "learning_rate": 0.00016874999999999998, "loss": 0.864, "step": 90 }, { "epoch": 2.48, "learning_rate": 0.00018749999999999998, "loss": 0.859, "step": 100 }, { "epoch": 2.73, "learning_rate": 0.00020624999999999997, "loss": 0.7978, "step": 110 }, { "epoch": 2.98, "learning_rate": 0.000225, "loss": 0.8343, "step": 120 }, { "epoch": 2.98, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7519798874855042, "eval_runtime": 1.3968, "eval_samples_per_second": 393.746, "eval_steps_per_second": 25.057, "step": 120 }, { "epoch": 3.23, "learning_rate": 0.00024375, "loss": 0.7879, "step": 130 }, { "epoch": 3.48, "learning_rate": 0.0002625, "loss": 0.7814, "step": 140 }, { "epoch": 3.73, "learning_rate": 0.00028125, "loss": 0.7806, "step": 150 }, { "epoch": 3.98, "learning_rate": 0.0003, "loss": 0.7755, "step": 160 }, { "epoch": 4.0, "eval_accuracy": 0.7527272727272727, "eval_loss": 0.7222614288330078, "eval_runtime": 1.3418, "eval_samples_per_second": 409.89, "eval_steps_per_second": 26.084, "step": 161 }, { "epoch": 4.22, "learning_rate": 0.00029791666666666665, "loss": 0.7536, "step": 170 }, { "epoch": 4.47, "learning_rate": 0.00029583333333333333, "loss": 0.6664, "step": 180 }, { "epoch": 4.72, "learning_rate": 0.00029374999999999996, "loss": 0.6218, "step": 190 }, { "epoch": 4.97, "learning_rate": 0.00029166666666666664, "loss": 0.7064, "step": 200 }, { "epoch": 4.99, "eval_accuracy": 0.74, "eval_loss": 0.6760307550430298, "eval_runtime": 1.3443, "eval_samples_per_second": 409.125, "eval_steps_per_second": 26.035, "step": 201 }, { "epoch": 5.22, "learning_rate": 0.0002895833333333333, "loss": 0.715, "step": 210 }, { "epoch": 5.47, "learning_rate": 0.0002875, "loss": 0.7128, "step": 220 }, { "epoch": 5.71, "learning_rate": 0.0002854166666666666, "loss": 0.6436, "step": 230 }, { "epoch": 5.96, "learning_rate": 0.0002833333333333333, "loss": 0.6296, "step": 240 }, { "epoch": 5.99, "eval_accuracy": 0.8018181818181818, "eval_loss": 0.5624179840087891, "eval_runtime": 1.5624, "eval_samples_per_second": 352.03, "eval_steps_per_second": 22.402, "step": 241 }, { "epoch": 6.21, "learning_rate": 0.00028125, "loss": 0.6154, "step": 250 }, { "epoch": 6.46, "learning_rate": 0.00027916666666666666, "loss": 0.6749, "step": 260 }, { "epoch": 6.71, "learning_rate": 0.00027708333333333334, "loss": 0.6336, "step": 270 }, { "epoch": 6.96, "learning_rate": 0.00027499999999999996, "loss": 0.6387, "step": 280 }, { "epoch": 6.98, "eval_accuracy": 0.8072727272727273, "eval_loss": 0.5362208485603333, "eval_runtime": 1.4093, "eval_samples_per_second": 390.255, "eval_steps_per_second": 24.834, "step": 281 }, { "epoch": 7.2, "learning_rate": 0.00027291666666666664, "loss": 0.5918, "step": 290 }, { "epoch": 7.45, "learning_rate": 0.0002708333333333333, "loss": 0.5901, "step": 300 }, { "epoch": 7.7, "learning_rate": 0.00026875, "loss": 0.6415, "step": 310 }, { "epoch": 7.95, "learning_rate": 0.0002666666666666666, "loss": 0.5793, "step": 320 }, { "epoch": 8.0, "eval_accuracy": 0.7963636363636364, "eval_loss": 0.5241875648498535, "eval_runtime": 1.3758, "eval_samples_per_second": 399.758, "eval_steps_per_second": 25.439, "step": 322 }, { "epoch": 8.2, "learning_rate": 0.0002645833333333333, "loss": 0.5718, "step": 330 }, { "epoch": 8.45, "learning_rate": 0.0002625, "loss": 0.5651, "step": 340 }, { "epoch": 8.7, "learning_rate": 0.00026041666666666666, "loss": 0.5828, "step": 350 }, { "epoch": 8.94, "learning_rate": 0.00025833333333333334, "loss": 0.5723, "step": 360 }, { "epoch": 8.99, "eval_accuracy": 0.8309090909090909, "eval_loss": 0.454416960477829, "eval_runtime": 1.3498, "eval_samples_per_second": 407.461, "eval_steps_per_second": 25.929, "step": 362 }, { "epoch": 9.19, "learning_rate": 0.00025624999999999997, "loss": 0.5246, "step": 370 }, { "epoch": 9.44, "learning_rate": 0.00025416666666666665, "loss": 0.5618, "step": 380 }, { "epoch": 9.69, "learning_rate": 0.0002520833333333333, "loss": 0.5462, "step": 390 }, { "epoch": 9.94, "learning_rate": 0.00025, "loss": 0.5897, "step": 400 }, { "epoch": 9.99, "eval_accuracy": 0.8272727272727273, "eval_loss": 0.4509519636631012, "eval_runtime": 1.3383, "eval_samples_per_second": 410.963, "eval_steps_per_second": 26.152, "step": 402 }, { "epoch": 10.19, "learning_rate": 0.00024791666666666663, "loss": 0.5194, "step": 410 }, { "epoch": 10.43, "learning_rate": 0.0002458333333333333, "loss": 0.4946, "step": 420 }, { "epoch": 10.68, "learning_rate": 0.00024375, "loss": 0.5858, "step": 430 }, { "epoch": 10.93, "learning_rate": 0.00024166666666666664, "loss": 0.5443, "step": 440 }, { "epoch": 10.98, "eval_accuracy": 0.8054545454545454, "eval_loss": 0.46854037046432495, "eval_runtime": 1.3393, "eval_samples_per_second": 410.65, "eval_steps_per_second": 26.132, "step": 442 }, { "epoch": 11.18, "learning_rate": 0.00023958333333333332, "loss": 0.5482, "step": 450 }, { "epoch": 11.43, "learning_rate": 0.00023749999999999997, "loss": 0.4724, "step": 460 }, { "epoch": 11.68, "learning_rate": 0.00023541666666666665, "loss": 0.5285, "step": 470 }, { "epoch": 11.93, "learning_rate": 0.0002333333333333333, "loss": 0.4959, "step": 480 }, { "epoch": 12.0, "eval_accuracy": 0.8254545454545454, "eval_loss": 0.42480796575546265, "eval_runtime": 1.4113, "eval_samples_per_second": 389.699, "eval_steps_per_second": 24.799, "step": 483 }, { "epoch": 12.17, "learning_rate": 0.00023124999999999998, "loss": 0.6238, "step": 490 }, { "epoch": 12.42, "learning_rate": 0.00022916666666666664, "loss": 0.5296, "step": 500 }, { "epoch": 12.67, "learning_rate": 0.00022708333333333331, "loss": 0.4759, "step": 510 }, { "epoch": 12.92, "learning_rate": 0.000225, "loss": 0.4743, "step": 520 }, { "epoch": 12.99, "eval_accuracy": 0.8309090909090909, "eval_loss": 0.4307559132575989, "eval_runtime": 1.3628, "eval_samples_per_second": 403.57, "eval_steps_per_second": 25.682, "step": 523 }, { "epoch": 13.17, "learning_rate": 0.00022291666666666665, "loss": 0.6277, "step": 530 }, { "epoch": 13.42, "learning_rate": 0.00022083333333333333, "loss": 0.5356, "step": 540 }, { "epoch": 13.66, "learning_rate": 0.00021874999999999998, "loss": 0.5112, "step": 550 }, { "epoch": 13.91, "learning_rate": 0.00021666666666666666, "loss": 0.4679, "step": 560 }, { "epoch": 13.99, "eval_accuracy": 0.84, "eval_loss": 0.39991825819015503, "eval_runtime": 1.3308, "eval_samples_per_second": 413.283, "eval_steps_per_second": 26.3, "step": 563 }, { "epoch": 14.16, "learning_rate": 0.0002145833333333333, "loss": 0.4224, "step": 570 }, { "epoch": 14.41, "learning_rate": 0.0002125, "loss": 0.4699, "step": 580 }, { "epoch": 14.66, "learning_rate": 0.00021041666666666664, "loss": 0.4727, "step": 590 }, { "epoch": 14.91, "learning_rate": 0.00020833333333333332, "loss": 0.4997, "step": 600 }, { "epoch": 14.98, "eval_accuracy": 0.8381818181818181, "eval_loss": 0.41513749957084656, "eval_runtime": 1.3563, "eval_samples_per_second": 405.507, "eval_steps_per_second": 25.805, "step": 603 }, { "epoch": 15.16, "learning_rate": 0.00020624999999999997, "loss": 0.4647, "step": 610 }, { "epoch": 15.4, "learning_rate": 0.00020416666666666665, "loss": 0.507, "step": 620 }, { "epoch": 15.65, "learning_rate": 0.00020208333333333333, "loss": 0.4575, "step": 630 }, { "epoch": 15.9, "learning_rate": 0.00019999999999999998, "loss": 0.4602, "step": 640 }, { "epoch": 16.0, "eval_accuracy": 0.8218181818181818, "eval_loss": 0.41115185618400574, "eval_runtime": 1.3393, "eval_samples_per_second": 410.657, "eval_steps_per_second": 26.133, "step": 644 }, { "epoch": 16.15, "learning_rate": 0.00019791666666666663, "loss": 0.4685, "step": 650 }, { "epoch": 16.4, "learning_rate": 0.00019583333333333331, "loss": 0.4071, "step": 660 }, { "epoch": 16.65, "learning_rate": 0.00019375, "loss": 0.4358, "step": 670 }, { "epoch": 16.89, "learning_rate": 0.00019166666666666665, "loss": 0.4459, "step": 680 }, { "epoch": 16.99, "eval_accuracy": 0.8345454545454546, "eval_loss": 0.4195989668369293, "eval_runtime": 1.4108, "eval_samples_per_second": 389.839, "eval_steps_per_second": 24.808, "step": 684 }, { "epoch": 17.14, "learning_rate": 0.0001895833333333333, "loss": 0.4683, "step": 690 }, { "epoch": 17.39, "learning_rate": 0.00018749999999999998, "loss": 0.4322, "step": 700 }, { "epoch": 17.64, "learning_rate": 0.00018541666666666666, "loss": 0.4099, "step": 710 }, { "epoch": 17.89, "learning_rate": 0.00018333333333333334, "loss": 0.4668, "step": 720 }, { "epoch": 17.99, "eval_accuracy": 0.8290909090909091, "eval_loss": 0.4041709899902344, "eval_runtime": 1.5199, "eval_samples_per_second": 361.876, "eval_steps_per_second": 23.028, "step": 724 }, { "epoch": 18.14, "learning_rate": 0.00018124999999999996, "loss": 0.5344, "step": 730 }, { "epoch": 18.39, "learning_rate": 0.00017916666666666664, "loss": 0.4369, "step": 740 }, { "epoch": 18.63, "learning_rate": 0.00017708333333333332, "loss": 0.4014, "step": 750 }, { "epoch": 18.88, "learning_rate": 0.000175, "loss": 0.41, "step": 760 }, { "epoch": 18.98, "eval_accuracy": 0.8436363636363636, "eval_loss": 0.4112071990966797, "eval_runtime": 1.3473, "eval_samples_per_second": 408.216, "eval_steps_per_second": 25.977, "step": 764 }, { "epoch": 19.13, "learning_rate": 0.00017291666666666662, "loss": 0.3628, "step": 770 }, { "epoch": 19.38, "learning_rate": 0.0001708333333333333, "loss": 0.3968, "step": 780 }, { "epoch": 19.63, "learning_rate": 0.00016874999999999998, "loss": 0.4301, "step": 790 }, { "epoch": 19.88, "learning_rate": 0.00016666666666666666, "loss": 0.4349, "step": 800 }, { "epoch": 20.0, "eval_accuracy": 0.8418181818181818, "eval_loss": 0.4119725525379181, "eval_runtime": 1.3453, "eval_samples_per_second": 408.821, "eval_steps_per_second": 26.016, "step": 805 }, { "epoch": 20.12, "learning_rate": 0.00016458333333333334, "loss": 0.3921, "step": 810 }, { "epoch": 20.37, "learning_rate": 0.00016249999999999997, "loss": 0.4351, "step": 820 }, { "epoch": 20.62, "learning_rate": 0.00016041666666666664, "loss": 0.4144, "step": 830 }, { "epoch": 20.87, "learning_rate": 0.00015833333333333332, "loss": 0.4558, "step": 840 }, { "epoch": 20.99, "eval_accuracy": 0.8218181818181818, "eval_loss": 0.4554423391819, "eval_runtime": 1.3368, "eval_samples_per_second": 411.426, "eval_steps_per_second": 26.182, "step": 845 }, { "epoch": 21.12, "learning_rate": 0.00015625, "loss": 0.4053, "step": 850 }, { "epoch": 21.37, "learning_rate": 0.00015416666666666663, "loss": 0.3483, "step": 860 }, { "epoch": 21.61, "learning_rate": 0.0001520833333333333, "loss": 0.4139, "step": 870 }, { "epoch": 21.86, "learning_rate": 0.00015, "loss": 0.3952, "step": 880 }, { "epoch": 21.99, "eval_accuracy": 0.850909090909091, "eval_loss": 0.3774945139884949, "eval_runtime": 1.3708, "eval_samples_per_second": 401.212, "eval_steps_per_second": 25.532, "step": 885 }, { "epoch": 22.11, "learning_rate": 0.00014791666666666667, "loss": 0.3661, "step": 890 }, { "epoch": 22.36, "learning_rate": 0.00014583333333333332, "loss": 0.3706, "step": 900 }, { "epoch": 22.61, "learning_rate": 0.00014375, "loss": 0.4127, "step": 910 }, { "epoch": 22.86, "learning_rate": 0.00014166666666666665, "loss": 0.3395, "step": 920 }, { "epoch": 22.98, "eval_accuracy": 0.84, "eval_loss": 0.4027109742164612, "eval_runtime": 1.4218, "eval_samples_per_second": 386.825, "eval_steps_per_second": 24.616, "step": 925 }, { "epoch": 23.11, "learning_rate": 0.00013958333333333333, "loss": 0.3821, "step": 930 }, { "epoch": 23.35, "learning_rate": 0.00013749999999999998, "loss": 0.3408, "step": 940 }, { "epoch": 23.6, "learning_rate": 0.00013541666666666666, "loss": 0.3344, "step": 950 }, { "epoch": 23.85, "learning_rate": 0.0001333333333333333, "loss": 0.3525, "step": 960 }, { "epoch": 24.0, "eval_accuracy": 0.850909090909091, "eval_loss": 0.3956778645515442, "eval_runtime": 1.3433, "eval_samples_per_second": 409.433, "eval_steps_per_second": 26.055, "step": 966 }, { "epoch": 24.1, "learning_rate": 0.00013125, "loss": 0.3803, "step": 970 }, { "epoch": 24.35, "learning_rate": 0.00012916666666666667, "loss": 0.3213, "step": 980 }, { "epoch": 24.6, "learning_rate": 0.00012708333333333332, "loss": 0.3845, "step": 990 }, { "epoch": 24.84, "learning_rate": 0.000125, "loss": 0.3497, "step": 1000 }, { "epoch": 24.99, "eval_accuracy": 0.8527272727272728, "eval_loss": 0.4020462930202484, "eval_runtime": 1.3428, "eval_samples_per_second": 409.587, "eval_steps_per_second": 26.065, "step": 1006 }, { "epoch": 25.09, "learning_rate": 0.00012291666666666665, "loss": 0.375, "step": 1010 }, { "epoch": 25.34, "learning_rate": 0.00012083333333333332, "loss": 0.3141, "step": 1020 }, { "epoch": 25.59, "learning_rate": 0.00011874999999999999, "loss": 0.3899, "step": 1030 }, { "epoch": 25.84, "learning_rate": 0.00011666666666666665, "loss": 0.3625, "step": 1040 }, { "epoch": 25.99, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.39101672172546387, "eval_runtime": 1.3918, "eval_samples_per_second": 395.16, "eval_steps_per_second": 25.147, "step": 1046 }, { "epoch": 26.09, "learning_rate": 0.00011458333333333332, "loss": 0.3777, "step": 1050 }, { "epoch": 26.34, "learning_rate": 0.0001125, "loss": 0.3572, "step": 1060 }, { "epoch": 26.58, "learning_rate": 0.00011041666666666666, "loss": 0.3486, "step": 1070 }, { "epoch": 26.83, "learning_rate": 0.00010833333333333333, "loss": 0.3008, "step": 1080 }, { "epoch": 26.98, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.3789851665496826, "eval_runtime": 1.4113, "eval_samples_per_second": 389.701, "eval_steps_per_second": 24.799, "step": 1086 }, { "epoch": 27.08, "learning_rate": 0.00010625, "loss": 0.32, "step": 1090 }, { "epoch": 27.33, "learning_rate": 0.00010416666666666666, "loss": 0.356, "step": 1100 }, { "epoch": 27.58, "learning_rate": 0.00010208333333333333, "loss": 0.3053, "step": 1110 }, { "epoch": 27.83, "learning_rate": 9.999999999999999e-05, "loss": 0.2907, "step": 1120 }, { "epoch": 28.0, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.38305726647377014, "eval_runtime": 1.3603, "eval_samples_per_second": 404.315, "eval_steps_per_second": 25.729, "step": 1127 }, { "epoch": 28.07, "learning_rate": 9.791666666666666e-05, "loss": 0.3384, "step": 1130 }, { "epoch": 28.32, "learning_rate": 9.583333333333332e-05, "loss": 0.2956, "step": 1140 }, { "epoch": 28.57, "learning_rate": 9.374999999999999e-05, "loss": 0.2923, "step": 1150 }, { "epoch": 28.82, "learning_rate": 9.166666666666667e-05, "loss": 0.3165, "step": 1160 }, { "epoch": 28.99, "eval_accuracy": 0.8618181818181818, "eval_loss": 0.4005330204963684, "eval_runtime": 1.3778, "eval_samples_per_second": 399.18, "eval_steps_per_second": 25.402, "step": 1167 }, { "epoch": 29.07, "learning_rate": 8.958333333333332e-05, "loss": 0.302, "step": 1170 }, { "epoch": 29.32, "learning_rate": 8.75e-05, "loss": 0.2723, "step": 1180 }, { "epoch": 29.57, "learning_rate": 8.541666666666665e-05, "loss": 0.2953, "step": 1190 }, { "epoch": 29.81, "learning_rate": 8.333333333333333e-05, "loss": 0.3231, "step": 1200 }, { "epoch": 29.99, "eval_accuracy": 0.8327272727272728, "eval_loss": 0.4512370228767395, "eval_runtime": 1.3443, "eval_samples_per_second": 409.127, "eval_steps_per_second": 26.035, "step": 1207 }, { "epoch": 30.06, "learning_rate": 8.124999999999998e-05, "loss": 0.3583, "step": 1210 }, { "epoch": 30.31, "learning_rate": 7.916666666666666e-05, "loss": 0.3356, "step": 1220 }, { "epoch": 30.56, "learning_rate": 7.708333333333331e-05, "loss": 0.2687, "step": 1230 }, { "epoch": 30.81, "learning_rate": 7.5e-05, "loss": 0.2819, "step": 1240 }, { "epoch": 30.98, "eval_accuracy": 0.8454545454545455, "eval_loss": 0.4227891266345978, "eval_runtime": 1.4114, "eval_samples_per_second": 389.695, "eval_steps_per_second": 24.799, "step": 1247 }, { "epoch": 31.06, "learning_rate": 7.291666666666666e-05, "loss": 0.2949, "step": 1250 }, { "epoch": 31.3, "learning_rate": 7.083333333333332e-05, "loss": 0.2654, "step": 1260 }, { "epoch": 31.55, "learning_rate": 6.874999999999999e-05, "loss": 0.2745, "step": 1270 }, { "epoch": 31.8, "learning_rate": 6.666666666666666e-05, "loss": 0.2704, "step": 1280 }, { "epoch": 32.0, "eval_accuracy": 0.86, "eval_loss": 0.4074237644672394, "eval_runtime": 1.3463, "eval_samples_per_second": 408.52, "eval_steps_per_second": 25.997, "step": 1288 }, { "epoch": 32.05, "learning_rate": 6.458333333333334e-05, "loss": 0.3105, "step": 1290 }, { "epoch": 32.3, "learning_rate": 6.25e-05, "loss": 0.2614, "step": 1300 }, { "epoch": 32.55, "learning_rate": 6.041666666666666e-05, "loss": 0.2842, "step": 1310 }, { "epoch": 32.8, "learning_rate": 5.8333333333333326e-05, "loss": 0.2429, "step": 1320 }, { "epoch": 32.99, "eval_accuracy": 0.8545454545454545, "eval_loss": 0.4405260682106018, "eval_runtime": 1.6104, "eval_samples_per_second": 341.534, "eval_steps_per_second": 21.734, "step": 1328 }, { "epoch": 33.04, "learning_rate": 5.625e-05, "loss": 0.2956, "step": 1330 }, { "epoch": 33.29, "learning_rate": 5.4166666666666664e-05, "loss": 0.2767, "step": 1340 }, { "epoch": 33.54, "learning_rate": 5.208333333333333e-05, "loss": 0.2676, "step": 1350 }, { "epoch": 33.79, "learning_rate": 4.9999999999999996e-05, "loss": 0.2421, "step": 1360 }, { "epoch": 33.99, "eval_accuracy": 0.8527272727272728, "eval_loss": 0.43370744585990906, "eval_runtime": 1.4724, "eval_samples_per_second": 373.551, "eval_steps_per_second": 23.771, "step": 1368 }, { "epoch": 34.04, "learning_rate": 4.791666666666666e-05, "loss": 0.3275, "step": 1370 }, { "epoch": 34.29, "learning_rate": 4.5833333333333334e-05, "loss": 0.2475, "step": 1380 }, { "epoch": 34.53, "learning_rate": 4.375e-05, "loss": 0.288, "step": 1390 }, { "epoch": 34.78, "learning_rate": 4.1666666666666665e-05, "loss": 0.3039, "step": 1400 }, { "epoch": 34.98, "eval_accuracy": 0.8472727272727273, "eval_loss": 0.46279987692832947, "eval_runtime": 1.3718, "eval_samples_per_second": 400.919, "eval_steps_per_second": 25.513, "step": 1408 }, { "epoch": 35.03, "learning_rate": 3.958333333333333e-05, "loss": 0.2737, "step": 1410 }, { "epoch": 35.28, "learning_rate": 3.75e-05, "loss": 0.2746, "step": 1420 }, { "epoch": 35.53, "learning_rate": 3.541666666666666e-05, "loss": 0.2906, "step": 1430 }, { "epoch": 35.78, "learning_rate": 3.333333333333333e-05, "loss": 0.2677, "step": 1440 }, { "epoch": 36.0, "eval_accuracy": 0.8545454545454545, "eval_loss": 0.4411250948905945, "eval_runtime": 1.3953, "eval_samples_per_second": 394.172, "eval_steps_per_second": 25.084, "step": 1449 }, { "epoch": 36.02, "learning_rate": 3.125e-05, "loss": 0.2467, "step": 1450 }, { "epoch": 36.27, "learning_rate": 2.9166666666666663e-05, "loss": 0.2513, "step": 1460 }, { "epoch": 36.52, "learning_rate": 2.7083333333333332e-05, "loss": 0.271, "step": 1470 }, { "epoch": 36.77, "learning_rate": 2.4999999999999998e-05, "loss": 0.2171, "step": 1480 }, { "epoch": 36.99, "eval_accuracy": 0.8618181818181818, "eval_loss": 0.4755066931247711, "eval_runtime": 1.4153, "eval_samples_per_second": 388.599, "eval_steps_per_second": 24.729, "step": 1489 }, { "epoch": 37.02, "learning_rate": 2.2916666666666667e-05, "loss": 0.2207, "step": 1490 }, { "epoch": 37.27, "learning_rate": 2.0833333333333333e-05, "loss": 0.2346, "step": 1500 }, { "epoch": 37.52, "learning_rate": 1.875e-05, "loss": 0.2055, "step": 1510 }, { "epoch": 37.76, "learning_rate": 1.6666666666666664e-05, "loss": 0.2268, "step": 1520 }, { "epoch": 37.99, "eval_accuracy": 0.86, "eval_loss": 0.45060980319976807, "eval_runtime": 1.4394, "eval_samples_per_second": 382.115, "eval_steps_per_second": 24.316, "step": 1529 }, { "epoch": 38.01, "learning_rate": 1.4583333333333331e-05, "loss": 0.2844, "step": 1530 }, { "epoch": 38.26, "learning_rate": 1.2499999999999999e-05, "loss": 0.2419, "step": 1540 }, { "epoch": 38.51, "learning_rate": 1.0416666666666666e-05, "loss": 0.256, "step": 1550 }, { "epoch": 38.76, "learning_rate": 8.333333333333332e-06, "loss": 0.2378, "step": 1560 }, { "epoch": 38.98, "eval_accuracy": 0.8527272727272728, "eval_loss": 0.4633024334907532, "eval_runtime": 1.3843, "eval_samples_per_second": 397.304, "eval_steps_per_second": 25.283, "step": 1569 }, { "epoch": 39.01, "learning_rate": 6.2499999999999995e-06, "loss": 0.3098, "step": 1570 }, { "epoch": 39.25, "learning_rate": 4.166666666666666e-06, "loss": 0.2416, "step": 1580 }, { "epoch": 39.5, "learning_rate": 2.083333333333333e-06, "loss": 0.2014, "step": 1590 }, { "epoch": 39.75, "learning_rate": 0.0, "loss": 0.2021, "step": 1600 }, { "epoch": 39.75, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.4492277503013611, "eval_runtime": 1.3993, "eval_samples_per_second": 393.042, "eval_steps_per_second": 25.012, "step": 1600 }, { "epoch": 39.75, "step": 1600, "total_flos": 2.7935287357114368e+17, "train_loss": 0.4774747739732266, "train_runtime": 572.0649, "train_samples_per_second": 179.21, "train_steps_per_second": 2.797 } ], "logging_steps": 10, "max_steps": 1600, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.7935287357114368e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }