| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.20020782442644267, | |
| "global_step": 1150, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 2.8988, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 2.9604, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.8e-06, | |
| "loss": 2.8234, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 2.7224, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6824, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.6e-06, | |
| "loss": 2.5884, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 2.4736, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 2.3262, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.4e-06, | |
| "loss": 2.3179, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6e-06, | |
| "loss": 2.3003, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.6e-06, | |
| "loss": 2.2296, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.2e-06, | |
| "loss": 2.1433, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.8e-06, | |
| "loss": 2.1452, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 2.078, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9e-06, | |
| "loss": 2.0396, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 2.0853, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.02e-05, | |
| "loss": 2.0704, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.08e-05, | |
| "loss": 1.9803, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.1400000000000001e-05, | |
| "loss": 2.0011, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.93, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.26e-05, | |
| "loss": 1.9777, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.32e-05, | |
| "loss": 1.8732, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.3800000000000002e-05, | |
| "loss": 1.8803, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.44e-05, | |
| "loss": 1.8258, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.8486, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.56e-05, | |
| "loss": 1.8054, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.62e-05, | |
| "loss": 1.8284, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 1.7647, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.74e-05, | |
| "loss": 1.7801, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.8e-05, | |
| "loss": 1.7765, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.86e-05, | |
| "loss": 1.7285, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 1.7652, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.98e-05, | |
| "loss": 1.7792, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.04e-05, | |
| "loss": 1.7661, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.1e-05, | |
| "loss": 1.7171, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.16e-05, | |
| "loss": 1.7829, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.22e-05, | |
| "loss": 1.752, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 1.6864, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.3400000000000003e-05, | |
| "loss": 1.7163, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.7071, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.4599999999999998e-05, | |
| "loss": 1.7115, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.52e-05, | |
| "loss": 1.7248, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.58e-05, | |
| "loss": 1.6862, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.64e-05, | |
| "loss": 1.6393, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.6815, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 1.6933, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.8199999999999998e-05, | |
| "loss": 1.6984, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.88e-05, | |
| "loss": 1.6677, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.94e-05, | |
| "loss": 1.6683, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 3e-05, | |
| "loss": 1.6234, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.9880000000000002e-05, | |
| "loss": 1.6206, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.976e-05, | |
| "loss": 1.6712, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.964e-05, | |
| "loss": 1.652, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.9520000000000002e-05, | |
| "loss": 1.6202, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.94e-05, | |
| "loss": 1.6745, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.928e-05, | |
| "loss": 1.6772, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.916e-05, | |
| "loss": 1.6238, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.904e-05, | |
| "loss": 1.5743, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.892e-05, | |
| "loss": 1.6013, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.88e-05, | |
| "loss": 1.5846, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.868e-05, | |
| "loss": 1.6274, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.856e-05, | |
| "loss": 1.6018, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.844e-05, | |
| "loss": 1.5927, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.832e-05, | |
| "loss": 1.5887, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.8199999999999998e-05, | |
| "loss": 1.5446, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.8080000000000002e-05, | |
| "loss": 1.6083, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.7960000000000003e-05, | |
| "loss": 1.575, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.784e-05, | |
| "loss": 1.6243, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.7720000000000002e-05, | |
| "loss": 1.5957, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 1.5684, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.748e-05, | |
| "loss": 1.5778, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.7360000000000002e-05, | |
| "loss": 1.6025, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.724e-05, | |
| "loss": 1.56, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.712e-05, | |
| "loss": 1.5475, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.5322, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.688e-05, | |
| "loss": 1.5708, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.676e-05, | |
| "loss": 1.5688, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.6640000000000002e-05, | |
| "loss": 1.5407, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.652e-05, | |
| "loss": 1.4945, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.64e-05, | |
| "loss": 1.5536, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.628e-05, | |
| "loss": 1.5501, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.616e-05, | |
| "loss": 1.5184, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.604e-05, | |
| "loss": 1.5046, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.592e-05, | |
| "loss": 1.5529, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.58e-05, | |
| "loss": 1.5006, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.568e-05, | |
| "loss": 1.4979, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.556e-05, | |
| "loss": 1.5288, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.544e-05, | |
| "loss": 1.5924, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.5319999999999998e-05, | |
| "loss": 1.5031, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.52e-05, | |
| "loss": 1.5628, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.508e-05, | |
| "loss": 1.5173, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.4959999999999998e-05, | |
| "loss": 1.5159, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.484e-05, | |
| "loss": 1.4837, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.472e-05, | |
| "loss": 1.528, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.4599999999999998e-05, | |
| "loss": 1.5605, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.448e-05, | |
| "loss": 1.4884, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.4360000000000004e-05, | |
| "loss": 1.5194, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.4240000000000002e-05, | |
| "loss": 1.5145, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.4120000000000003e-05, | |
| "loss": 1.5127, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.5129, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.3880000000000002e-05, | |
| "loss": 1.4943, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.3760000000000003e-05, | |
| "loss": 1.5653, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.364e-05, | |
| "loss": 1.5496, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.3520000000000002e-05, | |
| "loss": 1.5118, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.3400000000000003e-05, | |
| "loss": 1.4897, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.328e-05, | |
| "loss": 1.5305, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.3160000000000002e-05, | |
| "loss": 1.518, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.304e-05, | |
| "loss": 1.525, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.292e-05, | |
| "loss": 1.4876, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 1.4853, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.268e-05, | |
| "loss": 1.5047, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.256e-05, | |
| "loss": 1.4842, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.2440000000000002e-05, | |
| "loss": 1.5162, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.232e-05, | |
| "loss": 1.5132, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.22e-05, | |
| "loss": 1.4743, | |
| "step": 1150 | |
| } | |
| ], | |
| "max_steps": 3000, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.93471168118784e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |