{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 25.0,
  "eval_steps": 500,
  "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 0.001999898043009433,
      "loss": 4.5094,
      "step": 5
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0019995921928281893,
      "loss": 3.8047,
      "step": 10
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.001999082511823396,
      "loss": 3.8813,
      "step": 15
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0019983691039261358,
      "loss": 3.7188,
      "step": 20
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0019974521146102534,
      "loss": 3.6695,
      "step": 25
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001996331730862691,
      "loss": 3.7078,
      "step": 30
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0019950081811453595,
      "loss": 3.6844,
      "step": 35
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0019934817353485504,
      "loss": 3.6961,
      "step": 40
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0019917527047359027,
      "loss": 3.5758,
      "step": 45
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.001989821441880933,
      "loss": 3.4102,
      "step": 50
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.0019876883405951376,
      "loss": 3.3984,
      "step": 55
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.001985353835847693,
      "loss": 3.3602,
      "step": 60
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.0019828184036767556,
      "loss": 3.4461,
      "step": 65
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.0019800825610923932,
      "loss": 3.3461,
      "step": 70
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0019771468659711597,
      "loss": 3.4172,
      "step": 75
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0019740119169423336,
      "loss": 3.4359,
      "step": 80
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0019706783532658523,
      "loss": 3.5141,
      "step": 85
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.001967146854701957,
      "loss": 3.2242,
      "step": 90
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.0019634181413725788,
      "loss": 3.0227,
      "step": 95
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0019594929736144974,
      "loss": 2.8984,
      "step": 100
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.001955372151824297,
      "loss": 3.0781,
      "step": 105
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.0019510565162951536,
      "loss": 3.1203,
      "step": 110
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.00194654694704549,
      "loss": 3.1828,
      "step": 115
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.0019418443636395248,
      "loss": 3.0531,
      "step": 120
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.001936949724999762,
      "loss": 3.1523,
      "step": 125
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0019318640292114524,
      "loss": 3.1156,
      "step": 130
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.0019265883133190713,
      "loss": 2.7844,
      "step": 135
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.0019211236531148502,
      "loss": 2.6711,
      "step": 140
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.0019154711629194062,
      "loss": 2.6609,
      "step": 145
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.0019096319953545184,
      "loss": 2.7531,
      "step": 150
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.0019036073411080917,
      "loss": 2.7977,
      "step": 155
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.0018973984286913585,
      "loss": 2.7914,
      "step": 160
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.0018910065241883678,
      "loss": 2.8188,
      "step": 165
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.0018844329309978143,
      "loss": 2.8945,
      "step": 170
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.0018776789895672556,
      "loss": 2.8883,
      "step": 175
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.0018707460771197773,
      "loss": 2.4617,
      "step": 180
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.001863635607373157,
      "loss": 2.4633,
      "step": 185
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001856349030251589,
      "loss": 2.5094,
      "step": 190
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.0018488878315900226,
      "loss": 2.432,
      "step": 195
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.0018412535328311812,
      "loss": 2.5648,
      "step": 200
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.0018334476907153176,
      "loss": 2.4836,
      "step": 205
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.001825471896962774,
      "loss": 2.6617,
      "step": 210
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.0018173277779494068,
      "loss": 2.6734,
      "step": 215
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0018090169943749475,
      "loss": 2.6742,
      "step": 220
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.0018005412409243604,
      "loss": 2.1379,
      "step": 225
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.0017919022459222751,
      "loss": 2.1508,
      "step": 230
    },
    {
      "epoch": 5.34,
      "learning_rate": 0.0017831017709805555,
      "loss": 2.2582,
      "step": 235
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.0017741416106390826,
      "loss": 2.2367,
      "step": 240
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.0017650235919998232,
      "loss": 2.325,
      "step": 245
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.0017557495743542584,
      "loss": 2.2703,
      "step": 250
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.0017463214488042471,
      "loss": 2.3703,
      "step": 255
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001736741137876405,
      "loss": 2.4648,
      "step": 260
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.0017270105951300739,
      "loss": 2.2734,
      "step": 265
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.0017171318047589637,
      "loss": 1.9898,
      "step": 270
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.0017071067811865474,
      "loss": 1.9816,
      "step": 275
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.0016969375686552938,
      "loss": 1.9648,
      "step": 280
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.0016866262408098134,
      "loss": 2.1672,
      "step": 285
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.0016761749002740195,
      "loss": 2.0074,
      "step": 290
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.0016655856782223683,
      "loss": 2.1598,
      "step": 295
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.0016548607339452852,
      "loss": 2.0996,
      "step": 300
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.0016440022544088554,
      "loss": 2.1434,
      "step": 305
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.0016330124538088703,
      "loss": 2.0699,
      "step": 310
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.0016218935731193223,
      "loss": 1.7312,
      "step": 315
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.0016106478796354383,
      "loss": 1.7799,
      "step": 320
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.0015992776665113468,
      "loss": 1.7008,
      "step": 325
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.0015877852522924731,
      "loss": 1.8969,
      "step": 330
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.0015761729804427528,
      "loss": 1.8156,
      "step": 335
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.0015644432188667695,
      "loss": 1.9336,
      "step": 340
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.0015525983594269026,
      "loss": 1.9918,
      "step": 345
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.0015406408174555976,
      "loss": 2.0055,
      "step": 350
    },
    {
      "epoch": 8.07,
      "learning_rate": 0.0015285730312628418,
      "loss": 1.7168,
      "step": 355
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001516397461638962,
      "loss": 1.5531,
      "step": 360
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.001504116591352832,
      "loss": 1.5922,
      "step": 365
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001491732924645604,
      "loss": 1.618,
      "step": 370
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.0014792489867200569,
      "loss": 1.6738,
      "step": 375
    },
    {
      "epoch": 8.64,
      "learning_rate": 0.0014666673232256737,
      "loss": 1.7461,
      "step": 380
    },
    {
      "epoch": 8.75,
      "learning_rate": 0.0014539904997395467,
      "loss": 1.6746,
      "step": 385
    },
    {
      "epoch": 8.86,
      "learning_rate": 0.0014412211012432212,
      "loss": 1.7711,
      "step": 390
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.0014283617315955814,
      "loss": 1.8387,
      "step": 395
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.0014154150130018866,
      "loss": 1.475,
      "step": 400
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.001402383585479068,
      "loss": 1.4523,
      "step": 405
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.0013892701063173917,
      "loss": 1.4812,
      "step": 410
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.0013760772495385997,
      "loss": 1.525,
      "step": 415
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.001362807705350641,
      "loss": 1.398,
      "step": 420
    },
    {
      "epoch": 9.66,
      "learning_rate": 0.0013494641795990985,
      "loss": 1.4477,
      "step": 425
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.00133604939321543,
      "loss": 1.5801,
      "step": 430
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.0013225660816621341,
      "loss": 1.6422,
      "step": 435
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0013090169943749475,
      "loss": 1.5535,
      "step": 440
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.0012954048942022001,
      "loss": 1.2324,
      "step": 445
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.0012817325568414298,
      "loss": 1.2613,
      "step": 450
    },
    {
      "epoch": 10.34,
      "learning_rate": 0.001268002770273379,
      "loss": 1.3293,
      "step": 455
    },
    {
      "epoch": 10.45,
      "learning_rate": 0.0012542183341934872,
      "loss": 1.2852,
      "step": 460
    },
    {
      "epoch": 10.57,
      "learning_rate": 0.0012403820594409924,
      "loss": 1.3295,
      "step": 465
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.0012264967674257645,
      "loss": 1.3287,
      "step": 470
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.0012125652895529767,
      "loss": 1.3566,
      "step": 475
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.0011985904666457455,
      "loss": 1.4414,
      "step": 480
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.0011845751483658454,
      "loss": 1.3695,
      "step": 485
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.0011705221926326238,
      "loss": 1.1363,
      "step": 490
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.001156434465040231,
      "loss": 1.1354,
      "step": 495
    },
    {
      "epoch": 11.36,
      "learning_rate": 0.0011423148382732854,
      "loss": 1.0725,
      "step": 500
    },
    {
      "epoch": 11.48,
      "learning_rate": 0.001128166191521093,
      "loss": 1.1754,
      "step": 505
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.0011139914098905405,
      "loss": 1.1848,
      "step": 510
    },
    {
      "epoch": 11.7,
      "learning_rate": 0.0010997933838177826,
      "loss": 1.2354,
      "step": 515
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.0010855750084788399,
      "loss": 1.1984,
      "step": 520
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.0010713391831992322,
      "loss": 1.2666,
      "step": 525
    },
    {
      "epoch": 12.05,
      "learning_rate": 0.001057088810862768,
      "loss": 1.1408,
      "step": 530
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.0010428267973196027,
      "loss": 0.9385,
      "step": 535
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.0010285560507936962,
      "loss": 1.0158,
      "step": 540
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.0010142794812897874,
      "loss": 0.9936,
      "step": 545
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 0.9891,
      "step": 550
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.000985720518710213,
      "loss": 1.0684,
      "step": 555
    },
    {
      "epoch": 12.73,
      "learning_rate": 0.0009714439492063038,
      "loss": 1.076,
      "step": 560
    },
    {
      "epoch": 12.84,
      "learning_rate": 0.0009571732026803976,
      "loss": 1.0609,
      "step": 565
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.000942911189137232,
      "loss": 1.1297,
      "step": 570
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.0009286608168007677,
      "loss": 0.9342,
      "step": 575
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.0009144249915211606,
      "loss": 0.8511,
      "step": 580
    },
    {
      "epoch": 13.3,
      "learning_rate": 0.0009002066161822172,
      "loss": 0.8336,
      "step": 585
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.0008860085901094594,
      "loss": 0.8652,
      "step": 590
    },
    {
      "epoch": 13.52,
      "learning_rate": 0.0008718338084789072,
      "loss": 0.9744,
      "step": 595
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.000857685161726715,
      "loss": 0.9006,
      "step": 600
    },
    {
      "epoch": 13.75,
      "learning_rate": 0.000843565534959769,
      "loss": 0.9619,
      "step": 605
    },
    {
      "epoch": 13.86,
      "learning_rate": 0.0008294778073673762,
      "loss": 0.9123,
      "step": 610
    },
    {
      "epoch": 13.98,
      "learning_rate": 0.0008154248516341547,
      "loss": 0.9959,
      "step": 615
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.0008014095333542549,
      "loss": 0.7503,
      "step": 620
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.0007874347104470233,
      "loss": 0.7357,
      "step": 625
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.0007735032325742355,
      "loss": 0.7477,
      "step": 630
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.0007596179405590076,
      "loss": 0.8088,
      "step": 635
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.0007457816658065133,
      "loss": 0.7652,
      "step": 640
    },
    {
      "epoch": 14.66,
      "learning_rate": 0.0007319972297266214,
      "loss": 0.7847,
      "step": 645
    },
    {
      "epoch": 14.77,
      "learning_rate": 0.0007182674431585703,
      "loss": 0.7984,
      "step": 650
    },
    {
      "epoch": 14.89,
      "learning_rate": 0.0007045951057978,
      "loss": 0.8732,
      "step": 655
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.0006909830056250527,
      "loss": 0.8258,
      "step": 660
    },
    {
      "epoch": 15.11,
      "learning_rate": 0.0006774339183378663,
      "loss": 0.6311,
      "step": 665
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.0006639506067845697,
      "loss": 0.6543,
      "step": 670
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.0006505358204009018,
      "loss": 0.6421,
      "step": 675
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.0006371922946493591,
      "loss": 0.6937,
      "step": 680
    },
    {
      "epoch": 15.57,
      "learning_rate": 0.0006239227504614003,
      "loss": 0.6887,
      "step": 685
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.0006107298936826086,
      "loss": 0.7097,
      "step": 690
    },
    {
      "epoch": 15.8,
      "learning_rate": 0.0005976164145209322,
      "loss": 0.6778,
      "step": 695
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.0005845849869981136,
      "loss": 0.7124,
      "step": 700
    },
    {
      "epoch": 16.02,
      "learning_rate": 0.000571638268404419,
      "loss": 0.7053,
      "step": 705
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.0005587788987567784,
      "loss": 0.5863,
      "step": 710
    },
    {
      "epoch": 16.25,
      "learning_rate": 0.0005460095002604533,
      "loss": 0.5588,
      "step": 715
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.0005333326767743263,
      "loss": 0.5363,
      "step": 720
    },
    {
      "epoch": 16.48,
      "learning_rate": 0.0005207510132799435,
      "loss": 0.6137,
      "step": 725
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.0005082670753543961,
      "loss": 0.5606,
      "step": 730
    },
    {
      "epoch": 16.7,
      "learning_rate": 0.0004958834086471683,
      "loss": 0.629,
      "step": 735
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.00048360253836103817,
      "loss": 0.5754,
      "step": 740
    },
    {
      "epoch": 16.93,
      "learning_rate": 0.0004714269687371581,
      "loss": 0.6239,
      "step": 745
    },
    {
      "epoch": 17.05,
      "learning_rate": 0.0004593591825444028,
      "loss": 0.5807,
      "step": 750
    },
    {
      "epoch": 17.16,
      "learning_rate": 0.0004474016405730973,
      "loss": 0.465,
      "step": 755
    },
    {
      "epoch": 17.27,
      "learning_rate": 0.00043555678113323104,
      "loss": 0.4871,
      "step": 760
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.00042382701955724725,
      "loss": 0.4623,
      "step": 765
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.00041221474770752696,
      "loss": 0.5059,
      "step": 770
    },
    {
      "epoch": 17.61,
      "learning_rate": 0.00040072233348865304,
      "loss": 0.5021,
      "step": 775
    },
    {
      "epoch": 17.73,
      "learning_rate": 0.0003893521203645618,
      "loss": 0.5138,
      "step": 780
    },
    {
      "epoch": 17.84,
      "learning_rate": 0.00037810642688067796,
      "loss": 0.5212,
      "step": 785
    },
    {
      "epoch": 17.95,
      "learning_rate": 0.00036698754619112975,
      "loss": 0.5611,
      "step": 790
    },
    {
      "epoch": 18.07,
      "learning_rate": 0.00035599774559114475,
      "loss": 0.4956,
      "step": 795
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.000345139266054715,
      "loss": 0.4243,
      "step": 800
    },
    {
      "epoch": 18.3,
      "learning_rate": 0.0003344143217776319,
      "loss": 0.4391,
      "step": 805
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.00032382509972598086,
      "loss": 0.4627,
      "step": 810
    },
    {
      "epoch": 18.52,
      "learning_rate": 0.0003133737591901864,
      "loss": 0.4208,
      "step": 815
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.0003030624313447067,
      "loss": 0.45,
      "step": 820
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.00029289321881345256,
      "loss": 0.44,
      "step": 825
    },
    {
      "epoch": 18.86,
      "learning_rate": 0.0002828681952410366,
      "loss": 0.4451,
      "step": 830
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.0002729894048699265,
      "loss": 0.4494,
      "step": 835
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.00026325886212359495,
      "loss": 0.3839,
      "step": 840
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.0002536785511957531,
      "loss": 0.3728,
      "step": 845
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.00024425042564574185,
      "loss": 0.4126,
      "step": 850
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.00023497640800017682,
      "loss": 0.4183,
      "step": 855
    },
    {
      "epoch": 19.55,
      "learning_rate": 0.0002258583893609175,
      "loss": 0.3778,
      "step": 860
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.00021689822901944456,
      "loss": 0.3758,
      "step": 865
    },
    {
      "epoch": 19.77,
      "learning_rate": 0.000208097754077725,
      "loss": 0.4034,
      "step": 870
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.0001994587590756397,
      "loss": 0.4085,
      "step": 875
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.00019098300562505265,
      "loss": 0.3673,
      "step": 880
    },
    {
      "epoch": 20.11,
      "learning_rate": 0.0001826722220505931,
      "loss": 0.363,
      "step": 885
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.000174528103037226,
      "loss": 0.3707,
      "step": 890
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.00016655230928468257,
      "loss": 0.369,
      "step": 895
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.00015874646716881869,
      "loss": 0.3528,
      "step": 900
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.00015111216840997744,
      "loss": 0.3581,
      "step": 905
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.00014365096974841107,
      "loss": 0.3466,
      "step": 910
    },
    {
      "epoch": 20.8,
      "learning_rate": 0.00013636439262684297,
      "loss": 0.3274,
      "step": 915
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.00012925392288022297,
      "loss": 0.3401,
      "step": 920
    },
    {
      "epoch": 21.02,
      "learning_rate": 0.00012232101043274435,
      "loss": 0.3435,
      "step": 925
    },
    {
      "epoch": 21.14,
      "learning_rate": 0.00011556706900218572,
      "loss": 0.2972,
      "step": 930
    },
    {
      "epoch": 21.25,
      "learning_rate": 0.00010899347581163222,
      "loss": 0.3153,
      "step": 935
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.00010260157130864178,
      "loss": 0.3315,
      "step": 940
    },
    {
      "epoch": 21.48,
      "learning_rate": 9.639265889190829e-05,
      "loss": 0.3264,
      "step": 945
    },
    {
      "epoch": 21.59,
      "learning_rate": 9.036800464548156e-05,
      "loss": 0.3427,
      "step": 950
    },
    {
      "epoch": 21.7,
      "learning_rate": 8.4528837080594e-05,
      "loss": 0.3415,
      "step": 955
    },
    {
      "epoch": 21.82,
      "learning_rate": 7.887634688515e-05,
      "loss": 0.323,
      "step": 960
    },
    {
      "epoch": 21.93,
      "learning_rate": 7.341168668092857e-05,
      "loss": 0.2961,
      "step": 965
    },
    {
      "epoch": 22.05,
      "learning_rate": 6.813597078854772e-05,
      "loss": 0.3276,
      "step": 970
    },
    {
      "epoch": 22.16,
      "learning_rate": 6.305027500023842e-05,
      "loss": 0.3045,
      "step": 975
    },
    {
      "epoch": 22.27,
      "learning_rate": 5.8155636360475384e-05,
      "loss": 0.3167,
      "step": 980
    },
    {
      "epoch": 22.39,
      "learning_rate": 5.345305295450997e-05,
      "loss": 0.319,
      "step": 985
    },
    {
      "epoch": 22.5,
      "learning_rate": 4.894348370484647e-05,
      "loss": 0.2852,
      "step": 990
    },
    {
      "epoch": 22.61,
      "learning_rate": 4.4627848175703315e-05,
      "loss": 0.3034,
      "step": 995
    },
    {
      "epoch": 22.73,
      "learning_rate": 4.050702638550274e-05,
      "loss": 0.2845,
      "step": 1000
    },
    {
      "epoch": 22.84,
      "learning_rate": 3.658185862742103e-05,
      "loss": 0.3136,
      "step": 1005
    },
    {
      "epoch": 22.95,
      "learning_rate": 3.285314529804295e-05,
      "loss": 0.3187,
      "step": 1010
    },
    {
      "epoch": 23.07,
      "learning_rate": 2.93216467341475e-05,
      "loss": 0.2907,
      "step": 1015
    },
    {
      "epoch": 23.18,
      "learning_rate": 2.5988083057666535e-05,
      "loss": 0.2955,
      "step": 1020
    },
    {
      "epoch": 23.3,
      "learning_rate": 2.2853134028840594e-05,
      "loss": 0.2785,
      "step": 1025
    },
    {
      "epoch": 23.41,
      "learning_rate": 1.9917438907606554e-05,
      "loss": 0.3369,
      "step": 1030
    },
    {
      "epoch": 23.52,
      "learning_rate": 1.7181596323244453e-05,
      "loss": 0.2837,
      "step": 1035
    },
    {
      "epoch": 23.64,
      "learning_rate": 1.4646164152307017e-05,
      "loss": 0.3002,
      "step": 1040
    },
    {
      "epoch": 23.75,
      "learning_rate": 1.231165940486234e-05,
      "loss": 0.3062,
      "step": 1045
    },
    {
      "epoch": 23.86,
      "learning_rate": 1.0178558119067316e-05,
      "loss": 0.2859,
      "step": 1050
    },
    {
      "epoch": 23.98,
      "learning_rate": 8.247295264097288e-06,
      "loss": 0.284,
      "step": 1055
    },
    {
      "epoch": 24.09,
      "learning_rate": 6.518264651449779e-06,
      "loss": 0.2607,
      "step": 1060
    },
    {
      "epoch": 24.2,
      "learning_rate": 4.991818854640395e-06,
      "loss": 0.3164,
      "step": 1065
    },
    {
      "epoch": 24.32,
      "learning_rate": 3.6682691373086663e-06,
      "loss": 0.2597,
      "step": 1070
    },
    {
      "epoch": 24.43,
      "learning_rate": 2.5478853897464847e-06,
      "loss": 0.2907,
      "step": 1075
    },
    {
      "epoch": 24.55,
      "learning_rate": 1.630896073864352e-06,
      "loss": 0.3033,
      "step": 1080
    },
    {
      "epoch": 24.66,
      "learning_rate": 9.174881766043087e-07,
      "loss": 0.3089,
      "step": 1085
    },
    {
      "epoch": 24.77,
      "learning_rate": 4.078071718107701e-07,
      "loss": 0.2964,
      "step": 1090
    },
    {
      "epoch": 24.89,
      "learning_rate": 1.0195699056669839e-07,
      "loss": 0.2995,
      "step": 1095
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0,
      "loss": 0.2936,
      "step": 1100
    },
    {
      "epoch": 25.0,
      "step": 1100,
      "total_flos": 5.602696856046797e+17,
      "train_loss": 1.3768115234375,
      "train_runtime": 24197.7873,
      "train_samples_per_second": 0.724,
      "train_steps_per_second": 0.045
    }
  ],
  "logging_steps": 5,
  "max_steps": 1100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 100,
  "total_flos": 5.602696856046797e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}