{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500.0,
  "global_step": 6237,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02405002405002405,
      "grad_norm": 0.4139963388442993,
      "learning_rate": 0.00019996828714700116,
      "loss": 1.5971,
      "step": 50
    },
    {
      "epoch": 0.0481000481000481,
      "grad_norm": 0.3423018157482147,
      "learning_rate": 0.00019987316870210547,
      "loss": 1.274,
      "step": 100
    },
    {
      "epoch": 0.07215007215007214,
      "grad_norm": 0.3551710247993469,
      "learning_rate": 0.0001997147049948582,
      "loss": 1.2519,
      "step": 150
    },
    {
      "epoch": 0.0962000962000962,
      "grad_norm": 0.32329073548316956,
      "learning_rate": 0.0001994929965319844,
      "loss": 1.2382,
      "step": 200
    },
    {
      "epoch": 0.12025012025012025,
      "grad_norm": 0.48585018515586853,
      "learning_rate": 0.0001992081839336419,
      "loss": 1.2293,
      "step": 250
    },
    {
      "epoch": 0.1443001443001443,
      "grad_norm": 0.40136224031448364,
      "learning_rate": 0.00019886044784423197,
      "loss": 1.2214,
      "step": 300
    },
    {
      "epoch": 0.16835016835016836,
      "grad_norm": 0.574002206325531,
      "learning_rate": 0.00019845000881782432,
      "loss": 1.2184,
      "step": 350
    },
    {
      "epoch": 0.1924001924001924,
      "grad_norm": 0.4179827570915222,
      "learning_rate": 0.00019797712717826914,
      "loss": 1.2064,
      "step": 400
    },
    {
      "epoch": 0.21645021645021645,
      "grad_norm": 0.33033809065818787,
      "learning_rate": 0.00019744210285408488,
      "loss": 1.2055,
      "step": 450
    },
    {
      "epoch": 0.2405002405002405,
      "grad_norm": 0.2719138562679291,
      "learning_rate": 0.0001968452751882264,
      "loss": 1.2077,
      "step": 500
    },
    {
      "epoch": 0.26455026455026454,
      "grad_norm": 0.29797521233558655,
      "learning_rate": 0.00019618702272285434,
      "loss": 1.2096,
      "step": 550
    },
    {
      "epoch": 0.2886002886002886,
      "grad_norm": 0.3336372673511505,
      "learning_rate": 0.00019546776295924212,
      "loss": 1.2072,
      "step": 600
    },
    {
      "epoch": 0.3126503126503126,
      "grad_norm": 0.26755037903785706,
      "learning_rate": 0.0001946879520929728,
      "loss": 1.1974,
      "step": 650
    },
    {
      "epoch": 0.3367003367003367,
      "grad_norm": 0.36268576979637146,
      "learning_rate": 0.00019384808472459368,
      "loss": 1.2045,
      "step": 700
    },
    {
      "epoch": 0.36075036075036077,
      "grad_norm": 0.3121575713157654,
      "learning_rate": 0.0001929486935459127,
      "loss": 1.1889,
      "step": 750
    },
    {
      "epoch": 0.3848003848003848,
      "grad_norm": 0.3159404993057251,
      "learning_rate": 0.00019199034900213452,
      "loss": 1.1921,
      "step": 800
    },
    {
      "epoch": 0.40885040885040885,
      "grad_norm": 0.7236579060554504,
      "learning_rate": 0.000190973658930052,
      "loss": 1.194,
      "step": 850
    },
    {
      "epoch": 0.4329004329004329,
      "grad_norm": 0.24907168745994568,
      "learning_rate": 0.00018989926817252113,
      "loss": 1.191,
      "step": 900
    },
    {
      "epoch": 0.45695045695045694,
      "grad_norm": 0.24481187760829926,
      "learning_rate": 0.00018876785816946505,
      "loss": 1.1857,
      "step": 950
    },
    {
      "epoch": 0.481000481000481,
      "grad_norm": 0.2668200731277466,
      "learning_rate": 0.00018758014652566597,
      "loss": 1.1957,
      "step": 1000
    },
    {
      "epoch": 0.5050505050505051,
      "grad_norm": 0.2687171399593353,
      "learning_rate": 0.0001863368865556191,
      "loss": 1.1864,
      "step": 1050
    },
    {
      "epoch": 0.5291005291005291,
      "grad_norm": 0.23915782570838928,
      "learning_rate": 0.0001850388668057379,
      "loss": 1.184,
      "step": 1100
    },
    {
      "epoch": 0.5531505531505532,
      "grad_norm": 0.37159469723701477,
      "learning_rate": 0.0001836869105542127,
      "loss": 1.1849,
      "step": 1150
    },
    {
      "epoch": 0.5772005772005772,
      "grad_norm": 0.2752649784088135,
      "learning_rate": 0.0001822818752888408,
      "loss": 1.1843,
      "step": 1200
    },
    {
      "epoch": 0.6012506012506013,
      "grad_norm": 0.19733025133609772,
      "learning_rate": 0.00018082465216315882,
      "loss": 1.1766,
      "step": 1250
    },
    {
      "epoch": 0.6253006253006252,
      "grad_norm": 0.2180165797472,
      "learning_rate": 0.00017931616543122214,
      "loss": 1.1865,
      "step": 1300
    },
    {
      "epoch": 0.6493506493506493,
      "grad_norm": 0.25025510787963867,
      "learning_rate": 0.00017775737186139038,
      "loss": 1.1723,
      "step": 1350
    },
    {
      "epoch": 0.6734006734006734,
      "grad_norm": 0.2865007817745209,
      "learning_rate": 0.00017614926012949028,
      "loss": 1.172,
      "step": 1400
    },
    {
      "epoch": 0.6974506974506974,
      "grad_norm": 0.3406023681163788,
      "learning_rate": 0.00017449285019174098,
      "loss": 1.1795,
      "step": 1450
    },
    {
      "epoch": 0.7215007215007215,
      "grad_norm": 0.19766800105571747,
      "learning_rate": 0.00017278919263783978,
      "loss": 1.1784,
      "step": 1500
    },
    {
      "epoch": 0.7455507455507455,
      "grad_norm": 0.1965962052345276,
      "learning_rate": 0.00017103936802461797,
      "loss": 1.1754,
      "step": 1550
    },
    {
      "epoch": 0.7696007696007696,
      "grad_norm": 0.2381555736064911,
      "learning_rate": 0.00016924448619069023,
      "loss": 1.1671,
      "step": 1600
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 0.20156389474868774,
      "learning_rate": 0.00016740568555253155,
      "loss": 1.1738,
      "step": 1650
    },
    {
      "epoch": 0.8177008177008177,
      "grad_norm": 0.18294361233711243,
      "learning_rate": 0.00016552413238242857,
      "loss": 1.1727,
      "step": 1700
    },
    {
      "epoch": 0.8417508417508418,
      "grad_norm": 0.2975623309612274,
      "learning_rate": 0.00016360102006876317,
      "loss": 1.1677,
      "step": 1750
    },
    {
      "epoch": 0.8658008658008658,
      "grad_norm": 0.1871371865272522,
      "learning_rate": 0.0001616375683590974,
      "loss": 1.1689,
      "step": 1800
    },
    {
      "epoch": 0.8898508898508899,
      "grad_norm": 0.21457934379577637,
      "learning_rate": 0.00015963502258654005,
      "loss": 1.1605,
      "step": 1850
    },
    {
      "epoch": 0.9139009139009139,
      "grad_norm": 0.20261706411838531,
      "learning_rate": 0.0001575946528798853,
      "loss": 1.1627,
      "step": 1900
    },
    {
      "epoch": 0.937950937950938,
      "grad_norm": 0.17685186862945557,
      "learning_rate": 0.0001555177533580245,
      "loss": 1.1627,
      "step": 1950
    },
    {
      "epoch": 0.962000962000962,
      "grad_norm": 0.212468221783638,
      "learning_rate": 0.00015340564130914233,
      "loss": 1.161,
      "step": 2000
    },
    {
      "epoch": 0.9860509860509861,
      "grad_norm": 0.175174742937088,
      "learning_rate": 0.00015125965635521724,
      "loss": 1.1688,
      "step": 2050
    },
    {
      "epoch": 1.0101010101010102,
      "grad_norm": 0.19970253109931946,
      "learning_rate": 0.00014908115960235682,
      "loss": 1.142,
      "step": 2100
    },
    {
      "epoch": 1.034151034151034,
      "grad_norm": 0.21254608035087585,
      "learning_rate": 0.00014687153277750676,
      "loss": 1.1271,
      "step": 2150
    },
    {
      "epoch": 1.0582010582010581,
      "grad_norm": 0.1651500016450882,
      "learning_rate": 0.00014463217735208062,
      "loss": 1.121,
      "step": 2200
    },
    {
      "epoch": 1.0822510822510822,
      "grad_norm": 0.2405405044555664,
      "learning_rate": 0.00014236451365306674,
      "loss": 1.1313,
      "step": 2250
    },
    {
      "epoch": 1.1063011063011063,
      "grad_norm": 0.17223596572875977,
      "learning_rate": 0.00014006997996217593,
      "loss": 1.1344,
      "step": 2300
    },
    {
      "epoch": 1.1303511303511304,
      "grad_norm": 0.1969347894191742,
      "learning_rate": 0.00013775003160360096,
      "loss": 1.1176,
      "step": 2350
    },
    {
      "epoch": 1.1544011544011543,
      "grad_norm": 0.187143936753273,
      "learning_rate": 0.00013540614002096701,
      "loss": 1.1322,
      "step": 2400
    },
    {
      "epoch": 1.1784511784511784,
      "grad_norm": 0.1838238537311554,
      "learning_rate": 0.00013303979184405826,
      "loss": 1.1293,
      "step": 2450
    },
    {
      "epoch": 1.2025012025012025,
      "grad_norm": 0.17928341031074524,
      "learning_rate": 0.00013065248794591223,
      "loss": 1.1268,
      "step": 2500
    },
    {
      "epoch": 1.2265512265512266,
      "grad_norm": 0.2683047950267792,
      "learning_rate": 0.00012824574249088063,
      "loss": 1.1234,
      "step": 2550
    },
    {
      "epoch": 1.2506012506012505,
      "grad_norm": 0.18034860491752625,
      "learning_rate": 0.0001258210819742599,
      "loss": 1.125,
      "step": 2600
    },
    {
      "epoch": 1.2746512746512746,
      "grad_norm": 0.26357391476631165,
      "learning_rate": 0.00012338004425410074,
      "loss": 1.1217,
      "step": 2650
    },
    {
      "epoch": 1.2987012987012987,
      "grad_norm": 0.17828579246997833,
      "learning_rate": 0.00012092417757581085,
      "loss": 1.1262,
      "step": 2700
    },
    {
      "epoch": 1.3227513227513228,
      "grad_norm": 0.20247310400009155,
      "learning_rate": 0.00011845503959016928,
      "loss": 1.1246,
      "step": 2750
    },
    {
      "epoch": 1.3468013468013469,
      "grad_norm": 0.17381271719932556,
      "learning_rate": 0.0001159741963653755,
      "loss": 1.1181,
      "step": 2800
    },
    {
      "epoch": 1.370851370851371,
      "grad_norm": 0.19958114624023438,
      "learning_rate": 0.00011348322139375948,
      "loss": 1.1307,
      "step": 2850
    },
    {
      "epoch": 1.3949013949013949,
      "grad_norm": 0.21912401914596558,
      "learning_rate": 0.00011098369459378328,
      "loss": 1.1264,
      "step": 2900
    },
    {
      "epoch": 1.418951418951419,
      "grad_norm": 0.1694297194480896,
      "learning_rate": 0.00010847720130796631,
      "loss": 1.1256,
      "step": 2950
    },
    {
      "epoch": 1.443001443001443,
      "grad_norm": 0.13446395099163055,
      "learning_rate": 0.00010596533129737092,
      "loss": 1.1258,
      "step": 3000
    },
    {
      "epoch": 1.467051467051467,
      "grad_norm": 0.140371173620224,
      "learning_rate": 0.00010344967773328507,
      "loss": 1.1191,
      "step": 3050
    },
    {
      "epoch": 1.491101491101491,
      "grad_norm": 0.18016813695430756,
      "learning_rate": 0.00010093183618674224,
      "loss": 1.114,
      "step": 3100
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 0.17306862771511078,
      "learning_rate": 9.84134036165192e-05,
      "loss": 1.1149,
      "step": 3150
    },
    {
      "epoch": 1.5392015392015392,
      "grad_norm": 0.14116255939006805,
      "learning_rate": 9.589597735625377e-05,
      "loss": 1.123,
      "step": 3200
    },
    {
      "epoch": 1.5632515632515633,
      "grad_norm": 0.16819800436496735,
      "learning_rate": 9.338115410132441e-05,
      "loss": 1.1203,
      "step": 3250
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 0.21958529949188232,
      "learning_rate": 9.087052889613518e-05,
      "loss": 1.1226,
      "step": 3300
    },
    {
      "epoch": 1.6113516113516113,
      "grad_norm": 0.15786272287368774,
      "learning_rate": 8.836569412244745e-05,
      "loss": 1.1212,
      "step": 3350
    },
    {
      "epoch": 1.6354016354016354,
      "grad_norm": 0.17366796731948853,
      "learning_rate": 8.586823848940047e-05,
      "loss": 1.1129,
      "step": 3400
    },
    {
      "epoch": 1.6594516594516593,
      "grad_norm": 0.21448016166687012,
      "learning_rate": 8.337974602586152e-05,
      "loss": 1.1216,
      "step": 3450
    },
    {
      "epoch": 1.6835016835016834,
      "grad_norm": 0.17243099212646484,
      "learning_rate": 8.090179507574427e-05,
      "loss": 1.1096,
      "step": 3500
    },
    {
      "epoch": 1.7075517075517075,
      "grad_norm": 0.1429734081029892,
      "learning_rate": 7.843595729693316e-05,
      "loss": 1.1071,
      "step": 3550
    },
    {
      "epoch": 1.7316017316017316,
      "grad_norm": 0.15200386941432953,
      "learning_rate": 7.598379666444808e-05,
      "loss": 1.1158,
      "step": 3600
    },
    {
      "epoch": 1.7556517556517557,
      "grad_norm": 0.1442406326532364,
      "learning_rate": 7.354686847848242e-05,
      "loss": 1.112,
      "step": 3650
    },
    {
      "epoch": 1.7797017797017798,
      "grad_norm": 0.17678239941596985,
      "learning_rate": 7.11267183779428e-05,
      "loss": 1.1118,
      "step": 3700
    },
    {
      "epoch": 1.8037518037518039,
      "grad_norm": 0.147593155503273,
      "learning_rate": 6.872488136011667e-05,
      "loss": 1.1165,
      "step": 3750
    },
    {
      "epoch": 1.8278018278018278,
      "grad_norm": 0.1334652155637741,
      "learning_rate": 6.634288080708952e-05,
      "loss": 1.1135,
      "step": 3800
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 0.14890378713607788,
      "learning_rate": 6.398222751952899e-05,
      "loss": 1.1086,
      "step": 3850
    },
    {
      "epoch": 1.8759018759018757,
      "grad_norm": 0.1334807574748993,
      "learning_rate": 6.164441875844882e-05,
      "loss": 1.1144,
      "step": 3900
    },
    {
      "epoch": 1.8999518999518998,
      "grad_norm": 0.12897680699825287,
      "learning_rate": 5.933093729556062e-05,
      "loss": 1.1116,
      "step": 3950
    },
    {
      "epoch": 1.924001924001924,
      "grad_norm": 0.17530564963817596,
      "learning_rate": 5.7043250472815356e-05,
      "loss": 1.1039,
      "step": 4000
    },
    {
      "epoch": 1.948051948051948,
      "grad_norm": 0.15966495871543884,
      "learning_rate": 5.478280927173145e-05,
      "loss": 1.101,
      "step": 4050
    },
    {
      "epoch": 1.9721019721019721,
      "grad_norm": 0.18890446424484253,
      "learning_rate": 5.255104739309924e-05,
      "loss": 1.1077,
      "step": 4100
    },
    {
      "epoch": 1.9961519961519962,
      "grad_norm": 0.1547369807958603,
      "learning_rate": 5.0349380347646494e-05,
      "loss": 1.103,
      "step": 4150
    },
    {
      "epoch": 2.0202020202020203,
      "grad_norm": 0.13888758420944214,
      "learning_rate": 4.8179204558240444e-05,
      "loss": 1.0826,
      "step": 4200
    },
    {
      "epoch": 2.0442520442520444,
      "grad_norm": 0.11266086250543594,
      "learning_rate": 4.6041896474197e-05,
      "loss": 1.071,
      "step": 4250
    },
    {
      "epoch": 2.068302068302068,
      "grad_norm": 0.14245671033859253,
      "learning_rate": 4.393881169825779e-05,
      "loss": 1.0759,
      "step": 4300
    },
    {
      "epoch": 2.092352092352092,
      "grad_norm": 0.1226249411702156,
      "learning_rate": 4.187128412678969e-05,
      "loss": 1.0742,
      "step": 4350
    },
    {
      "epoch": 2.1164021164021163,
      "grad_norm": 0.12307476997375488,
      "learning_rate": 3.984062510375155e-05,
      "loss": 1.0721,
      "step": 4400
    },
    {
      "epoch": 2.1404521404521404,
      "grad_norm": 0.12813834846019745,
      "learning_rate": 3.7848122588965144e-05,
      "loss": 1.0726,
      "step": 4450
    },
    {
      "epoch": 2.1645021645021645,
      "grad_norm": 0.13432885706424713,
      "learning_rate": 3.5895040341217543e-05,
      "loss": 1.0745,
      "step": 4500
    },
    {
      "epoch": 2.1885521885521886,
      "grad_norm": 0.11649097502231598,
      "learning_rate": 3.398261711671309e-05,
      "loss": 1.079,
      "step": 4550
    },
    {
      "epoch": 2.2126022126022127,
      "grad_norm": 0.11140163242816925,
      "learning_rate": 3.211206588338358e-05,
      "loss": 1.0748,
      "step": 4600
    },
    {
      "epoch": 2.236652236652237,
      "grad_norm": 0.10978424549102783,
      "learning_rate": 3.028457305155483e-05,
      "loss": 1.0726,
      "step": 4650
    },
    {
      "epoch": 2.260702260702261,
      "grad_norm": 0.11395589262247086,
      "learning_rate": 2.8501297721457422e-05,
      "loss": 1.0656,
      "step": 4700
    },
    {
      "epoch": 2.284752284752285,
      "grad_norm": 0.10599405318498611,
      "learning_rate": 2.6763370948059353e-05,
      "loss": 1.0765,
      "step": 4750
    },
    {
      "epoch": 2.3088023088023086,
      "grad_norm": 0.11157254874706268,
      "learning_rate": 2.5071895023686442e-05,
      "loss": 1.0726,
      "step": 4800
    },
    {
      "epoch": 2.3328523328523327,
      "grad_norm": 0.1390163153409958,
      "learning_rate": 2.342794277888547e-05,
      "loss": 1.0731,
      "step": 4850
    },
    {
      "epoch": 2.356902356902357,
      "grad_norm": 0.1519329994916916,
      "learning_rate": 2.1832556901973965e-05,
      "loss": 1.0704,
      "step": 4900
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.1278182566165924,
      "learning_rate": 2.0286749277707782e-05,
      "loss": 1.0661,
      "step": 4950
    },
    {
      "epoch": 2.405002405002405,
      "grad_norm": 0.10508263111114502,
      "learning_rate": 1.879150034548588e-05,
      "loss": 1.0758,
      "step": 5000
    },
    {
      "epoch": 2.429052429052429,
      "grad_norm": 0.09690719097852707,
      "learning_rate": 1.7347758477500044e-05,
      "loss": 1.0644,
      "step": 5050
    },
    {
      "epoch": 2.4531024531024532,
      "grad_norm": 0.10174595564603806,
      "learning_rate": 1.5956439377222798e-05,
      "loss": 1.0726,
      "step": 5100
    },
    {
      "epoch": 2.4771524771524773,
      "grad_norm": 0.10294167697429657,
      "learning_rate": 1.4618425498616162e-05,
      "loss": 1.0655,
      "step": 5150
    },
    {
      "epoch": 2.501202501202501,
      "grad_norm": 0.11103129386901855,
      "learning_rate": 1.3334565486428996e-05,
      "loss": 1.0651,
      "step": 5200
    },
    {
      "epoch": 2.525252525252525,
      "grad_norm": 0.10614852607250214,
      "learning_rate": 1.2105673637938053e-05,
      "loss": 1.0701,
      "step": 5250
    },
    {
      "epoch": 2.549302549302549,
      "grad_norm": 0.09437720477581024,
      "learning_rate": 1.0932529386474188e-05,
      "loss": 1.0673,
      "step": 5300
    },
    {
      "epoch": 2.5733525733525733,
      "grad_norm": 0.0965106412768364,
      "learning_rate": 9.815876807061264e-06,
      "loss": 1.0769,
      "step": 5350
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 0.09335634112358093,
      "learning_rate": 8.756424144481312e-06,
      "loss": 1.0646,
      "step": 5400
    },
    {
      "epoch": 2.6214526214526215,
      "grad_norm": 0.09890544414520264,
      "learning_rate": 7.75484336406529e-06,
      "loss": 1.0757,
      "step": 5450
    },
    {
      "epoch": 2.6455026455026456,
      "grad_norm": 0.09670912474393845,
      "learning_rate": 6.8117697254943106e-06,
      "loss": 1.0668,
      "step": 5500
    },
    {
      "epoch": 2.6695526695526697,
      "grad_norm": 0.09898468106985092,
      "learning_rate": 5.927801379881714e-06,
      "loss": 1.0745,
      "step": 5550
    },
    {
      "epoch": 2.6936026936026938,
      "grad_norm": 0.08697386831045151,
      "learning_rate": 5.103498990391509e-06,
      "loss": 1.0653,
      "step": 5600
    },
    {
      "epoch": 2.717652717652718,
      "grad_norm": 0.09457134455442429,
      "learning_rate": 4.339385376633775e-06,
      "loss": 1.0678,
      "step": 5650
    },
    {
      "epoch": 2.741702741702742,
      "grad_norm": 0.09092475473880768,
      "learning_rate": 3.6359451830626723e-06,
      "loss": 1.0635,
      "step": 5700
    },
    {
      "epoch": 2.7657527657527656,
      "grad_norm": 0.08736653625965118,
      "learning_rate": 2.993624571587239e-06,
      "loss": 1.0639,
      "step": 5750
    },
    {
      "epoch": 2.7898027898027897,
      "grad_norm": 0.09138292819261551,
      "learning_rate": 2.4128309385900717e-06,
      "loss": 1.065,
      "step": 5800
    },
    {
      "epoch": 2.813852813852814,
      "grad_norm": 0.08842656016349792,
      "learning_rate": 1.8939326565333037e-06,
      "loss": 1.0636,
      "step": 5850
    },
    {
      "epoch": 2.837902837902838,
      "grad_norm": 0.08870802819728851,
      "learning_rate": 1.437258840315714e-06,
      "loss": 1.0706,
      "step": 5900
    },
    {
      "epoch": 2.861952861952862,
      "grad_norm": 0.08659425377845764,
      "learning_rate": 1.0430991385293575e-06,
      "loss": 1.0673,
      "step": 5950
    },
    {
      "epoch": 2.886002886002886,
      "grad_norm": 0.08142086863517761,
      "learning_rate": 7.117035497478553e-07,
      "loss": 1.0697,
      "step": 6000
    },
    {
      "epoch": 2.91005291005291,
      "grad_norm": 0.080448217689991,
      "learning_rate": 4.432822639630407e-07,
      "loss": 1.0655,
      "step": 6050
    },
    {
      "epoch": 2.934102934102934,
      "grad_norm": 0.08980288356542587,
      "learning_rate": 2.380055292704575e-07,
      "loss": 1.0701,
      "step": 6100
    },
    {
      "epoch": 2.958152958152958,
      "grad_norm": 0.08309097588062286,
      "learning_rate": 9.600354388833443e-08,
      "loss": 1.0684,
      "step": 6150
    },
    {
      "epoch": 2.982202982202982,
      "grad_norm": 0.08456841111183167,
      "learning_rate": 1.7366373578442397e-08,
      "loss": 1.0684,
      "step": 6200
    }
  ],
  "logging_steps": 50,
  "max_steps": 6237,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.056700790948663e+20,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}