| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9986348122866895, | |
| "eval_steps": 500, | |
| "global_step": 366, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005460750853242321, | |
| "grad_norm": 3.971062381322324, | |
| "learning_rate": 2.702702702702703e-07, | |
| "loss": 0.5016, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010921501706484642, | |
| "grad_norm": 4.14826323264813, | |
| "learning_rate": 5.405405405405406e-07, | |
| "loss": 0.5208, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.016382252559726963, | |
| "grad_norm": 3.745507693250097, | |
| "learning_rate": 8.108108108108109e-07, | |
| "loss": 0.4379, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.021843003412969283, | |
| "grad_norm": 3.8553920899968372, | |
| "learning_rate": 1.0810810810810812e-06, | |
| "loss": 0.4602, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.027303754266211604, | |
| "grad_norm": 4.006658163193665, | |
| "learning_rate": 1.3513513513513515e-06, | |
| "loss": 0.4578, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.032764505119453925, | |
| "grad_norm": 3.0693234037079433, | |
| "learning_rate": 1.6216216216216219e-06, | |
| "loss": 0.4416, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03822525597269624, | |
| "grad_norm": 2.3221220798320386, | |
| "learning_rate": 1.8918918918918922e-06, | |
| "loss": 0.4635, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04368600682593857, | |
| "grad_norm": 1.9622941289180669, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 0.4471, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.049146757679180884, | |
| "grad_norm": 1.5785046223376498, | |
| "learning_rate": 2.432432432432433e-06, | |
| "loss": 0.3669, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05460750853242321, | |
| "grad_norm": 1.592167130140165, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 0.4232, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.060068259385665526, | |
| "grad_norm": 1.4756853657479083, | |
| "learning_rate": 2.9729729729729736e-06, | |
| "loss": 0.4105, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06552901023890785, | |
| "grad_norm": 1.327708040902143, | |
| "learning_rate": 3.2432432432432437e-06, | |
| "loss": 0.386, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.07098976109215017, | |
| "grad_norm": 1.5458703208762807, | |
| "learning_rate": 3.513513513513514e-06, | |
| "loss": 0.405, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07645051194539249, | |
| "grad_norm": 1.6200361209703527, | |
| "learning_rate": 3.7837837837837844e-06, | |
| "loss": 0.3908, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.08191126279863481, | |
| "grad_norm": 1.5715327605819764, | |
| "learning_rate": 4.0540540540540545e-06, | |
| "loss": 0.3972, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08737201365187713, | |
| "grad_norm": 1.2301811554389595, | |
| "learning_rate": 4.324324324324325e-06, | |
| "loss": 0.3739, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.09283276450511946, | |
| "grad_norm": 1.0413606131616007, | |
| "learning_rate": 4.594594594594596e-06, | |
| "loss": 0.3521, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09829351535836177, | |
| "grad_norm": 1.1239884207253636, | |
| "learning_rate": 4.864864864864866e-06, | |
| "loss": 0.4348, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1037542662116041, | |
| "grad_norm": 1.1123432515923368, | |
| "learning_rate": 5.135135135135135e-06, | |
| "loss": 0.3949, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10921501706484642, | |
| "grad_norm": 1.0996938196641266, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 0.3775, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11467576791808874, | |
| "grad_norm": 1.0868866085505373, | |
| "learning_rate": 5.675675675675676e-06, | |
| "loss": 0.3721, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.12013651877133105, | |
| "grad_norm": 1.10559810934531, | |
| "learning_rate": 5.945945945945947e-06, | |
| "loss": 0.3884, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12559726962457338, | |
| "grad_norm": 1.0187684787484814, | |
| "learning_rate": 6.2162162162162164e-06, | |
| "loss": 0.394, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1310580204778157, | |
| "grad_norm": 0.9515401547070604, | |
| "learning_rate": 6.486486486486487e-06, | |
| "loss": 0.3664, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13651877133105803, | |
| "grad_norm": 0.9873275768348283, | |
| "learning_rate": 6.7567567567567575e-06, | |
| "loss": 0.3872, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.14197952218430035, | |
| "grad_norm": 0.9420302821261468, | |
| "learning_rate": 7.027027027027028e-06, | |
| "loss": 0.3926, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14744027303754267, | |
| "grad_norm": 0.8628951646680264, | |
| "learning_rate": 7.297297297297298e-06, | |
| "loss": 0.3395, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.15290102389078497, | |
| "grad_norm": 0.8883050014456254, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 0.3692, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.1583617747440273, | |
| "grad_norm": 0.9314104245334247, | |
| "learning_rate": 7.837837837837838e-06, | |
| "loss": 0.3562, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.16382252559726962, | |
| "grad_norm": 0.8388999546883599, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 0.3291, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16928327645051194, | |
| "grad_norm": 0.9110394289660935, | |
| "learning_rate": 8.378378378378378e-06, | |
| "loss": 0.3761, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.17474402730375427, | |
| "grad_norm": 0.8529619793059433, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 0.3634, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1802047781569966, | |
| "grad_norm": 1.034615680095172, | |
| "learning_rate": 8.91891891891892e-06, | |
| "loss": 0.4073, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.18566552901023892, | |
| "grad_norm": 0.9654399340446536, | |
| "learning_rate": 9.189189189189191e-06, | |
| "loss": 0.3832, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.19112627986348124, | |
| "grad_norm": 0.8266008406999349, | |
| "learning_rate": 9.45945945945946e-06, | |
| "loss": 0.366, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.19658703071672354, | |
| "grad_norm": 1.0298041047732736, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 0.3828, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.20204778156996586, | |
| "grad_norm": 1.0253984164765952, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4253, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2075085324232082, | |
| "grad_norm": 0.845565254392157, | |
| "learning_rate": 9.999772047343259e-06, | |
| "loss": 0.3426, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.2129692832764505, | |
| "grad_norm": 0.9194984294487474, | |
| "learning_rate": 9.999088210158001e-06, | |
| "loss": 0.343, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.21843003412969283, | |
| "grad_norm": 0.8148322661922577, | |
| "learning_rate": 9.997948550797227e-06, | |
| "loss": 0.325, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22389078498293516, | |
| "grad_norm": 0.9623977105015672, | |
| "learning_rate": 9.99635317317629e-06, | |
| "loss": 0.385, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.22935153583617748, | |
| "grad_norm": 0.814300333357098, | |
| "learning_rate": 9.994302222763415e-06, | |
| "loss": 0.3462, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.2348122866894198, | |
| "grad_norm": 0.8725030955526336, | |
| "learning_rate": 9.991795886566443e-06, | |
| "loss": 0.3401, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2402730375426621, | |
| "grad_norm": 0.9974480242764955, | |
| "learning_rate": 9.988834393115768e-06, | |
| "loss": 0.3424, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.24573378839590443, | |
| "grad_norm": 0.8805730275929089, | |
| "learning_rate": 9.98541801244351e-06, | |
| "loss": 0.3742, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.25119453924914675, | |
| "grad_norm": 0.8332420506302001, | |
| "learning_rate": 9.981547056058893e-06, | |
| "loss": 0.3435, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2566552901023891, | |
| "grad_norm": 0.9445729244701234, | |
| "learning_rate": 9.977221876919833e-06, | |
| "loss": 0.3442, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2621160409556314, | |
| "grad_norm": 0.859922027597315, | |
| "learning_rate": 9.97244286940076e-06, | |
| "loss": 0.358, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2675767918088737, | |
| "grad_norm": 0.8022442917536148, | |
| "learning_rate": 9.967210469256657e-06, | |
| "loss": 0.3329, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.27303754266211605, | |
| "grad_norm": 0.8369993999252197, | |
| "learning_rate": 9.961525153583327e-06, | |
| "loss": 0.3474, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2784982935153584, | |
| "grad_norm": 0.8719055464818419, | |
| "learning_rate": 9.955387440773902e-06, | |
| "loss": 0.3364, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2839590443686007, | |
| "grad_norm": 0.9269845480680101, | |
| "learning_rate": 9.948797890471552e-06, | |
| "loss": 0.3684, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.289419795221843, | |
| "grad_norm": 0.8246571303849338, | |
| "learning_rate": 9.94175710351848e-06, | |
| "loss": 0.3564, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.29488054607508535, | |
| "grad_norm": 0.9162125135698432, | |
| "learning_rate": 9.93426572190112e-06, | |
| "loss": 0.3526, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.3003412969283277, | |
| "grad_norm": 0.9689985766932336, | |
| "learning_rate": 9.926324428691612e-06, | |
| "loss": 0.3825, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.30580204778156994, | |
| "grad_norm": 0.9203465649703365, | |
| "learning_rate": 9.917933947985508e-06, | |
| "loss": 0.3492, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.31126279863481227, | |
| "grad_norm": 0.810691112658576, | |
| "learning_rate": 9.909095044835755e-06, | |
| "loss": 0.3147, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3167235494880546, | |
| "grad_norm": 0.8980168883992854, | |
| "learning_rate": 9.899808525182935e-06, | |
| "loss": 0.3351, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3221843003412969, | |
| "grad_norm": 0.8843165617295874, | |
| "learning_rate": 9.89007523578178e-06, | |
| "loss": 0.3452, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.32764505119453924, | |
| "grad_norm": 0.8660715276442186, | |
| "learning_rate": 9.879896064123961e-06, | |
| "loss": 0.3601, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.33310580204778156, | |
| "grad_norm": 0.8638898824914902, | |
| "learning_rate": 9.869271938357168e-06, | |
| "loss": 0.3565, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3385665529010239, | |
| "grad_norm": 0.8349466672789928, | |
| "learning_rate": 9.858203827200477e-06, | |
| "loss": 0.3592, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.3440273037542662, | |
| "grad_norm": 0.9346433422616252, | |
| "learning_rate": 9.846692739856023e-06, | |
| "loss": 0.3935, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.34948805460750854, | |
| "grad_norm": 0.8287634034991234, | |
| "learning_rate": 9.834739725916988e-06, | |
| "loss": 0.3089, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.35494880546075086, | |
| "grad_norm": 0.8040217244181859, | |
| "learning_rate": 9.822345875271884e-06, | |
| "loss": 0.313, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3604095563139932, | |
| "grad_norm": 0.8053513263090958, | |
| "learning_rate": 9.80951231800518e-06, | |
| "loss": 0.3355, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3658703071672355, | |
| "grad_norm": 0.7533298814162714, | |
| "learning_rate": 9.79624022429427e-06, | |
| "loss": 0.3067, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.37133105802047783, | |
| "grad_norm": 0.9386782501271983, | |
| "learning_rate": 9.782530804302763e-06, | |
| "loss": 0.3593, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.37679180887372016, | |
| "grad_norm": 0.8507056335702303, | |
| "learning_rate": 9.768385308070139e-06, | |
| "loss": 0.3629, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3822525597269625, | |
| "grad_norm": 0.782049564136347, | |
| "learning_rate": 9.75380502539778e-06, | |
| "loss": 0.3458, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.38771331058020475, | |
| "grad_norm": 0.9109652113851044, | |
| "learning_rate": 9.738791285731353e-06, | |
| "loss": 0.348, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3931740614334471, | |
| "grad_norm": 0.8457379953081087, | |
| "learning_rate": 9.723345458039595e-06, | |
| "loss": 0.3701, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3986348122866894, | |
| "grad_norm": 0.8354411100444213, | |
| "learning_rate": 9.70746895068949e-06, | |
| "loss": 0.3453, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4040955631399317, | |
| "grad_norm": 0.7824544054631952, | |
| "learning_rate": 9.691163211317853e-06, | |
| "loss": 0.3393, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.40955631399317405, | |
| "grad_norm": 0.7752036290890001, | |
| "learning_rate": 9.674429726699324e-06, | |
| "loss": 0.3121, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4150170648464164, | |
| "grad_norm": 0.9047037383020493, | |
| "learning_rate": 9.657270022610814e-06, | |
| "loss": 0.3507, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4204778156996587, | |
| "grad_norm": 0.8453635648693023, | |
| "learning_rate": 9.63968566369238e-06, | |
| "loss": 0.3641, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.425938566552901, | |
| "grad_norm": 0.8290743120901927, | |
| "learning_rate": 9.62167825330455e-06, | |
| "loss": 0.3739, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.43139931740614335, | |
| "grad_norm": 0.8977215293449932, | |
| "learning_rate": 9.603249433382145e-06, | |
| "loss": 0.3185, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.43686006825938567, | |
| "grad_norm": 0.9078617748361664, | |
| "learning_rate": 9.584400884284546e-06, | |
| "loss": 0.3415, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.442320819112628, | |
| "grad_norm": 0.8589830385419883, | |
| "learning_rate": 9.565134324642491e-06, | |
| "loss": 0.3331, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.4477815699658703, | |
| "grad_norm": 0.804380018393787, | |
| "learning_rate": 9.545451511201365e-06, | |
| "loss": 0.322, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.45324232081911264, | |
| "grad_norm": 0.8685230840996425, | |
| "learning_rate": 9.52535423866101e-06, | |
| "loss": 0.3476, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.45870307167235497, | |
| "grad_norm": 0.9643956240091752, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 0.3671, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.4641638225255973, | |
| "grad_norm": 0.8997894029115073, | |
| "learning_rate": 9.483923683869025e-06, | |
| "loss": 0.352, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4696245733788396, | |
| "grad_norm": 0.9409163478885427, | |
| "learning_rate": 9.462594179299408e-06, | |
| "loss": 0.3533, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4750853242320819, | |
| "grad_norm": 1.0349789755076704, | |
| "learning_rate": 9.440857770650139e-06, | |
| "loss": 0.3501, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4805460750853242, | |
| "grad_norm": 0.7719492270463393, | |
| "learning_rate": 9.418716439870056e-06, | |
| "loss": 0.3092, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.48600682593856653, | |
| "grad_norm": 0.9082886643398166, | |
| "learning_rate": 9.396172205829235e-06, | |
| "loss": 0.3514, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.49146757679180886, | |
| "grad_norm": 0.8358389654564478, | |
| "learning_rate": 9.373227124134888e-06, | |
| "loss": 0.3489, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4969283276450512, | |
| "grad_norm": 0.8974753686960236, | |
| "learning_rate": 9.349883286943951e-06, | |
| "loss": 0.3632, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5023890784982935, | |
| "grad_norm": 0.8827201059716774, | |
| "learning_rate": 9.326142822772301e-06, | |
| "loss": 0.3584, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5078498293515359, | |
| "grad_norm": 0.813182991570662, | |
| "learning_rate": 9.302007896300697e-06, | |
| "loss": 0.3591, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.5133105802047782, | |
| "grad_norm": 0.7442842781039997, | |
| "learning_rate": 9.27748070817738e-06, | |
| "loss": 0.3143, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5187713310580204, | |
| "grad_norm": 0.906866423901588, | |
| "learning_rate": 9.252563494817426e-06, | |
| "loss": 0.3772, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5242320819112628, | |
| "grad_norm": 0.7894206448318375, | |
| "learning_rate": 9.227258528198832e-06, | |
| "loss": 0.3131, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5296928327645051, | |
| "grad_norm": 0.8009536933279702, | |
| "learning_rate": 9.201568115655343e-06, | |
| "loss": 0.329, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5351535836177475, | |
| "grad_norm": 0.8048929927509286, | |
| "learning_rate": 9.175494599666078e-06, | |
| "loss": 0.3278, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5406143344709897, | |
| "grad_norm": 0.814222453793431, | |
| "learning_rate": 9.14904035764193e-06, | |
| "loss": 0.3225, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5460750853242321, | |
| "grad_norm": 0.8732367458543802, | |
| "learning_rate": 9.122207801708802e-06, | |
| "loss": 0.3524, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5515358361774744, | |
| "grad_norm": 0.8134905761183274, | |
| "learning_rate": 9.094999378487659e-06, | |
| "loss": 0.3546, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5569965870307167, | |
| "grad_norm": 0.8567463415353727, | |
| "learning_rate": 9.067417568871444e-06, | |
| "loss": 0.3548, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.562457337883959, | |
| "grad_norm": 0.8151562254810784, | |
| "learning_rate": 9.03946488779887e-06, | |
| "loss": 0.3439, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5679180887372014, | |
| "grad_norm": 0.8746438505757359, | |
| "learning_rate": 9.0111438840251e-06, | |
| "loss": 0.3242, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5733788395904437, | |
| "grad_norm": 0.8085121266810896, | |
| "learning_rate": 8.982457139889358e-06, | |
| "loss": 0.3598, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.578839590443686, | |
| "grad_norm": 0.8191769217039168, | |
| "learning_rate": 8.953407271079456e-06, | |
| "loss": 0.3425, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5843003412969283, | |
| "grad_norm": 0.874872463262842, | |
| "learning_rate": 8.923996926393306e-06, | |
| "loss": 0.3795, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5897610921501707, | |
| "grad_norm": 0.8469769243731713, | |
| "learning_rate": 8.894228787497389e-06, | |
| "loss": 0.3555, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.595221843003413, | |
| "grad_norm": 0.7907533312057188, | |
| "learning_rate": 8.864105568682245e-06, | |
| "loss": 0.3425, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.6006825938566553, | |
| "grad_norm": 0.9105392675920642, | |
| "learning_rate": 8.833630016614976e-06, | |
| "loss": 0.3214, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6061433447098976, | |
| "grad_norm": 0.7743632593675985, | |
| "learning_rate": 8.80280491008881e-06, | |
| "loss": 0.3477, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6116040955631399, | |
| "grad_norm": 0.9007334854740756, | |
| "learning_rate": 8.771633059769712e-06, | |
| "loss": 0.3836, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6170648464163823, | |
| "grad_norm": 0.810760704066922, | |
| "learning_rate": 8.740117307940123e-06, | |
| "loss": 0.3397, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6225255972696245, | |
| "grad_norm": 0.9072711750424595, | |
| "learning_rate": 8.708260528239788e-06, | |
| "loss": 0.3389, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6279863481228669, | |
| "grad_norm": 0.8621760047744049, | |
| "learning_rate": 8.676065625403733e-06, | |
| "loss": 0.3788, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6334470989761092, | |
| "grad_norm": 0.8388020553913726, | |
| "learning_rate": 8.64353553499741e-06, | |
| "loss": 0.3274, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6389078498293516, | |
| "grad_norm": 0.8591445453801212, | |
| "learning_rate": 8.610673223149036e-06, | |
| "loss": 0.3598, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6443686006825938, | |
| "grad_norm": 0.8057297251815362, | |
| "learning_rate": 8.577481686279123e-06, | |
| "loss": 0.3522, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6498293515358362, | |
| "grad_norm": 0.779914515334107, | |
| "learning_rate": 8.543963950827279e-06, | |
| "loss": 0.3416, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6552901023890785, | |
| "grad_norm": 0.8241347234199242, | |
| "learning_rate": 8.51012307297624e-06, | |
| "loss": 0.341, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6607508532423209, | |
| "grad_norm": 0.7674873201219691, | |
| "learning_rate": 8.475962138373212e-06, | |
| "loss": 0.3268, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6662116040955631, | |
| "grad_norm": 0.7983877124268901, | |
| "learning_rate": 8.441484261848514e-06, | |
| "loss": 0.3744, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6716723549488055, | |
| "grad_norm": 0.924444516956386, | |
| "learning_rate": 8.406692587131569e-06, | |
| "loss": 0.341, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6771331058020478, | |
| "grad_norm": 0.7648013324474998, | |
| "learning_rate": 8.371590286564247e-06, | |
| "loss": 0.3239, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6825938566552902, | |
| "grad_norm": 0.8275858574184091, | |
| "learning_rate": 8.336180560811619e-06, | |
| "loss": 0.3588, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6880546075085324, | |
| "grad_norm": 0.7874924257335151, | |
| "learning_rate": 8.30046663857011e-06, | |
| "loss": 0.3431, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6935153583617747, | |
| "grad_norm": 0.8745641415217126, | |
| "learning_rate": 8.264451776273104e-06, | |
| "loss": 0.3489, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6989761092150171, | |
| "grad_norm": 0.8858812955767805, | |
| "learning_rate": 8.228139257794012e-06, | |
| "loss": 0.3595, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7044368600682593, | |
| "grad_norm": 0.812177330348684, | |
| "learning_rate": 8.191532394146865e-06, | |
| "loss": 0.328, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.7098976109215017, | |
| "grad_norm": 0.7755088132854933, | |
| "learning_rate": 8.154634523184389e-06, | |
| "loss": 0.3392, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.715358361774744, | |
| "grad_norm": 0.8715295143660003, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.3482, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.7208191126279864, | |
| "grad_norm": 0.7737148258150855, | |
| "learning_rate": 8.07997924308938e-06, | |
| "loss": 0.3258, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7262798634812286, | |
| "grad_norm": 0.7616464397737633, | |
| "learning_rate": 8.042228641104622e-06, | |
| "loss": 0.3164, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.731740614334471, | |
| "grad_norm": 0.7706843428925245, | |
| "learning_rate": 8.004200645479403e-06, | |
| "loss": 0.3267, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7372013651877133, | |
| "grad_norm": 0.807087784184599, | |
| "learning_rate": 7.965898723646777e-06, | |
| "loss": 0.3556, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7426621160409557, | |
| "grad_norm": 0.8571787444948499, | |
| "learning_rate": 7.927326368016677e-06, | |
| "loss": 0.349, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7481228668941979, | |
| "grad_norm": 0.758611440956407, | |
| "learning_rate": 7.888487095657484e-06, | |
| "loss": 0.3301, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7535836177474403, | |
| "grad_norm": 0.7924167608304931, | |
| "learning_rate": 7.849384447975322e-06, | |
| "loss": 0.3534, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7590443686006826, | |
| "grad_norm": 0.8750460208633537, | |
| "learning_rate": 7.810021990391163e-06, | |
| "loss": 0.3405, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.764505119453925, | |
| "grad_norm": 0.7895037781717571, | |
| "learning_rate": 7.77040331201572e-06, | |
| "loss": 0.3678, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7699658703071672, | |
| "grad_norm": 0.8223142510608592, | |
| "learning_rate": 7.73053202532219e-06, | |
| "loss": 0.3469, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7754266211604095, | |
| "grad_norm": 0.8172036706667312, | |
| "learning_rate": 7.690411765816864e-06, | |
| "loss": 0.3395, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7808873720136519, | |
| "grad_norm": 0.7717521340469524, | |
| "learning_rate": 7.650046191707641e-06, | |
| "loss": 0.3352, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7863481228668942, | |
| "grad_norm": 0.8745843729938327, | |
| "learning_rate": 7.609438983570461e-06, | |
| "loss": 0.34, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7918088737201365, | |
| "grad_norm": 0.8462879664073518, | |
| "learning_rate": 7.5685938440137185e-06, | |
| "loss": 0.3434, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7972696245733788, | |
| "grad_norm": 0.8887194246240154, | |
| "learning_rate": 7.527514497340642e-06, | |
| "loss": 0.3536, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.8027303754266212, | |
| "grad_norm": 0.720734982965855, | |
| "learning_rate": 7.486204689209719e-06, | |
| "loss": 0.3071, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.8081911262798634, | |
| "grad_norm": 0.7852180915891143, | |
| "learning_rate": 7.444668186293153e-06, | |
| "loss": 0.3318, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.8136518771331058, | |
| "grad_norm": 0.8169236397844766, | |
| "learning_rate": 7.402908775933419e-06, | |
| "loss": 0.3282, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.8191126279863481, | |
| "grad_norm": 0.8409146266167947, | |
| "learning_rate": 7.360930265797934e-06, | |
| "loss": 0.3592, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.8245733788395905, | |
| "grad_norm": 0.7893736430445095, | |
| "learning_rate": 7.318736483531861e-06, | |
| "loss": 0.3455, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8300341296928327, | |
| "grad_norm": 0.7092487578490618, | |
| "learning_rate": 7.2763312764091055e-06, | |
| "loss": 0.307, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8354948805460751, | |
| "grad_norm": 0.7643841671055314, | |
| "learning_rate": 7.23371851098152e-06, | |
| "loss": 0.3104, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8409556313993174, | |
| "grad_norm": 0.8743703462981528, | |
| "learning_rate": 7.190902072726336e-06, | |
| "loss": 0.3601, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8464163822525598, | |
| "grad_norm": 0.8748161240027253, | |
| "learning_rate": 7.147885865691899e-06, | |
| "loss": 0.3592, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.851877133105802, | |
| "grad_norm": 0.6528952892311825, | |
| "learning_rate": 7.104673812141676e-06, | |
| "loss": 0.2919, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8573378839590444, | |
| "grad_norm": 0.8161745547126792, | |
| "learning_rate": 7.061269852196633e-06, | |
| "loss": 0.345, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8627986348122867, | |
| "grad_norm": 0.8321903783865391, | |
| "learning_rate": 7.017677943475962e-06, | |
| "loss": 0.321, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.868259385665529, | |
| "grad_norm": 0.83313681444351, | |
| "learning_rate": 6.973902060736226e-06, | |
| "loss": 0.3435, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8737201365187713, | |
| "grad_norm": 0.7505151585539925, | |
| "learning_rate": 6.929946195508933e-06, | |
| "loss": 0.3163, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8791808873720136, | |
| "grad_norm": 0.7304802524364322, | |
| "learning_rate": 6.8858143557365865e-06, | |
| "loss": 0.328, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.884641638225256, | |
| "grad_norm": 0.8143420713928606, | |
| "learning_rate": 6.841510565407235e-06, | |
| "loss": 0.3341, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8901023890784983, | |
| "grad_norm": 0.7567204344075086, | |
| "learning_rate": 6.797038864187564e-06, | |
| "loss": 0.3059, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8955631399317406, | |
| "grad_norm": 0.7826567782778101, | |
| "learning_rate": 6.752403307054549e-06, | |
| "loss": 0.3283, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.9010238907849829, | |
| "grad_norm": 0.7886900433942758, | |
| "learning_rate": 6.707607963925725e-06, | |
| "loss": 0.3592, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.9064846416382253, | |
| "grad_norm": 0.820709571232716, | |
| "learning_rate": 6.66265691928808e-06, | |
| "loss": 0.3605, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.9119453924914676, | |
| "grad_norm": 0.7681789982648866, | |
| "learning_rate": 6.617554271825636e-06, | |
| "loss": 0.3051, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.9174061433447099, | |
| "grad_norm": 0.8006459558215293, | |
| "learning_rate": 6.5723041340457175e-06, | |
| "loss": 0.3542, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.9228668941979522, | |
| "grad_norm": 0.7333102829214887, | |
| "learning_rate": 6.526910631903973e-06, | |
| "loss": 0.3254, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.9283276450511946, | |
| "grad_norm": 0.7766899671870917, | |
| "learning_rate": 6.481377904428171e-06, | |
| "loss": 0.3297, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9337883959044369, | |
| "grad_norm": 0.8887532080533157, | |
| "learning_rate": 6.435710103340787e-06, | |
| "loss": 0.3531, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9392491467576792, | |
| "grad_norm": 0.7606421967689092, | |
| "learning_rate": 6.3899113926804565e-06, | |
| "loss": 0.3279, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9447098976109215, | |
| "grad_norm": 0.7894203946388427, | |
| "learning_rate": 6.3439859484222874e-06, | |
| "loss": 0.3206, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9501706484641638, | |
| "grad_norm": 0.8106143896629081, | |
| "learning_rate": 6.297937958097094e-06, | |
| "loss": 0.3185, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9556313993174061, | |
| "grad_norm": 0.7673331407317434, | |
| "learning_rate": 6.251771620409563e-06, | |
| "loss": 0.3408, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9610921501706484, | |
| "grad_norm": 0.7678720102410665, | |
| "learning_rate": 6.205491144855432e-06, | |
| "loss": 0.3388, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9665529010238908, | |
| "grad_norm": 0.8058357314804626, | |
| "learning_rate": 6.1591007513376425e-06, | |
| "loss": 0.348, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9720136518771331, | |
| "grad_norm": 0.7150290944167804, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.3187, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9774744027303754, | |
| "grad_norm": 0.7724885943742522, | |
| "learning_rate": 6.066007139749351e-06, | |
| "loss": 0.3112, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9829351535836177, | |
| "grad_norm": 0.7079636144073458, | |
| "learning_rate": 6.019312410053286e-06, | |
| "loss": 0.3115, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9883959044368601, | |
| "grad_norm": 0.7145124027416198, | |
| "learning_rate": 5.972524738368452e-06, | |
| "loss": 0.3015, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9938566552901024, | |
| "grad_norm": 0.7747190577463166, | |
| "learning_rate": 5.925648390844476e-06, | |
| "loss": 0.3405, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9993174061433447, | |
| "grad_norm": 0.7411495697672651, | |
| "learning_rate": 5.878687641716539e-06, | |
| "loss": 0.3241, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.004778156996587, | |
| "grad_norm": 2.2845026949908367, | |
| "learning_rate": 5.831646772915651e-06, | |
| "loss": 0.5887, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.0102389078498293, | |
| "grad_norm": 0.767067987662548, | |
| "learning_rate": 5.7845300736782205e-06, | |
| "loss": 0.2696, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.0156996587030718, | |
| "grad_norm": 0.6444558204539116, | |
| "learning_rate": 5.7373418401549565e-06, | |
| "loss": 0.2179, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.021160409556314, | |
| "grad_norm": 0.6326875020427418, | |
| "learning_rate": 5.690086375019135e-06, | |
| "loss": 0.2063, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.0266211604095563, | |
| "grad_norm": 0.6386737207560813, | |
| "learning_rate": 5.642767987074288e-06, | |
| "loss": 0.2395, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.0320819112627986, | |
| "grad_norm": 0.7191360204333792, | |
| "learning_rate": 5.595390990861311e-06, | |
| "loss": 0.2593, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.0375426621160408, | |
| "grad_norm": 0.7139153218951271, | |
| "learning_rate": 5.547959706265068e-06, | |
| "loss": 0.25, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.0430034129692833, | |
| "grad_norm": 0.7132174285440807, | |
| "learning_rate": 5.500478458120493e-06, | |
| "loss": 0.2656, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0484641638225256, | |
| "grad_norm": 0.7373698973683902, | |
| "learning_rate": 5.45295157581825e-06, | |
| "loss": 0.2643, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0539249146757679, | |
| "grad_norm": 0.6806301830473086, | |
| "learning_rate": 5.405383392909973e-06, | |
| "loss": 0.2521, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.0593856655290101, | |
| "grad_norm": 0.6785558579468979, | |
| "learning_rate": 5.357778246713131e-06, | |
| "loss": 0.254, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.0648464163822526, | |
| "grad_norm": 0.6701400051635917, | |
| "learning_rate": 5.310140477915544e-06, | |
| "loss": 0.2303, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.070307167235495, | |
| "grad_norm": 0.7408218245910705, | |
| "learning_rate": 5.262474430179597e-06, | |
| "loss": 0.2587, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.0757679180887372, | |
| "grad_norm": 0.652029496994987, | |
| "learning_rate": 5.2147844497461745e-06, | |
| "loss": 0.2201, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.0812286689419794, | |
| "grad_norm": 0.6145756306559864, | |
| "learning_rate": 5.1670748850383734e-06, | |
| "loss": 0.2131, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.086689419795222, | |
| "grad_norm": 0.6166424191446999, | |
| "learning_rate": 5.1193500862650045e-06, | |
| "loss": 0.2272, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0921501706484642, | |
| "grad_norm": 0.6183167944122974, | |
| "learning_rate": 5.071614405023938e-06, | |
| "loss": 0.2239, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0976109215017065, | |
| "grad_norm": 0.7073017446193981, | |
| "learning_rate": 5.023872193905316e-06, | |
| "loss": 0.2564, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.1030716723549487, | |
| "grad_norm": 0.6785368103283103, | |
| "learning_rate": 4.976127806094685e-06, | |
| "loss": 0.2598, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.108532423208191, | |
| "grad_norm": 0.6686099383276679, | |
| "learning_rate": 4.928385594976063e-06, | |
| "loss": 0.2391, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.1139931740614335, | |
| "grad_norm": 0.6046536649329635, | |
| "learning_rate": 4.880649913734996e-06, | |
| "loss": 0.2111, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.1194539249146758, | |
| "grad_norm": 0.6455972829075776, | |
| "learning_rate": 4.832925114961629e-06, | |
| "loss": 0.2291, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.124914675767918, | |
| "grad_norm": 0.6294601922178525, | |
| "learning_rate": 4.785215550253826e-06, | |
| "loss": 0.2237, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.1303754266211605, | |
| "grad_norm": 0.6539972986726327, | |
| "learning_rate": 4.737525569820405e-06, | |
| "loss": 0.2415, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.1358361774744028, | |
| "grad_norm": 0.6841776523547041, | |
| "learning_rate": 4.689859522084457e-06, | |
| "loss": 0.2573, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.141296928327645, | |
| "grad_norm": 0.708275852733329, | |
| "learning_rate": 4.64222175328687e-06, | |
| "loss": 0.2535, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.1467576791808873, | |
| "grad_norm": 0.6327858698732379, | |
| "learning_rate": 4.594616607090028e-06, | |
| "loss": 0.2284, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.1522184300341296, | |
| "grad_norm": 0.6408257648532151, | |
| "learning_rate": 4.547048424181751e-06, | |
| "loss": 0.2294, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.157679180887372, | |
| "grad_norm": 0.6159123552870842, | |
| "learning_rate": 4.499521541879508e-06, | |
| "loss": 0.2226, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.1631399317406144, | |
| "grad_norm": 0.5823300781202381, | |
| "learning_rate": 4.452040293734934e-06, | |
| "loss": 0.2108, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.1686006825938566, | |
| "grad_norm": 0.6041391928150867, | |
| "learning_rate": 4.40460900913869e-06, | |
| "loss": 0.2224, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.174061433447099, | |
| "grad_norm": 0.6641306892375002, | |
| "learning_rate": 4.357232012925714e-06, | |
| "loss": 0.2384, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.1795221843003414, | |
| "grad_norm": 0.6503207204016519, | |
| "learning_rate": 4.309913624980866e-06, | |
| "loss": 0.2347, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.1849829351535837, | |
| "grad_norm": 0.62805580635999, | |
| "learning_rate": 4.262658159845046e-06, | |
| "loss": 0.229, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.190443686006826, | |
| "grad_norm": 0.6275617918722145, | |
| "learning_rate": 4.2154699263217794e-06, | |
| "loss": 0.2286, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1959044368600682, | |
| "grad_norm": 0.7617460871871701, | |
| "learning_rate": 4.1683532270843505e-06, | |
| "loss": 0.2574, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.2013651877133107, | |
| "grad_norm": 0.6140110702778818, | |
| "learning_rate": 4.121312358283464e-06, | |
| "loss": 0.2149, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.206825938566553, | |
| "grad_norm": 0.6247259244040378, | |
| "learning_rate": 4.074351609155527e-06, | |
| "loss": 0.2381, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.2122866894197952, | |
| "grad_norm": 0.6591171660703798, | |
| "learning_rate": 4.0274752616315485e-06, | |
| "loss": 0.2344, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.2177474402730375, | |
| "grad_norm": 0.6436293629709356, | |
| "learning_rate": 3.980687589946715e-06, | |
| "loss": 0.2319, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.2232081911262798, | |
| "grad_norm": 0.7670288305294722, | |
| "learning_rate": 3.9339928602506505e-06, | |
| "loss": 0.2497, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.2286689419795223, | |
| "grad_norm": 0.6962942427446093, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.2336, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.2341296928327645, | |
| "grad_norm": 0.6965676226191562, | |
| "learning_rate": 3.840899248662358e-06, | |
| "loss": 0.2552, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.2395904436860068, | |
| "grad_norm": 0.6641042493963545, | |
| "learning_rate": 3.7945088551445698e-06, | |
| "loss": 0.2563, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.245051194539249, | |
| "grad_norm": 0.6196955536234605, | |
| "learning_rate": 3.748228379590438e-06, | |
| "loss": 0.2291, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.2505119453924913, | |
| "grad_norm": 0.6278753799486634, | |
| "learning_rate": 3.7020620419029095e-06, | |
| "loss": 0.2141, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.2559726962457338, | |
| "grad_norm": 0.6082644009007588, | |
| "learning_rate": 3.656014051577713e-06, | |
| "loss": 0.2122, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.261433447098976, | |
| "grad_norm": 0.6736359984841271, | |
| "learning_rate": 3.610088607319544e-06, | |
| "loss": 0.2367, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.2668941979522184, | |
| "grad_norm": 0.6329301932656438, | |
| "learning_rate": 3.5642898966592145e-06, | |
| "loss": 0.235, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2723549488054609, | |
| "grad_norm": 0.619963960271499, | |
| "learning_rate": 3.518622095571831e-06, | |
| "loss": 0.2208, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.2778156996587031, | |
| "grad_norm": 0.6693990598652739, | |
| "learning_rate": 3.4730893680960267e-06, | |
| "loss": 0.2406, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.2832764505119454, | |
| "grad_norm": 0.6864592317171182, | |
| "learning_rate": 3.4276958659542838e-06, | |
| "loss": 0.243, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.2887372013651877, | |
| "grad_norm": 0.7236681291816511, | |
| "learning_rate": 3.382445728174365e-06, | |
| "loss": 0.2586, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.29419795221843, | |
| "grad_norm": 0.6176752667693888, | |
| "learning_rate": 3.3373430807119212e-06, | |
| "loss": 0.2251, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.2996587030716724, | |
| "grad_norm": 0.7022262485772638, | |
| "learning_rate": 3.292392036074277e-06, | |
| "loss": 0.2316, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.3051194539249147, | |
| "grad_norm": 0.6404240889042992, | |
| "learning_rate": 3.2475966929454505e-06, | |
| "loss": 0.2384, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.310580204778157, | |
| "grad_norm": 0.7080127357360425, | |
| "learning_rate": 3.202961135812437e-06, | |
| "loss": 0.248, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.3160409556313994, | |
| "grad_norm": 0.668638034601711, | |
| "learning_rate": 3.1584894345927663e-06, | |
| "loss": 0.2212, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.3215017064846417, | |
| "grad_norm": 0.6729621818012642, | |
| "learning_rate": 3.114185644263415e-06, | |
| "loss": 0.222, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.326962457337884, | |
| "grad_norm": 0.6723316181938683, | |
| "learning_rate": 3.0700538044910684e-06, | |
| "loss": 0.2246, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.3324232081911263, | |
| "grad_norm": 0.6671061425745013, | |
| "learning_rate": 3.0260979392637753e-06, | |
| "loss": 0.2518, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.3378839590443685, | |
| "grad_norm": 0.5962051816320753, | |
| "learning_rate": 2.9823220565240396e-06, | |
| "loss": 0.2224, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.343344709897611, | |
| "grad_norm": 0.6511243444073086, | |
| "learning_rate": 2.9387301478033694e-06, | |
| "loss": 0.2521, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.3488054607508533, | |
| "grad_norm": 0.6112346949791394, | |
| "learning_rate": 2.8953261878583263e-06, | |
| "loss": 0.2164, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.3542662116040955, | |
| "grad_norm": 0.6844064518509092, | |
| "learning_rate": 2.852114134308104e-06, | |
| "loss": 0.2532, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3597269624573378, | |
| "grad_norm": 0.6383549765315465, | |
| "learning_rate": 2.8090979272736663e-06, | |
| "loss": 0.2401, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.36518771331058, | |
| "grad_norm": 0.5944587646093914, | |
| "learning_rate": 2.766281489018482e-06, | |
| "loss": 0.2293, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3706484641638226, | |
| "grad_norm": 0.6307687611402706, | |
| "learning_rate": 2.7236687235908953e-06, | |
| "loss": 0.2188, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.3761092150170648, | |
| "grad_norm": 0.6238745929037437, | |
| "learning_rate": 2.681263516468139e-06, | |
| "loss": 0.2475, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.3815699658703071, | |
| "grad_norm": 0.6373763499985442, | |
| "learning_rate": 2.6390697342020665e-06, | |
| "loss": 0.2343, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.3870307167235496, | |
| "grad_norm": 0.6734367954708225, | |
| "learning_rate": 2.5970912240665815e-06, | |
| "loss": 0.2353, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.3924914675767919, | |
| "grad_norm": 0.6582720475674197, | |
| "learning_rate": 2.5553318137068473e-06, | |
| "loss": 0.2474, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.3979522184300341, | |
| "grad_norm": 0.6052116695135601, | |
| "learning_rate": 2.5137953107902814e-06, | |
| "loss": 0.2322, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.4034129692832764, | |
| "grad_norm": 0.6552372504854818, | |
| "learning_rate": 2.472485502659358e-06, | |
| "loss": 0.2468, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.4088737201365187, | |
| "grad_norm": 0.6302233591025854, | |
| "learning_rate": 2.4314061559862836e-06, | |
| "loss": 0.2398, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.4143344709897612, | |
| "grad_norm": 0.654084080401935, | |
| "learning_rate": 2.3905610164295394e-06, | |
| "loss": 0.2329, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.4197952218430034, | |
| "grad_norm": 0.6485524489438387, | |
| "learning_rate": 2.3499538082923607e-06, | |
| "loss": 0.2446, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.4252559726962457, | |
| "grad_norm": 0.6260148485778105, | |
| "learning_rate": 2.309588234183137e-06, | |
| "loss": 0.215, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.430716723549488, | |
| "grad_norm": 0.6165139801898837, | |
| "learning_rate": 2.2694679746778116e-06, | |
| "loss": 0.2235, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.4361774744027302, | |
| "grad_norm": 0.6073663452431178, | |
| "learning_rate": 2.22959668798428e-06, | |
| "loss": 0.21, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.4416382252559727, | |
| "grad_norm": 0.6687068934985456, | |
| "learning_rate": 2.1899780096088375e-06, | |
| "loss": 0.2609, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.447098976109215, | |
| "grad_norm": 0.5999286753849784, | |
| "learning_rate": 2.1506155520246795e-06, | |
| "loss": 0.2275, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.4525597269624573, | |
| "grad_norm": 0.6562378405208374, | |
| "learning_rate": 2.1115129043425188e-06, | |
| "loss": 0.2577, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.4580204778156998, | |
| "grad_norm": 0.6564390690276799, | |
| "learning_rate": 2.072673631983323e-06, | |
| "loss": 0.2583, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.463481228668942, | |
| "grad_norm": 0.6175405413313049, | |
| "learning_rate": 2.0341012763532243e-06, | |
| "loss": 0.2252, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4689419795221843, | |
| "grad_norm": 0.6294576454771881, | |
| "learning_rate": 1.995799354520598e-06, | |
| "loss": 0.2282, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.4744027303754266, | |
| "grad_norm": 0.6491408881404807, | |
| "learning_rate": 1.9577713588953797e-06, | |
| "loss": 0.2204, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.4798634812286688, | |
| "grad_norm": 0.626492440911862, | |
| "learning_rate": 1.9200207569106216e-06, | |
| "loss": 0.2363, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.4853242320819113, | |
| "grad_norm": 0.6328542452711655, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.2312, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.4907849829351536, | |
| "grad_norm": 0.6502462448470019, | |
| "learning_rate": 1.8453654768156138e-06, | |
| "loss": 0.2512, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.4962457337883959, | |
| "grad_norm": 0.6012861830171234, | |
| "learning_rate": 1.8084676058531376e-06, | |
| "loss": 0.2285, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.5017064846416384, | |
| "grad_norm": 0.6834276368175269, | |
| "learning_rate": 1.771860742205988e-06, | |
| "loss": 0.2512, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.5071672354948804, | |
| "grad_norm": 0.6528553917231606, | |
| "learning_rate": 1.7355482237268983e-06, | |
| "loss": 0.2382, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.512627986348123, | |
| "grad_norm": 0.5819350883243867, | |
| "learning_rate": 1.6995333614298908e-06, | |
| "loss": 0.2097, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.5180887372013652, | |
| "grad_norm": 0.6231114790212122, | |
| "learning_rate": 1.6638194391883822e-06, | |
| "loss": 0.2352, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.5235494880546074, | |
| "grad_norm": 0.6082248229800555, | |
| "learning_rate": 1.6284097134357535e-06, | |
| "loss": 0.2241, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.52901023890785, | |
| "grad_norm": 0.5824479788396233, | |
| "learning_rate": 1.5933074128684333e-06, | |
| "loss": 0.2149, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.5344709897610922, | |
| "grad_norm": 0.6414988574509947, | |
| "learning_rate": 1.5585157381514875e-06, | |
| "loss": 0.2629, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.5399317406143345, | |
| "grad_norm": 0.6281763626708758, | |
| "learning_rate": 1.5240378616267887e-06, | |
| "loss": 0.2213, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.545392491467577, | |
| "grad_norm": 0.6342381052621304, | |
| "learning_rate": 1.4898769270237611e-06, | |
| "loss": 0.2469, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.550853242320819, | |
| "grad_norm": 0.63348306460088, | |
| "learning_rate": 1.4560360491727233e-06, | |
| "loss": 0.2369, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.5563139931740615, | |
| "grad_norm": 0.6211597914243564, | |
| "learning_rate": 1.4225183137208775e-06, | |
| "loss": 0.2464, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.5617747440273038, | |
| "grad_norm": 0.6287566376737247, | |
| "learning_rate": 1.389326776850966e-06, | |
| "loss": 0.2378, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.567235494880546, | |
| "grad_norm": 0.6155227503256832, | |
| "learning_rate": 1.3564644650025894e-06, | |
| "loss": 0.2501, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.5726962457337885, | |
| "grad_norm": 0.6705552802557545, | |
| "learning_rate": 1.323934374596268e-06, | |
| "loss": 0.2642, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.5781569965870306, | |
| "grad_norm": 0.6468155028186762, | |
| "learning_rate": 1.2917394717602123e-06, | |
| "loss": 0.2391, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.583617747440273, | |
| "grad_norm": 0.6059123207863814, | |
| "learning_rate": 1.2598826920598773e-06, | |
| "loss": 0.2471, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.5890784982935153, | |
| "grad_norm": 0.622906276078447, | |
| "learning_rate": 1.2283669402302878e-06, | |
| "loss": 0.2441, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.5945392491467576, | |
| "grad_norm": 0.6123734431964898, | |
| "learning_rate": 1.197195089911191e-06, | |
| "loss": 0.2359, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.6173696097759185, | |
| "learning_rate": 1.166369983385024e-06, | |
| "loss": 0.2302, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.6054607508532424, | |
| "grad_norm": 0.6309581460125253, | |
| "learning_rate": 1.1358944313177566e-06, | |
| "loss": 0.2312, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.6109215017064846, | |
| "grad_norm": 0.6390016010862638, | |
| "learning_rate": 1.1057712125026116e-06, | |
| "loss": 0.2442, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.6163822525597271, | |
| "grad_norm": 0.6276135127186313, | |
| "learning_rate": 1.0760030736066952e-06, | |
| "loss": 0.2297, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.6218430034129692, | |
| "grad_norm": 0.612896281499447, | |
| "learning_rate": 1.0465927289205452e-06, | |
| "loss": 0.2346, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.6273037542662117, | |
| "grad_norm": 0.5859909784104106, | |
| "learning_rate": 1.0175428601106441e-06, | |
| "loss": 0.2119, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.632764505119454, | |
| "grad_norm": 0.6209786013367464, | |
| "learning_rate": 9.888561159748995e-07, | |
| "loss": 0.2432, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.6382252559726962, | |
| "grad_norm": 0.6352233419147338, | |
| "learning_rate": 9.605351122011308e-07, | |
| "loss": 0.2392, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.6436860068259387, | |
| "grad_norm": 0.5719749997080835, | |
| "learning_rate": 9.325824311285564e-07, | |
| "loss": 0.2173, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.6491467576791807, | |
| "grad_norm": 0.6657684609315078, | |
| "learning_rate": 9.050006215123419e-07, | |
| "loss": 0.2606, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.6546075085324232, | |
| "grad_norm": 0.5728781873883457, | |
| "learning_rate": 8.777921982911996e-07, | |
| "loss": 0.2214, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.6600682593856655, | |
| "grad_norm": 0.6162844560531199, | |
| "learning_rate": 8.509596423580712e-07, | |
| "loss": 0.2464, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.6655290102389078, | |
| "grad_norm": 0.5952374213138941, | |
| "learning_rate": 8.245054003339247e-07, | |
| "loss": 0.226, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.6709897610921502, | |
| "grad_norm": 0.5961891699890017, | |
| "learning_rate": 7.984318843446593e-07, | |
| "loss": 0.2221, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.6764505119453925, | |
| "grad_norm": 0.5937715818201527, | |
| "learning_rate": 7.727414718011706e-07, | |
| "loss": 0.2117, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.6819112627986348, | |
| "grad_norm": 0.6853681886295775, | |
| "learning_rate": 7.474365051825749e-07, | |
| "loss": 0.257, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.6873720136518773, | |
| "grad_norm": 0.6359287265633253, | |
| "learning_rate": 7.225192918226215e-07, | |
| "loss": 0.2395, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.6928327645051193, | |
| "grad_norm": 0.5963026735275871, | |
| "learning_rate": 6.979921036993042e-07, | |
| "loss": 0.2233, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.6982935153583618, | |
| "grad_norm": 0.6319949149208983, | |
| "learning_rate": 6.738571772276997e-07, | |
| "loss": 0.2416, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.703754266211604, | |
| "grad_norm": 0.6203658184125409, | |
| "learning_rate": 6.501167130560515e-07, | |
| "loss": 0.2283, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.7092150170648464, | |
| "grad_norm": 0.5938364868062161, | |
| "learning_rate": 6.267728758651131e-07, | |
| "loss": 0.2302, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.7146757679180888, | |
| "grad_norm": 0.6087142064405593, | |
| "learning_rate": 6.038277941707671e-07, | |
| "loss": 0.2039, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.7201365187713311, | |
| "grad_norm": 0.6401443657158584, | |
| "learning_rate": 5.812835601299438e-07, | |
| "loss": 0.254, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.7255972696245734, | |
| "grad_norm": 0.5597403672115617, | |
| "learning_rate": 5.591422293498633e-07, | |
| "loss": 0.2074, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.7310580204778157, | |
| "grad_norm": 0.6510913944426799, | |
| "learning_rate": 5.374058207005945e-07, | |
| "loss": 0.242, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.736518771331058, | |
| "grad_norm": 0.5851164556251767, | |
| "learning_rate": 5.160763161309768e-07, | |
| "loss": 0.2208, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.7419795221843004, | |
| "grad_norm": 0.5863228147924701, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 0.2167, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.7474402730375427, | |
| "grad_norm": 0.6434185214532853, | |
| "learning_rate": 4.7464576133899043e-07, | |
| "loss": 0.2208, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.752901023890785, | |
| "grad_norm": 0.6536708886817881, | |
| "learning_rate": 4.545484887986368e-07, | |
| "loss": 0.2558, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.7583617747440274, | |
| "grad_norm": 0.5913503349988765, | |
| "learning_rate": 4.348656753575092e-07, | |
| "loss": 0.2412, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.7638225255972695, | |
| "grad_norm": 0.6056503972371652, | |
| "learning_rate": 4.1559911571545544e-07, | |
| "loss": 0.2302, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.769283276450512, | |
| "grad_norm": 0.6287557474105558, | |
| "learning_rate": 3.9675056661785563e-07, | |
| "loss": 0.2184, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.7747440273037542, | |
| "grad_norm": 0.6387002743522794, | |
| "learning_rate": 3.783217466954503e-07, | |
| "loss": 0.2302, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.7802047781569965, | |
| "grad_norm": 0.5919282825342829, | |
| "learning_rate": 3.603143363076217e-07, | |
| "loss": 0.2155, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.785665529010239, | |
| "grad_norm": 0.6473381627525607, | |
| "learning_rate": 3.427299773891868e-07, | |
| "loss": 0.2661, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.7911262798634813, | |
| "grad_norm": 0.5931604711623788, | |
| "learning_rate": 3.255702733006766e-07, | |
| "loss": 0.2338, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.7965870307167235, | |
| "grad_norm": 0.6129008231127687, | |
| "learning_rate": 3.088367886821481e-07, | |
| "loss": 0.2514, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.802047781569966, | |
| "grad_norm": 0.6062339345391793, | |
| "learning_rate": 2.925310493105099e-07, | |
| "loss": 0.208, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.807508532423208, | |
| "grad_norm": 0.640691825777882, | |
| "learning_rate": 2.7665454196040665e-07, | |
| "loss": 0.2568, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.8129692832764506, | |
| "grad_norm": 0.6228877907424235, | |
| "learning_rate": 2.6120871426864866e-07, | |
| "loss": 0.2445, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.8184300341296928, | |
| "grad_norm": 0.6109720444847905, | |
| "learning_rate": 2.4619497460222184e-07, | |
| "loss": 0.2408, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.823890784982935, | |
| "grad_norm": 0.6345094654832795, | |
| "learning_rate": 2.316146919298623e-07, | |
| "loss": 0.221, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.8293515358361776, | |
| "grad_norm": 0.5612296424074616, | |
| "learning_rate": 2.1746919569723858e-07, | |
| "loss": 0.2137, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.8348122866894196, | |
| "grad_norm": 0.5831279872950338, | |
| "learning_rate": 2.037597757057297e-07, | |
| "loss": 0.2178, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.8402730375426621, | |
| "grad_norm": 0.639477342500686, | |
| "learning_rate": 1.9048768199481983e-07, | |
| "loss": 0.2417, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.8457337883959044, | |
| "grad_norm": 0.5861730216308635, | |
| "learning_rate": 1.776541247281177e-07, | |
| "loss": 0.229, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.8511945392491467, | |
| "grad_norm": 0.6019956045442054, | |
| "learning_rate": 1.6526027408301227e-07, | |
| "loss": 0.2212, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.8566552901023892, | |
| "grad_norm": 0.59132952240458, | |
| "learning_rate": 1.5330726014397668e-07, | |
| "loss": 0.2301, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.8621160409556314, | |
| "grad_norm": 0.6078005334703832, | |
| "learning_rate": 1.417961727995254e-07, | |
| "loss": 0.2239, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.8675767918088737, | |
| "grad_norm": 0.585630251596369, | |
| "learning_rate": 1.307280616428336e-07, | |
| "loss": 0.2093, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.8730375426621162, | |
| "grad_norm": 0.6003005675220531, | |
| "learning_rate": 1.2010393587603975e-07, | |
| "loss": 0.2558, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8784982935153582, | |
| "grad_norm": 0.6007862550074166, | |
| "learning_rate": 1.0992476421822052e-07, | |
| "loss": 0.2217, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.8839590443686007, | |
| "grad_norm": 0.5798264754441842, | |
| "learning_rate": 1.0019147481706626e-07, | |
| "loss": 0.2, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.889419795221843, | |
| "grad_norm": 0.5801152259959127, | |
| "learning_rate": 9.090495516424713e-08, | |
| "loss": 0.2219, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8948805460750853, | |
| "grad_norm": 0.5923845690395708, | |
| "learning_rate": 8.206605201449447e-08, | |
| "loss": 0.2029, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.9003412969283278, | |
| "grad_norm": 0.5987478859251119, | |
| "learning_rate": 7.367557130838921e-08, | |
| "loss": 0.2256, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.9058020477815698, | |
| "grad_norm": 0.5842539453063824, | |
| "learning_rate": 6.573427809888067e-08, | |
| "loss": 0.2003, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.9112627986348123, | |
| "grad_norm": 0.6351293922330473, | |
| "learning_rate": 5.824289648152126e-08, | |
| "loss": 0.2395, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.9167235494880546, | |
| "grad_norm": 0.6674020151826169, | |
| "learning_rate": 5.120210952844873e-08, | |
| "loss": 0.271, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.9221843003412968, | |
| "grad_norm": 0.5675294646121625, | |
| "learning_rate": 4.461255922609986e-08, | |
| "loss": 0.2172, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.9276450511945393, | |
| "grad_norm": 0.6285436049388992, | |
| "learning_rate": 3.8474846416672874e-08, | |
| "loss": 0.2399, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.9331058020477816, | |
| "grad_norm": 0.6175868696026672, | |
| "learning_rate": 3.278953074334512e-08, | |
| "loss": 0.2212, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.9385665529010239, | |
| "grad_norm": 0.6190764065365002, | |
| "learning_rate": 2.75571305992417e-08, | |
| "loss": 0.2374, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.9440273037542664, | |
| "grad_norm": 0.6366754795576353, | |
| "learning_rate": 2.2778123080167136e-08, | |
| "loss": 0.2317, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9494880546075084, | |
| "grad_norm": 0.6088811286257059, | |
| "learning_rate": 1.845294394110686e-08, | |
| "loss": 0.2161, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.954948805460751, | |
| "grad_norm": 0.5983150959140915, | |
| "learning_rate": 1.4581987556490095e-08, | |
| "loss": 0.2273, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9604095563139932, | |
| "grad_norm": 0.6172951456789394, | |
| "learning_rate": 1.1165606884234182e-08, | |
| "loss": 0.2514, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.9658703071672354, | |
| "grad_norm": 0.6110206348502005, | |
| "learning_rate": 8.204113433559202e-09, | |
| "loss": 0.2248, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.971331058020478, | |
| "grad_norm": 0.5973553403473014, | |
| "learning_rate": 5.6977772365857105e-09, | |
| "loss": 0.2415, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.9767918088737202, | |
| "grad_norm": 0.6109000206055947, | |
| "learning_rate": 3.6468268237105364e-09, | |
| "loss": 0.2319, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.9822525597269625, | |
| "grad_norm": 0.6357125881065677, | |
| "learning_rate": 2.0514492027728928e-09, | |
| "loss": 0.2457, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9877133105802047, | |
| "grad_norm": 0.669166818877388, | |
| "learning_rate": 9.117898419991333e-10, | |
| "loss": 0.2574, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.993174061433447, | |
| "grad_norm": 0.6418127603308712, | |
| "learning_rate": 2.2795265674113721e-10, | |
| "loss": 0.2499, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9986348122866895, | |
| "grad_norm": 0.6075592770961785, | |
| "learning_rate": 0.0, | |
| "loss": 0.2179, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9986348122866895, | |
| "step": 366, | |
| "total_flos": 86796684656640.0, | |
| "train_loss": 0.2950779539965541, | |
| "train_runtime": 2999.324, | |
| "train_samples_per_second": 15.628, | |
| "train_steps_per_second": 0.122 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 366, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 86796684656640.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |