{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0709233061963033, "eval_steps": 0, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7730826549075824e-06, "grad_norm": 25.625, "learning_rate": 0.0, "loss": 3.8026, "step": 1 }, { "epoch": 3.546165309815165e-06, "grad_norm": 26.5, "learning_rate": 2e-06, "loss": 4.5357, "step": 2 }, { "epoch": 7.09233061963033e-06, "grad_norm": 21.875, "learning_rate": 6e-06, "loss": 4.3725, "step": 4 }, { "epoch": 1.0638495929445495e-05, "grad_norm": 23.5, "learning_rate": 1e-05, "loss": 4.4881, "step": 6 }, { "epoch": 1.418466123926066e-05, "grad_norm": 18.875, "learning_rate": 1.4e-05, "loss": 4.6479, "step": 8 }, { "epoch": 1.7730826549075824e-05, "grad_norm": 18.375, "learning_rate": 1.8e-05, "loss": 4.6278, "step": 10 }, { "epoch": 2.127699185889099e-05, "grad_norm": 17.75, "learning_rate": 2.2e-05, "loss": 4.3834, "step": 12 }, { "epoch": 2.4823157168706156e-05, "grad_norm": 26.75, "learning_rate": 2.6e-05, "loss": 4.3062, "step": 14 }, { "epoch": 2.836932247852132e-05, "grad_norm": 19.625, "learning_rate": 3e-05, "loss": 4.1917, "step": 16 }, { "epoch": 3.1915487788336485e-05, "grad_norm": 20.625, "learning_rate": 3.4000000000000007e-05, "loss": 3.8393, "step": 18 }, { "epoch": 3.546165309815165e-05, "grad_norm": 16.75, "learning_rate": 3.8e-05, "loss": 4.0825, "step": 20 }, { "epoch": 3.900781840796682e-05, "grad_norm": 16.375, "learning_rate": 4.2000000000000004e-05, "loss": 3.4053, "step": 22 }, { "epoch": 4.255398371778198e-05, "grad_norm": 22.75, "learning_rate": 4.6e-05, "loss": 4.1534, "step": 24 }, { "epoch": 4.610014902759714e-05, "grad_norm": 11.5, "learning_rate": 5e-05, "loss": 3.4799, "step": 26 }, { "epoch": 4.964631433741231e-05, "grad_norm": 10.75, "learning_rate": 5.4e-05, "loss": 2.9566, "step": 28 }, { "epoch": 5.3192479647227476e-05, "grad_norm": 9.6875, "learning_rate": 5.800000000000001e-05, "loss": 3.3893, "step": 30 }, { "epoch": 5.673864495704264e-05, "grad_norm": 14.0, "learning_rate": 6.2e-05, "loss": 3.7127, "step": 32 }, { "epoch": 6.028481026685781e-05, "grad_norm": 7.21875, "learning_rate": 6.6e-05, "loss": 2.708, "step": 34 }, { "epoch": 6.383097557667297e-05, "grad_norm": 7.75, "learning_rate": 7.000000000000001e-05, "loss": 3.3579, "step": 36 }, { "epoch": 6.737714088648813e-05, "grad_norm": 5.8125, "learning_rate": 7.4e-05, "loss": 2.5665, "step": 38 }, { "epoch": 7.09233061963033e-05, "grad_norm": 7.59375, "learning_rate": 7.8e-05, "loss": 2.7873, "step": 40 }, { "epoch": 7.446947150611846e-05, "grad_norm": 6.78125, "learning_rate": 8.2e-05, "loss": 2.4843, "step": 42 }, { "epoch": 7.801563681593364e-05, "grad_norm": 6.28125, "learning_rate": 8.599999999999999e-05, "loss": 3.0413, "step": 44 }, { "epoch": 8.15618021257488e-05, "grad_norm": 4.46875, "learning_rate": 8.999999999999999e-05, "loss": 2.2008, "step": 46 }, { "epoch": 8.510796743556396e-05, "grad_norm": 4.5, "learning_rate": 9.400000000000001e-05, "loss": 2.182, "step": 48 }, { "epoch": 8.865413274537912e-05, "grad_norm": 3.421875, "learning_rate": 9.800000000000001e-05, "loss": 2.0904, "step": 50 }, { "epoch": 9.220029805519429e-05, "grad_norm": 4.84375, "learning_rate": 0.000102, "loss": 2.1894, "step": 52 }, { "epoch": 9.574646336500945e-05, "grad_norm": 4.8125, "learning_rate": 0.000106, "loss": 2.4153, "step": 54 }, { "epoch": 9.929262867482463e-05, "grad_norm": 3.75, "learning_rate": 0.00011, "loss": 2.2583, "step": 56 }, { "epoch": 0.00010283879398463979, "grad_norm": 3.0625, "learning_rate": 0.000114, "loss": 2.08, "step": 58 }, { "epoch": 0.00010638495929445495, "grad_norm": 3.125, "learning_rate": 0.000118, "loss": 2.6455, "step": 60 }, { "epoch": 0.00010993112460427011, "grad_norm": 5.59375, "learning_rate": 0.000122, "loss": 2.1495, "step": 62 }, { "epoch": 0.00011347728991408528, "grad_norm": 4.46875, "learning_rate": 0.000126, "loss": 1.957, "step": 64 }, { "epoch": 0.00011702345522390044, "grad_norm": 4.53125, "learning_rate": 0.00013000000000000002, "loss": 2.142, "step": 66 }, { "epoch": 0.00012056962053371562, "grad_norm": 4.40625, "learning_rate": 0.000134, "loss": 2.088, "step": 68 }, { "epoch": 0.00012411578584353076, "grad_norm": 5.34375, "learning_rate": 0.00013800000000000002, "loss": 2.3496, "step": 70 }, { "epoch": 0.00012766195115334594, "grad_norm": 5.46875, "learning_rate": 0.00014199999999999998, "loss": 2.3067, "step": 72 }, { "epoch": 0.00013120811646316112, "grad_norm": 3.265625, "learning_rate": 0.000146, "loss": 1.7205, "step": 74 }, { "epoch": 0.00013475428177297627, "grad_norm": 3.53125, "learning_rate": 0.00015, "loss": 1.9972, "step": 76 }, { "epoch": 0.00013830044708279144, "grad_norm": 3.53125, "learning_rate": 0.000154, "loss": 1.8041, "step": 78 }, { "epoch": 0.0001418466123926066, "grad_norm": 2.65625, "learning_rate": 0.000158, "loss": 2.2117, "step": 80 }, { "epoch": 0.00014539277770242177, "grad_norm": 4.4375, "learning_rate": 0.000162, "loss": 2.1783, "step": 82 }, { "epoch": 0.00014893894301223692, "grad_norm": 2.140625, "learning_rate": 0.00016600000000000002, "loss": 1.9516, "step": 84 }, { "epoch": 0.0001524851083220521, "grad_norm": 4.3125, "learning_rate": 0.00017, "loss": 2.0596, "step": 86 }, { "epoch": 0.00015603127363186727, "grad_norm": 2.484375, "learning_rate": 0.000174, "loss": 1.9511, "step": 88 }, { "epoch": 0.00015957743894168242, "grad_norm": 2.171875, "learning_rate": 0.000178, "loss": 1.9226, "step": 90 }, { "epoch": 0.0001631236042514976, "grad_norm": 6.15625, "learning_rate": 0.000182, "loss": 2.1279, "step": 92 }, { "epoch": 0.00016666976956131275, "grad_norm": 2.109375, "learning_rate": 0.000186, "loss": 1.6153, "step": 94 }, { "epoch": 0.00017021593487112792, "grad_norm": 3.265625, "learning_rate": 0.00019, "loss": 2.2568, "step": 96 }, { "epoch": 0.0001737621001809431, "grad_norm": 3.0625, "learning_rate": 0.000194, "loss": 1.8394, "step": 98 }, { "epoch": 0.00017730826549075825, "grad_norm": 4.0, "learning_rate": 0.00019800000000000002, "loss": 1.9269, "step": 100 }, { "epoch": 0.00018085443080057342, "grad_norm": 2.328125, "learning_rate": 0.000202, "loss": 1.8469, "step": 102 }, { "epoch": 0.00018440059611038857, "grad_norm": 3.6875, "learning_rate": 0.000206, "loss": 2.1011, "step": 104 }, { "epoch": 0.00018794676142020375, "grad_norm": 8.375, "learning_rate": 0.00021, "loss": 1.9144, "step": 106 }, { "epoch": 0.0001914929267300189, "grad_norm": 2.734375, "learning_rate": 0.000214, "loss": 1.855, "step": 108 }, { "epoch": 0.00019503909203983407, "grad_norm": 1.453125, "learning_rate": 0.000218, "loss": 1.5246, "step": 110 }, { "epoch": 0.00019858525734964925, "grad_norm": 2.71875, "learning_rate": 0.000222, "loss": 1.6882, "step": 112 }, { "epoch": 0.0002021314226594644, "grad_norm": 2.125, "learning_rate": 0.00022600000000000002, "loss": 1.9064, "step": 114 }, { "epoch": 0.00020567758796927958, "grad_norm": 3.53125, "learning_rate": 0.00023, "loss": 1.932, "step": 116 }, { "epoch": 0.00020922375327909473, "grad_norm": 5.28125, "learning_rate": 0.00023400000000000002, "loss": 1.8807, "step": 118 }, { "epoch": 0.0002127699185889099, "grad_norm": 3.015625, "learning_rate": 0.00023799999999999998, "loss": 1.8285, "step": 120 }, { "epoch": 0.00021631608389872508, "grad_norm": 3.0, "learning_rate": 0.000242, "loss": 1.7112, "step": 122 }, { "epoch": 0.00021986224920854023, "grad_norm": 3.390625, "learning_rate": 0.000246, "loss": 1.8058, "step": 124 }, { "epoch": 0.0002234084145183554, "grad_norm": 1.875, "learning_rate": 0.00025, "loss": 1.7402, "step": 126 }, { "epoch": 0.00022695457982817055, "grad_norm": 1.953125, "learning_rate": 0.000254, "loss": 1.7423, "step": 128 }, { "epoch": 0.00023050074513798573, "grad_norm": 1.671875, "learning_rate": 0.00025800000000000004, "loss": 1.9259, "step": 130 }, { "epoch": 0.00023404691044780088, "grad_norm": 1.6796875, "learning_rate": 0.000262, "loss": 1.9277, "step": 132 }, { "epoch": 0.00023759307575761606, "grad_norm": 2.125, "learning_rate": 0.000266, "loss": 2.1548, "step": 134 }, { "epoch": 0.00024113924106743123, "grad_norm": 3.796875, "learning_rate": 0.00027, "loss": 2.0847, "step": 136 }, { "epoch": 0.0002446854063772464, "grad_norm": 3.546875, "learning_rate": 0.00027400000000000005, "loss": 1.9213, "step": 138 }, { "epoch": 0.00024823157168706153, "grad_norm": 1.6796875, "learning_rate": 0.00027800000000000004, "loss": 1.9205, "step": 140 }, { "epoch": 0.00025177773699687673, "grad_norm": 1.765625, "learning_rate": 0.00028199999999999997, "loss": 1.9867, "step": 142 }, { "epoch": 0.0002553239023066919, "grad_norm": 0.8203125, "learning_rate": 0.00028599999999999996, "loss": 1.5982, "step": 144 }, { "epoch": 0.00025887006761650703, "grad_norm": 2.5625, "learning_rate": 0.00029, "loss": 1.7819, "step": 146 }, { "epoch": 0.00026241623292632224, "grad_norm": 1.5234375, "learning_rate": 0.000294, "loss": 1.8248, "step": 148 }, { "epoch": 0.0002659623982361374, "grad_norm": 2.671875, "learning_rate": 0.000298, "loss": 1.8077, "step": 150 }, { "epoch": 0.00026950856354595253, "grad_norm": 1.140625, "learning_rate": 0.000302, "loss": 1.6061, "step": 152 }, { "epoch": 0.0002730547288557677, "grad_norm": 1.5078125, "learning_rate": 0.000306, "loss": 1.865, "step": 154 }, { "epoch": 0.0002766008941655829, "grad_norm": 1.4765625, "learning_rate": 0.00031, "loss": 1.7381, "step": 156 }, { "epoch": 0.00028014705947539804, "grad_norm": 1.109375, "learning_rate": 0.000314, "loss": 1.5453, "step": 158 }, { "epoch": 0.0002836932247852132, "grad_norm": 2.171875, "learning_rate": 0.00031800000000000003, "loss": 1.7776, "step": 160 }, { "epoch": 0.0002872393900950284, "grad_norm": 1.7109375, "learning_rate": 0.000322, "loss": 1.5954, "step": 162 }, { "epoch": 0.00029078555540484354, "grad_norm": 3.59375, "learning_rate": 0.000326, "loss": 1.7511, "step": 164 }, { "epoch": 0.0002943317207146587, "grad_norm": 1.4453125, "learning_rate": 0.00033, "loss": 1.5288, "step": 166 }, { "epoch": 0.00029787788602447384, "grad_norm": 1.40625, "learning_rate": 0.00033400000000000004, "loss": 1.5858, "step": 168 }, { "epoch": 0.00030142405133428904, "grad_norm": 2.015625, "learning_rate": 0.00033800000000000003, "loss": 1.4616, "step": 170 }, { "epoch": 0.0003049702166441042, "grad_norm": 3.53125, "learning_rate": 0.000342, "loss": 1.8694, "step": 172 }, { "epoch": 0.00030851638195391934, "grad_norm": 2.75, "learning_rate": 0.000346, "loss": 1.6349, "step": 174 }, { "epoch": 0.00031206254726373454, "grad_norm": 1.328125, "learning_rate": 0.00035, "loss": 1.5103, "step": 176 }, { "epoch": 0.0003156087125735497, "grad_norm": 1.78125, "learning_rate": 0.000354, "loss": 1.5582, "step": 178 }, { "epoch": 0.00031915487788336484, "grad_norm": 1.015625, "learning_rate": 0.000358, "loss": 1.7065, "step": 180 }, { "epoch": 0.00032270104319318, "grad_norm": 0.890625, "learning_rate": 0.000362, "loss": 1.5301, "step": 182 }, { "epoch": 0.0003262472085029952, "grad_norm": 1.828125, "learning_rate": 0.000366, "loss": 1.5613, "step": 184 }, { "epoch": 0.00032979337381281034, "grad_norm": 3.828125, "learning_rate": 0.00037, "loss": 1.9838, "step": 186 }, { "epoch": 0.0003333395391226255, "grad_norm": 4.875, "learning_rate": 0.000374, "loss": 2.3287, "step": 188 }, { "epoch": 0.0003368857044324407, "grad_norm": 0.9375, "learning_rate": 0.000378, "loss": 1.7776, "step": 190 }, { "epoch": 0.00034043186974225584, "grad_norm": 1.6484375, "learning_rate": 0.000382, "loss": 1.6625, "step": 192 }, { "epoch": 0.000343978035052071, "grad_norm": 0.64453125, "learning_rate": 0.000386, "loss": 1.398, "step": 194 }, { "epoch": 0.0003475242003618862, "grad_norm": 1.8125, "learning_rate": 0.00039000000000000005, "loss": 1.7848, "step": 196 }, { "epoch": 0.00035107036567170135, "grad_norm": 1.390625, "learning_rate": 0.00039400000000000004, "loss": 1.8591, "step": 198 }, { "epoch": 0.0003546165309815165, "grad_norm": 0.8203125, "learning_rate": 0.000398, "loss": 1.3619, "step": 200 }, { "epoch": 0.00035816269629133164, "grad_norm": 1.3203125, "learning_rate": 0.000402, "loss": 1.9058, "step": 202 }, { "epoch": 0.00036170886160114685, "grad_norm": 0.74609375, "learning_rate": 0.00040600000000000006, "loss": 1.4741, "step": 204 }, { "epoch": 0.000365255026910962, "grad_norm": 0.71484375, "learning_rate": 0.00041, "loss": 1.5142, "step": 206 }, { "epoch": 0.00036880119222077715, "grad_norm": 2.171875, "learning_rate": 0.000414, "loss": 1.861, "step": 208 }, { "epoch": 0.00037234735753059235, "grad_norm": 0.82421875, "learning_rate": 0.00041799999999999997, "loss": 1.3935, "step": 210 }, { "epoch": 0.0003758935228404075, "grad_norm": 0.94140625, "learning_rate": 0.000422, "loss": 1.3792, "step": 212 }, { "epoch": 0.00037943968815022265, "grad_norm": 1.5390625, "learning_rate": 0.000426, "loss": 1.7609, "step": 214 }, { "epoch": 0.0003829858534600378, "grad_norm": 0.83203125, "learning_rate": 0.00043, "loss": 1.3885, "step": 216 }, { "epoch": 0.000386532018769853, "grad_norm": 1.5, "learning_rate": 0.00043400000000000003, "loss": 1.4444, "step": 218 }, { "epoch": 0.00039007818407966815, "grad_norm": 1.59375, "learning_rate": 0.000438, "loss": 1.3774, "step": 220 }, { "epoch": 0.0003936243493894833, "grad_norm": 0.85546875, "learning_rate": 0.000442, "loss": 1.698, "step": 222 }, { "epoch": 0.0003971705146992985, "grad_norm": 0.96875, "learning_rate": 0.000446, "loss": 1.4195, "step": 224 }, { "epoch": 0.00040071668000911365, "grad_norm": 0.765625, "learning_rate": 0.00045000000000000004, "loss": 1.6253, "step": 226 }, { "epoch": 0.0004042628453189288, "grad_norm": 1.40625, "learning_rate": 0.00045400000000000003, "loss": 1.5447, "step": 228 }, { "epoch": 0.00040780901062874395, "grad_norm": 0.78125, "learning_rate": 0.000458, "loss": 1.5232, "step": 230 }, { "epoch": 0.00041135517593855915, "grad_norm": 2.140625, "learning_rate": 0.000462, "loss": 1.9986, "step": 232 }, { "epoch": 0.0004149013412483743, "grad_norm": 1.5546875, "learning_rate": 0.00046600000000000005, "loss": 1.3245, "step": 234 }, { "epoch": 0.00041844750655818945, "grad_norm": 0.8671875, "learning_rate": 0.00047, "loss": 2.1678, "step": 236 }, { "epoch": 0.00042199367186800466, "grad_norm": 1.734375, "learning_rate": 0.000474, "loss": 1.6768, "step": 238 }, { "epoch": 0.0004255398371778198, "grad_norm": 0.62890625, "learning_rate": 0.00047799999999999996, "loss": 1.5694, "step": 240 }, { "epoch": 0.00042908600248763495, "grad_norm": 3.15625, "learning_rate": 0.000482, "loss": 1.3832, "step": 242 }, { "epoch": 0.00043263216779745016, "grad_norm": 2.390625, "learning_rate": 0.000486, "loss": 1.4785, "step": 244 }, { "epoch": 0.0004361783331072653, "grad_norm": 1.8671875, "learning_rate": 0.00049, "loss": 1.7814, "step": 246 }, { "epoch": 0.00043972449841708046, "grad_norm": 1.3828125, "learning_rate": 0.000494, "loss": 1.3818, "step": 248 }, { "epoch": 0.0004432706637268956, "grad_norm": 0.890625, "learning_rate": 0.000498, "loss": 1.4585, "step": 250 }, { "epoch": 0.0004468168290367108, "grad_norm": 0.62890625, "learning_rate": 0.0005020000000000001, "loss": 1.4761, "step": 252 }, { "epoch": 0.00045036299434652596, "grad_norm": 1.8203125, "learning_rate": 0.000506, "loss": 2.0438, "step": 254 }, { "epoch": 0.0004539091596563411, "grad_norm": 0.671875, "learning_rate": 0.00051, "loss": 1.7384, "step": 256 }, { "epoch": 0.0004574553249661563, "grad_norm": 2.5625, "learning_rate": 0.000514, "loss": 1.5145, "step": 258 }, { "epoch": 0.00046100149027597146, "grad_norm": 1.5703125, "learning_rate": 0.000518, "loss": 1.5793, "step": 260 }, { "epoch": 0.0004645476555857866, "grad_norm": 0.71484375, "learning_rate": 0.000522, "loss": 1.5587, "step": 262 }, { "epoch": 0.00046809382089560176, "grad_norm": 0.62109375, "learning_rate": 0.000526, "loss": 1.5064, "step": 264 }, { "epoch": 0.00047163998620541696, "grad_norm": 0.6171875, "learning_rate": 0.0005300000000000001, "loss": 1.571, "step": 266 }, { "epoch": 0.0004751861515152321, "grad_norm": 1.859375, "learning_rate": 0.0005340000000000001, "loss": 1.5269, "step": 268 }, { "epoch": 0.00047873231682504726, "grad_norm": 0.89453125, "learning_rate": 0.0005380000000000001, "loss": 1.7835, "step": 270 }, { "epoch": 0.00048227848213486246, "grad_norm": 1.0, "learning_rate": 0.0005420000000000001, "loss": 1.8869, "step": 272 }, { "epoch": 0.0004858246474446776, "grad_norm": 0.55078125, "learning_rate": 0.000546, "loss": 1.5411, "step": 274 }, { "epoch": 0.0004893708127544928, "grad_norm": 0.8203125, "learning_rate": 0.00055, "loss": 1.7809, "step": 276 }, { "epoch": 0.000492916978064308, "grad_norm": 1.3671875, "learning_rate": 0.000554, "loss": 1.6076, "step": 278 }, { "epoch": 0.0004964631433741231, "grad_norm": 0.64453125, "learning_rate": 0.000558, "loss": 1.5887, "step": 280 }, { "epoch": 0.0005000093086839383, "grad_norm": 6.28125, "learning_rate": 0.0005620000000000001, "loss": 1.6691, "step": 282 }, { "epoch": 0.0005035554739937535, "grad_norm": 3.890625, "learning_rate": 0.000566, "loss": 1.4103, "step": 284 }, { "epoch": 0.0005071016393035686, "grad_norm": 0.83203125, "learning_rate": 0.00057, "loss": 1.4613, "step": 286 }, { "epoch": 0.0005106478046133838, "grad_norm": 1.7265625, "learning_rate": 0.000574, "loss": 1.5102, "step": 288 }, { "epoch": 0.000514193969923199, "grad_norm": 1.3203125, "learning_rate": 0.000578, "loss": 1.9516, "step": 290 }, { "epoch": 0.0005177401352330141, "grad_norm": 1.7734375, "learning_rate": 0.0005819999999999999, "loss": 2.0735, "step": 292 }, { "epoch": 0.0005212863005428293, "grad_norm": 0.396484375, "learning_rate": 0.0005859999999999999, "loss": 1.4782, "step": 294 }, { "epoch": 0.0005248324658526445, "grad_norm": 0.72265625, "learning_rate": 0.00059, "loss": 1.5791, "step": 296 }, { "epoch": 0.0005283786311624596, "grad_norm": 1.4140625, "learning_rate": 0.000594, "loss": 1.662, "step": 298 }, { "epoch": 0.0005319247964722748, "grad_norm": 1.15625, "learning_rate": 0.000598, "loss": 1.7771, "step": 300 }, { "epoch": 0.0005354709617820899, "grad_norm": 1.296875, "learning_rate": 0.000602, "loss": 2.0136, "step": 302 }, { "epoch": 0.0005390171270919051, "grad_norm": 1.984375, "learning_rate": 0.000606, "loss": 1.6989, "step": 304 }, { "epoch": 0.0005425632924017203, "grad_norm": 0.84375, "learning_rate": 0.00061, "loss": 1.3634, "step": 306 }, { "epoch": 0.0005461094577115354, "grad_norm": 0.99609375, "learning_rate": 0.000614, "loss": 2.0646, "step": 308 }, { "epoch": 0.0005496556230213506, "grad_norm": 0.56640625, "learning_rate": 0.0006180000000000001, "loss": 1.6761, "step": 310 }, { "epoch": 0.0005532017883311658, "grad_norm": 1.234375, "learning_rate": 0.000622, "loss": 1.5955, "step": 312 }, { "epoch": 0.0005567479536409809, "grad_norm": 1.8203125, "learning_rate": 0.000626, "loss": 1.7073, "step": 314 }, { "epoch": 0.0005602941189507961, "grad_norm": 0.88671875, "learning_rate": 0.00063, "loss": 1.517, "step": 316 }, { "epoch": 0.0005638402842606113, "grad_norm": 0.439453125, "learning_rate": 0.000634, "loss": 1.2879, "step": 318 }, { "epoch": 0.0005673864495704264, "grad_norm": 1.7109375, "learning_rate": 0.000638, "loss": 1.5404, "step": 320 }, { "epoch": 0.0005709326148802416, "grad_norm": 0.79296875, "learning_rate": 0.000642, "loss": 1.289, "step": 322 }, { "epoch": 0.0005744787801900568, "grad_norm": 0.7734375, "learning_rate": 0.000646, "loss": 1.538, "step": 324 }, { "epoch": 0.0005780249454998719, "grad_norm": 0.66796875, "learning_rate": 0.0006500000000000001, "loss": 1.5195, "step": 326 }, { "epoch": 0.0005815711108096871, "grad_norm": 1.421875, "learning_rate": 0.0006540000000000001, "loss": 1.5568, "step": 328 }, { "epoch": 0.0005851172761195023, "grad_norm": 3.703125, "learning_rate": 0.0006580000000000001, "loss": 1.5764, "step": 330 }, { "epoch": 0.0005886634414293174, "grad_norm": 1.109375, "learning_rate": 0.000662, "loss": 1.6677, "step": 332 }, { "epoch": 0.0005922096067391326, "grad_norm": 0.67578125, "learning_rate": 0.000666, "loss": 1.7501, "step": 334 }, { "epoch": 0.0005957557720489477, "grad_norm": 0.90234375, "learning_rate": 0.00067, "loss": 1.2844, "step": 336 }, { "epoch": 0.0005993019373587629, "grad_norm": 3.078125, "learning_rate": 0.000674, "loss": 1.7816, "step": 338 }, { "epoch": 0.0006028481026685781, "grad_norm": 1.1328125, "learning_rate": 0.0006780000000000001, "loss": 1.436, "step": 340 }, { "epoch": 0.0006063942679783932, "grad_norm": 0.73046875, "learning_rate": 0.0006820000000000001, "loss": 1.4653, "step": 342 }, { "epoch": 0.0006099404332882084, "grad_norm": 1.1328125, "learning_rate": 0.0006860000000000001, "loss": 1.8547, "step": 344 }, { "epoch": 0.0006134865985980236, "grad_norm": 0.7265625, "learning_rate": 0.00069, "loss": 1.4515, "step": 346 }, { "epoch": 0.0006170327639078387, "grad_norm": 0.9921875, "learning_rate": 0.000694, "loss": 1.4289, "step": 348 }, { "epoch": 0.0006205789292176539, "grad_norm": 1.4296875, "learning_rate": 0.0006979999999999999, "loss": 1.6906, "step": 350 }, { "epoch": 0.0006241250945274691, "grad_norm": 2.40625, "learning_rate": 0.0007019999999999999, "loss": 1.4184, "step": 352 }, { "epoch": 0.0006276712598372842, "grad_norm": 0.94140625, "learning_rate": 0.0007059999999999999, "loss": 1.2701, "step": 354 }, { "epoch": 0.0006312174251470994, "grad_norm": 4.46875, "learning_rate": 0.00071, "loss": 1.3772, "step": 356 }, { "epoch": 0.0006347635904569146, "grad_norm": 3.5625, "learning_rate": 0.000714, "loss": 1.3526, "step": 358 }, { "epoch": 0.0006383097557667297, "grad_norm": 1.0234375, "learning_rate": 0.000718, "loss": 1.3579, "step": 360 }, { "epoch": 0.0006418559210765449, "grad_norm": 1.7265625, "learning_rate": 0.000722, "loss": 1.4318, "step": 362 }, { "epoch": 0.00064540208638636, "grad_norm": 2.015625, "learning_rate": 0.000726, "loss": 1.355, "step": 364 }, { "epoch": 0.0006489482516961752, "grad_norm": 1.3359375, "learning_rate": 0.00073, "loss": 1.3997, "step": 366 }, { "epoch": 0.0006524944170059904, "grad_norm": 1.4140625, "learning_rate": 0.000734, "loss": 1.0671, "step": 368 }, { "epoch": 0.0006560405823158055, "grad_norm": 1.484375, "learning_rate": 0.000738, "loss": 1.1156, "step": 370 }, { "epoch": 0.0006595867476256207, "grad_norm": 2.421875, "learning_rate": 0.000742, "loss": 1.2581, "step": 372 }, { "epoch": 0.0006631329129354359, "grad_norm": 1.0625, "learning_rate": 0.000746, "loss": 0.8968, "step": 374 }, { "epoch": 0.000666679078245251, "grad_norm": 3.765625, "learning_rate": 0.00075, "loss": 1.1869, "step": 376 }, { "epoch": 0.0006702252435550662, "grad_norm": 1.7734375, "learning_rate": 0.000754, "loss": 0.9751, "step": 378 }, { "epoch": 0.0006737714088648814, "grad_norm": 2.890625, "learning_rate": 0.000758, "loss": 1.2709, "step": 380 }, { "epoch": 0.0006773175741746965, "grad_norm": 1.6171875, "learning_rate": 0.000762, "loss": 1.1142, "step": 382 }, { "epoch": 0.0006808637394845117, "grad_norm": 3.796875, "learning_rate": 0.0007660000000000001, "loss": 0.9331, "step": 384 }, { "epoch": 0.0006844099047943269, "grad_norm": 2.859375, "learning_rate": 0.0007700000000000001, "loss": 0.9445, "step": 386 }, { "epoch": 0.000687956070104142, "grad_norm": 3.40625, "learning_rate": 0.0007740000000000001, "loss": 1.1067, "step": 388 }, { "epoch": 0.0006915022354139572, "grad_norm": 1.234375, "learning_rate": 0.000778, "loss": 0.8197, "step": 390 }, { "epoch": 0.0006950484007237724, "grad_norm": 2.671875, "learning_rate": 0.000782, "loss": 0.9289, "step": 392 }, { "epoch": 0.0006985945660335875, "grad_norm": 5.53125, "learning_rate": 0.000786, "loss": 1.2645, "step": 394 }, { "epoch": 0.0007021407313434027, "grad_norm": 2.96875, "learning_rate": 0.00079, "loss": 0.7802, "step": 396 }, { "epoch": 0.0007056868966532178, "grad_norm": 4.96875, "learning_rate": 0.0007940000000000001, "loss": 0.8851, "step": 398 }, { "epoch": 0.000709233061963033, "grad_norm": 1.5234375, "learning_rate": 0.0007980000000000001, "loss": 0.8057, "step": 400 }, { "epoch": 0.0007127792272728482, "grad_norm": 0.91796875, "learning_rate": 0.0008020000000000001, "loss": 0.8375, "step": 402 }, { "epoch": 0.0007163253925826633, "grad_norm": 0.98828125, "learning_rate": 0.0008060000000000001, "loss": 0.7738, "step": 404 }, { "epoch": 0.0007198715578924785, "grad_norm": 1.921875, "learning_rate": 0.0008100000000000001, "loss": 0.9653, "step": 406 }, { "epoch": 0.0007234177232022937, "grad_norm": 2.0625, "learning_rate": 0.0008139999999999999, "loss": 0.8432, "step": 408 }, { "epoch": 0.0007269638885121088, "grad_norm": 0.8359375, "learning_rate": 0.0008179999999999999, "loss": 0.8312, "step": 410 }, { "epoch": 0.000730510053821924, "grad_norm": 5.0625, "learning_rate": 0.0008219999999999999, "loss": 0.8912, "step": 412 }, { "epoch": 0.0007340562191317392, "grad_norm": 4.0625, "learning_rate": 0.000826, "loss": 0.8716, "step": 414 }, { "epoch": 0.0007376023844415543, "grad_norm": 6.6875, "learning_rate": 0.00083, "loss": 0.7662, "step": 416 }, { "epoch": 0.0007411485497513695, "grad_norm": 0.95703125, "learning_rate": 0.000834, "loss": 0.9265, "step": 418 }, { "epoch": 0.0007446947150611847, "grad_norm": 1.0078125, "learning_rate": 0.000838, "loss": 0.9106, "step": 420 }, { "epoch": 0.0007482408803709998, "grad_norm": 3.0625, "learning_rate": 0.000842, "loss": 0.8929, "step": 422 }, { "epoch": 0.000751787045680815, "grad_norm": 2.34375, "learning_rate": 0.000846, "loss": 0.8809, "step": 424 }, { "epoch": 0.0007553332109906301, "grad_norm": 0.76953125, "learning_rate": 0.00085, "loss": 0.6671, "step": 426 }, { "epoch": 0.0007588793763004453, "grad_norm": 0.7734375, "learning_rate": 0.000854, "loss": 0.7229, "step": 428 }, { "epoch": 0.0007624255416102605, "grad_norm": 2.484375, "learning_rate": 0.000858, "loss": 0.9, "step": 430 }, { "epoch": 0.0007659717069200756, "grad_norm": 2.203125, "learning_rate": 0.000862, "loss": 0.8212, "step": 432 }, { "epoch": 0.0007695178722298908, "grad_norm": 0.66015625, "learning_rate": 0.000866, "loss": 0.5973, "step": 434 }, { "epoch": 0.000773064037539706, "grad_norm": 2.328125, "learning_rate": 0.00087, "loss": 0.5344, "step": 436 }, { "epoch": 0.0007766102028495211, "grad_norm": 1.0, "learning_rate": 0.000874, "loss": 0.5902, "step": 438 }, { "epoch": 0.0007801563681593363, "grad_norm": 0.80078125, "learning_rate": 0.000878, "loss": 0.6295, "step": 440 }, { "epoch": 0.0007837025334691515, "grad_norm": 0.75, "learning_rate": 0.000882, "loss": 0.6326, "step": 442 }, { "epoch": 0.0007872486987789666, "grad_norm": 0.9453125, "learning_rate": 0.0008860000000000001, "loss": 0.5642, "step": 444 }, { "epoch": 0.0007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0008900000000000001, "loss": 0.602, "step": 446 }, { "epoch": 0.000794341029398597, "grad_norm": 3.234375, "learning_rate": 0.000894, "loss": 0.7297, "step": 448 }, { "epoch": 0.0007978871947084121, "grad_norm": 0.7890625, "learning_rate": 0.000898, "loss": 0.701, "step": 450 }, { "epoch": 0.0008014333600182273, "grad_norm": 0.69140625, "learning_rate": 0.000902, "loss": 0.5983, "step": 452 }, { "epoch": 0.0008049795253280425, "grad_norm": 1.421875, "learning_rate": 0.000906, "loss": 0.4609, "step": 454 }, { "epoch": 0.0008085256906378576, "grad_norm": 1.1484375, "learning_rate": 0.00091, "loss": 0.5314, "step": 456 }, { "epoch": 0.0008120718559476728, "grad_norm": 4.4375, "learning_rate": 0.0009140000000000001, "loss": 1.0837, "step": 458 }, { "epoch": 0.0008156180212574879, "grad_norm": 2.953125, "learning_rate": 0.0009180000000000001, "loss": 0.7036, "step": 460 }, { "epoch": 0.0008191641865673031, "grad_norm": 0.75, "learning_rate": 0.0009220000000000001, "loss": 0.699, "step": 462 }, { "epoch": 0.0008227103518771183, "grad_norm": 0.6171875, "learning_rate": 0.0009260000000000001, "loss": 0.6727, "step": 464 }, { "epoch": 0.0008262565171869334, "grad_norm": 2.0, "learning_rate": 0.00093, "loss": 0.6781, "step": 466 }, { "epoch": 0.0008298026824967486, "grad_norm": 0.70703125, "learning_rate": 0.000934, "loss": 0.603, "step": 468 }, { "epoch": 0.0008333488478065638, "grad_norm": 0.62890625, "learning_rate": 0.0009379999999999999, "loss": 0.5876, "step": 470 }, { "epoch": 0.0008368950131163789, "grad_norm": 0.5703125, "learning_rate": 0.000942, "loss": 0.5201, "step": 472 }, { "epoch": 0.0008404411784261941, "grad_norm": 3.390625, "learning_rate": 0.000946, "loss": 0.6795, "step": 474 }, { "epoch": 0.0008439873437360093, "grad_norm": 1.125, "learning_rate": 0.00095, "loss": 0.5322, "step": 476 }, { "epoch": 0.0008475335090458244, "grad_norm": 2.15625, "learning_rate": 0.000954, "loss": 0.636, "step": 478 }, { "epoch": 0.0008510796743556396, "grad_norm": 0.66015625, "learning_rate": 0.000958, "loss": 0.5606, "step": 480 }, { "epoch": 0.0008546258396654548, "grad_norm": 0.66796875, "learning_rate": 0.000962, "loss": 0.4997, "step": 482 }, { "epoch": 0.0008581720049752699, "grad_norm": 1.1953125, "learning_rate": 0.000966, "loss": 0.704, "step": 484 }, { "epoch": 0.0008617181702850851, "grad_norm": 0.8828125, "learning_rate": 0.0009699999999999999, "loss": 0.5248, "step": 486 }, { "epoch": 0.0008652643355949003, "grad_norm": 5.75, "learning_rate": 0.000974, "loss": 0.7647, "step": 488 }, { "epoch": 0.0008688105009047154, "grad_norm": 0.59765625, "learning_rate": 0.000978, "loss": 0.54, "step": 490 }, { "epoch": 0.0008723566662145306, "grad_norm": 3.109375, "learning_rate": 0.000982, "loss": 0.6266, "step": 492 }, { "epoch": 0.0008759028315243457, "grad_norm": 2.875, "learning_rate": 0.0009860000000000001, "loss": 0.6661, "step": 494 }, { "epoch": 0.0008794489968341609, "grad_norm": 0.77734375, "learning_rate": 0.00099, "loss": 0.5463, "step": 496 }, { "epoch": 0.0008829951621439761, "grad_norm": 1.078125, "learning_rate": 0.000994, "loss": 0.5743, "step": 498 }, { "epoch": 0.0008865413274537912, "grad_norm": 1.28125, "learning_rate": 0.000998, "loss": 0.6301, "step": 500 }, { "epoch": 0.0008900874927636064, "grad_norm": 0.44921875, "learning_rate": 0.001002, "loss": 0.4796, "step": 502 }, { "epoch": 0.0008936336580734216, "grad_norm": 0.6015625, "learning_rate": 0.001006, "loss": 0.5895, "step": 504 }, { "epoch": 0.0008971798233832367, "grad_norm": 1.875, "learning_rate": 0.00101, "loss": 0.5832, "step": 506 }, { "epoch": 0.0009007259886930519, "grad_norm": 1.296875, "learning_rate": 0.001014, "loss": 0.6405, "step": 508 }, { "epoch": 0.0009042721540028671, "grad_norm": 0.455078125, "learning_rate": 0.001018, "loss": 0.4511, "step": 510 }, { "epoch": 0.0009078183193126822, "grad_norm": 0.515625, "learning_rate": 0.0010220000000000001, "loss": 0.4704, "step": 512 }, { "epoch": 0.0009113644846224974, "grad_norm": 1.1953125, "learning_rate": 0.001026, "loss": 0.55, "step": 514 }, { "epoch": 0.0009149106499323126, "grad_norm": 2.359375, "learning_rate": 0.00103, "loss": 0.6907, "step": 516 }, { "epoch": 0.0009184568152421277, "grad_norm": 1.859375, "learning_rate": 0.001034, "loss": 0.7927, "step": 518 }, { "epoch": 0.0009220029805519429, "grad_norm": 2.484375, "learning_rate": 0.001038, "loss": 0.5656, "step": 520 }, { "epoch": 0.000925549145861758, "grad_norm": 0.78515625, "learning_rate": 0.001042, "loss": 0.6883, "step": 522 }, { "epoch": 0.0009290953111715732, "grad_norm": 1.421875, "learning_rate": 0.001046, "loss": 0.7126, "step": 524 }, { "epoch": 0.0009326414764813884, "grad_norm": 1.328125, "learning_rate": 0.0010500000000000002, "loss": 0.5154, "step": 526 }, { "epoch": 0.0009361876417912035, "grad_norm": 0.71484375, "learning_rate": 0.001054, "loss": 0.5787, "step": 528 }, { "epoch": 0.0009397338071010187, "grad_norm": 0.69140625, "learning_rate": 0.0010580000000000001, "loss": 0.5701, "step": 530 }, { "epoch": 0.0009432799724108339, "grad_norm": 0.640625, "learning_rate": 0.001062, "loss": 0.6469, "step": 532 }, { "epoch": 0.000946826137720649, "grad_norm": 0.9375, "learning_rate": 0.001066, "loss": 0.5711, "step": 534 }, { "epoch": 0.0009503723030304642, "grad_norm": 13.6875, "learning_rate": 0.00107, "loss": 0.559, "step": 536 }, { "epoch": 0.0009539184683402794, "grad_norm": 0.60546875, "learning_rate": 0.001074, "loss": 0.4705, "step": 538 }, { "epoch": 0.0009574646336500945, "grad_norm": 1.2890625, "learning_rate": 0.0010780000000000002, "loss": 0.5083, "step": 540 }, { "epoch": 0.0009610107989599097, "grad_norm": 1.0703125, "learning_rate": 0.001082, "loss": 0.6336, "step": 542 }, { "epoch": 0.0009645569642697249, "grad_norm": 3.21875, "learning_rate": 0.0010860000000000002, "loss": 0.6729, "step": 544 }, { "epoch": 0.00096810312957954, "grad_norm": 0.6640625, "learning_rate": 0.00109, "loss": 0.4936, "step": 546 }, { "epoch": 0.0009716492948893552, "grad_norm": 2.421875, "learning_rate": 0.0010940000000000001, "loss": 0.7861, "step": 548 }, { "epoch": 0.0009751954601991704, "grad_norm": 0.9375, "learning_rate": 0.001098, "loss": 0.5245, "step": 550 }, { "epoch": 0.0009787416255089855, "grad_norm": 1.015625, "learning_rate": 0.0011020000000000001, "loss": 0.4803, "step": 552 }, { "epoch": 0.0009822877908188007, "grad_norm": 4.1875, "learning_rate": 0.0011060000000000002, "loss": 0.5755, "step": 554 }, { "epoch": 0.000985833956128616, "grad_norm": 0.453125, "learning_rate": 0.00111, "loss": 0.4838, "step": 556 }, { "epoch": 0.0009893801214384311, "grad_norm": 1.6171875, "learning_rate": 0.0011140000000000002, "loss": 0.6264, "step": 558 }, { "epoch": 0.0009929262867482461, "grad_norm": 2.25, "learning_rate": 0.001118, "loss": 0.8499, "step": 560 }, { "epoch": 0.0009964724520580613, "grad_norm": 0.76171875, "learning_rate": 0.0011220000000000002, "loss": 0.5218, "step": 562 }, { "epoch": 0.0010000186173678765, "grad_norm": 0.7265625, "learning_rate": 0.0011259999999999998, "loss": 0.6252, "step": 564 }, { "epoch": 0.0010035647826776917, "grad_norm": 0.439453125, "learning_rate": 0.00113, "loss": 0.455, "step": 566 }, { "epoch": 0.001007110947987507, "grad_norm": 0.76953125, "learning_rate": 0.001134, "loss": 0.4322, "step": 568 }, { "epoch": 0.001010657113297322, "grad_norm": 5.59375, "learning_rate": 0.001138, "loss": 1.0676, "step": 570 }, { "epoch": 0.0010142032786071371, "grad_norm": 1.40625, "learning_rate": 0.001142, "loss": 0.5648, "step": 572 }, { "epoch": 0.0010177494439169523, "grad_norm": 0.984375, "learning_rate": 0.0011459999999999999, "loss": 0.8953, "step": 574 }, { "epoch": 0.0010212956092267675, "grad_norm": 0.65234375, "learning_rate": 0.00115, "loss": 0.4995, "step": 576 }, { "epoch": 0.0010248417745365827, "grad_norm": 1.3203125, "learning_rate": 0.0011539999999999999, "loss": 0.5413, "step": 578 }, { "epoch": 0.001028387939846398, "grad_norm": 3.859375, "learning_rate": 0.001158, "loss": 0.7195, "step": 580 }, { "epoch": 0.001031934105156213, "grad_norm": 0.97265625, "learning_rate": 0.0011619999999999998, "loss": 0.6171, "step": 582 }, { "epoch": 0.0010354802704660281, "grad_norm": 0.70703125, "learning_rate": 0.001166, "loss": 0.5062, "step": 584 }, { "epoch": 0.0010390264357758433, "grad_norm": 0.734375, "learning_rate": 0.00117, "loss": 0.5645, "step": 586 }, { "epoch": 0.0010425726010856585, "grad_norm": 0.70703125, "learning_rate": 0.001174, "loss": 0.4871, "step": 588 }, { "epoch": 0.0010461187663954737, "grad_norm": 1.703125, "learning_rate": 0.001178, "loss": 0.4328, "step": 590 }, { "epoch": 0.001049664931705289, "grad_norm": 0.73828125, "learning_rate": 0.0011819999999999999, "loss": 0.5342, "step": 592 }, { "epoch": 0.001053211097015104, "grad_norm": 1.3984375, "learning_rate": 0.001186, "loss": 0.4844, "step": 594 }, { "epoch": 0.0010567572623249191, "grad_norm": 0.9609375, "learning_rate": 0.0011899999999999999, "loss": 0.4575, "step": 596 }, { "epoch": 0.0010603034276347343, "grad_norm": 0.482421875, "learning_rate": 0.001194, "loss": 0.5371, "step": 598 }, { "epoch": 0.0010638495929445495, "grad_norm": 1.59375, "learning_rate": 0.001198, "loss": 0.4835, "step": 600 }, { "epoch": 0.0010673957582543647, "grad_norm": 0.55078125, "learning_rate": 0.001202, "loss": 0.4884, "step": 602 }, { "epoch": 0.0010709419235641797, "grad_norm": 0.5625, "learning_rate": 0.001206, "loss": 0.6999, "step": 604 }, { "epoch": 0.001074488088873995, "grad_norm": 1.03125, "learning_rate": 0.00121, "loss": 0.4397, "step": 606 }, { "epoch": 0.0010780342541838101, "grad_norm": 1.3984375, "learning_rate": 0.001214, "loss": 0.6247, "step": 608 }, { "epoch": 0.0010815804194936253, "grad_norm": 0.91015625, "learning_rate": 0.001218, "loss": 0.3905, "step": 610 }, { "epoch": 0.0010851265848034405, "grad_norm": 0.5546875, "learning_rate": 0.001222, "loss": 0.4631, "step": 612 }, { "epoch": 0.0010886727501132557, "grad_norm": 1.421875, "learning_rate": 0.001226, "loss": 0.4027, "step": 614 }, { "epoch": 0.0010922189154230707, "grad_norm": 0.40234375, "learning_rate": 0.00123, "loss": 0.5303, "step": 616 }, { "epoch": 0.001095765080732886, "grad_norm": 0.7109375, "learning_rate": 0.001234, "loss": 0.4526, "step": 618 }, { "epoch": 0.0010993112460427011, "grad_norm": 2.140625, "learning_rate": 0.001238, "loss": 0.6035, "step": 620 }, { "epoch": 0.0011028574113525163, "grad_norm": 1.15625, "learning_rate": 0.001242, "loss": 0.5793, "step": 622 }, { "epoch": 0.0011064035766623315, "grad_norm": 0.416015625, "learning_rate": 0.001246, "loss": 0.4564, "step": 624 }, { "epoch": 0.0011099497419721467, "grad_norm": 0.58203125, "learning_rate": 0.00125, "loss": 0.4803, "step": 626 }, { "epoch": 0.0011134959072819617, "grad_norm": 0.88671875, "learning_rate": 0.0012540000000000001, "loss": 0.6734, "step": 628 }, { "epoch": 0.001117042072591777, "grad_norm": 0.66015625, "learning_rate": 0.001258, "loss": 0.4855, "step": 630 }, { "epoch": 0.0011205882379015921, "grad_norm": 1.15625, "learning_rate": 0.001262, "loss": 0.4557, "step": 632 }, { "epoch": 0.0011241344032114073, "grad_norm": 1.78125, "learning_rate": 0.001266, "loss": 0.5451, "step": 634 }, { "epoch": 0.0011276805685212225, "grad_norm": 1.7265625, "learning_rate": 0.00127, "loss": 0.5126, "step": 636 }, { "epoch": 0.0011312267338310375, "grad_norm": 1.265625, "learning_rate": 0.001274, "loss": 0.5094, "step": 638 }, { "epoch": 0.0011347728991408527, "grad_norm": 2.21875, "learning_rate": 0.001278, "loss": 0.5792, "step": 640 }, { "epoch": 0.001138319064450668, "grad_norm": 1.796875, "learning_rate": 0.0012820000000000002, "loss": 0.5892, "step": 642 }, { "epoch": 0.0011418652297604831, "grad_norm": 0.69921875, "learning_rate": 0.001286, "loss": 0.5642, "step": 644 }, { "epoch": 0.0011454113950702983, "grad_norm": 0.50390625, "learning_rate": 0.0012900000000000001, "loss": 0.4613, "step": 646 }, { "epoch": 0.0011489575603801136, "grad_norm": 0.4140625, "learning_rate": 0.001294, "loss": 0.5105, "step": 648 }, { "epoch": 0.0011525037256899285, "grad_norm": 0.5234375, "learning_rate": 0.0012980000000000001, "loss": 0.7083, "step": 650 }, { "epoch": 0.0011560498909997437, "grad_norm": 0.6640625, "learning_rate": 0.001302, "loss": 0.4454, "step": 652 }, { "epoch": 0.001159596056309559, "grad_norm": 0.52734375, "learning_rate": 0.001306, "loss": 0.3984, "step": 654 }, { "epoch": 0.0011631422216193741, "grad_norm": 1.7578125, "learning_rate": 0.0013100000000000002, "loss": 0.4624, "step": 656 }, { "epoch": 0.0011666883869291894, "grad_norm": 0.39453125, "learning_rate": 0.001314, "loss": 0.5275, "step": 658 }, { "epoch": 0.0011702345522390046, "grad_norm": 0.62890625, "learning_rate": 0.0013180000000000002, "loss": 0.4649, "step": 660 }, { "epoch": 0.0011737807175488195, "grad_norm": 0.451171875, "learning_rate": 0.001322, "loss": 0.4366, "step": 662 }, { "epoch": 0.0011773268828586347, "grad_norm": 0.41015625, "learning_rate": 0.0013260000000000001, "loss": 0.4079, "step": 664 }, { "epoch": 0.00118087304816845, "grad_norm": 0.466796875, "learning_rate": 0.00133, "loss": 0.537, "step": 666 }, { "epoch": 0.0011844192134782652, "grad_norm": 0.8046875, "learning_rate": 0.0013340000000000001, "loss": 0.4374, "step": 668 }, { "epoch": 0.0011879653787880804, "grad_norm": 0.828125, "learning_rate": 0.0013380000000000002, "loss": 0.5113, "step": 670 }, { "epoch": 0.0011915115440978953, "grad_norm": 0.5546875, "learning_rate": 0.001342, "loss": 0.4182, "step": 672 }, { "epoch": 0.0011950577094077105, "grad_norm": 2.015625, "learning_rate": 0.0013460000000000002, "loss": 0.4994, "step": 674 }, { "epoch": 0.0011986038747175258, "grad_norm": 1.5546875, "learning_rate": 0.00135, "loss": 0.7251, "step": 676 }, { "epoch": 0.001202150040027341, "grad_norm": 2.65625, "learning_rate": 0.0013540000000000002, "loss": 0.8491, "step": 678 }, { "epoch": 0.0012056962053371562, "grad_norm": 0.62890625, "learning_rate": 0.001358, "loss": 0.3866, "step": 680 }, { "epoch": 0.0012092423706469714, "grad_norm": 0.82421875, "learning_rate": 0.0013620000000000001, "loss": 0.4451, "step": 682 }, { "epoch": 0.0012127885359567863, "grad_norm": 0.796875, "learning_rate": 0.001366, "loss": 0.3521, "step": 684 }, { "epoch": 0.0012163347012666016, "grad_norm": 0.392578125, "learning_rate": 0.0013700000000000001, "loss": 0.4155, "step": 686 }, { "epoch": 0.0012198808665764168, "grad_norm": 1.28125, "learning_rate": 0.0013740000000000002, "loss": 0.5518, "step": 688 }, { "epoch": 0.001223427031886232, "grad_norm": 0.462890625, "learning_rate": 0.0013779999999999999, "loss": 0.3655, "step": 690 }, { "epoch": 0.0012269731971960472, "grad_norm": 1.109375, "learning_rate": 0.001382, "loss": 0.3804, "step": 692 }, { "epoch": 0.0012305193625058621, "grad_norm": 2.796875, "learning_rate": 0.0013859999999999999, "loss": 0.663, "step": 694 }, { "epoch": 0.0012340655278156774, "grad_norm": 1.203125, "learning_rate": 0.00139, "loss": 0.5644, "step": 696 }, { "epoch": 0.0012376116931254926, "grad_norm": 4.28125, "learning_rate": 0.0013939999999999998, "loss": 0.5431, "step": 698 }, { "epoch": 0.0012411578584353078, "grad_norm": 0.7578125, "learning_rate": 0.001398, "loss": 0.4251, "step": 700 }, { "epoch": 0.001244704023745123, "grad_norm": 0.390625, "learning_rate": 0.001402, "loss": 0.4524, "step": 702 }, { "epoch": 0.0012482501890549382, "grad_norm": 0.36328125, "learning_rate": 0.001406, "loss": 0.3926, "step": 704 }, { "epoch": 0.0012517963543647532, "grad_norm": 1.1484375, "learning_rate": 0.00141, "loss": 0.4461, "step": 706 }, { "epoch": 0.0012553425196745684, "grad_norm": 0.431640625, "learning_rate": 0.001414, "loss": 0.5012, "step": 708 }, { "epoch": 0.0012588886849843836, "grad_norm": 0.46484375, "learning_rate": 0.001418, "loss": 0.4257, "step": 710 }, { "epoch": 0.0012624348502941988, "grad_norm": 1.75, "learning_rate": 0.0014219999999999999, "loss": 0.4114, "step": 712 }, { "epoch": 0.001265981015604014, "grad_norm": 0.66796875, "learning_rate": 0.001426, "loss": 0.4855, "step": 714 }, { "epoch": 0.0012695271809138292, "grad_norm": 2.171875, "learning_rate": 0.00143, "loss": 0.6086, "step": 716 }, { "epoch": 0.0012730733462236442, "grad_norm": 1.375, "learning_rate": 0.001434, "loss": 0.5398, "step": 718 }, { "epoch": 0.0012766195115334594, "grad_norm": 1.2109375, "learning_rate": 0.001438, "loss": 0.5382, "step": 720 }, { "epoch": 0.0012801656768432746, "grad_norm": 1.9765625, "learning_rate": 0.001442, "loss": 0.4693, "step": 722 }, { "epoch": 0.0012837118421530898, "grad_norm": 1.1015625, "learning_rate": 0.001446, "loss": 0.4705, "step": 724 }, { "epoch": 0.001287258007462905, "grad_norm": 0.83984375, "learning_rate": 0.00145, "loss": 0.4714, "step": 726 }, { "epoch": 0.00129080417277272, "grad_norm": 0.66796875, "learning_rate": 0.001454, "loss": 0.9141, "step": 728 }, { "epoch": 0.0012943503380825352, "grad_norm": 0.77734375, "learning_rate": 0.001458, "loss": 0.6113, "step": 730 }, { "epoch": 0.0012978965033923504, "grad_norm": 2.09375, "learning_rate": 0.001462, "loss": 0.4465, "step": 732 }, { "epoch": 0.0013014426687021656, "grad_norm": 1.1875, "learning_rate": 0.001466, "loss": 0.4927, "step": 734 }, { "epoch": 0.0013049888340119808, "grad_norm": 0.3671875, "learning_rate": 0.00147, "loss": 0.3272, "step": 736 }, { "epoch": 0.001308534999321796, "grad_norm": 0.7578125, "learning_rate": 0.001474, "loss": 0.5498, "step": 738 }, { "epoch": 0.001312081164631611, "grad_norm": 2.6875, "learning_rate": 0.001478, "loss": 0.726, "step": 740 }, { "epoch": 0.0013156273299414262, "grad_norm": 0.97265625, "learning_rate": 0.001482, "loss": 0.4328, "step": 742 }, { "epoch": 0.0013191734952512414, "grad_norm": 3.953125, "learning_rate": 0.0014860000000000001, "loss": 0.4116, "step": 744 }, { "epoch": 0.0013227196605610566, "grad_norm": 1.1015625, "learning_rate": 0.00149, "loss": 0.403, "step": 746 }, { "epoch": 0.0013262658258708718, "grad_norm": 0.671875, "learning_rate": 0.001494, "loss": 0.4467, "step": 748 }, { "epoch": 0.001329811991180687, "grad_norm": 0.96875, "learning_rate": 0.001498, "loss": 0.4754, "step": 750 }, { "epoch": 0.001333358156490502, "grad_norm": 0.9453125, "learning_rate": 0.001502, "loss": 0.5166, "step": 752 }, { "epoch": 0.0013369043218003172, "grad_norm": 1.3125, "learning_rate": 0.001506, "loss": 0.4412, "step": 754 }, { "epoch": 0.0013404504871101324, "grad_norm": 0.83984375, "learning_rate": 0.00151, "loss": 0.3362, "step": 756 }, { "epoch": 0.0013439966524199476, "grad_norm": 0.796875, "learning_rate": 0.001514, "loss": 0.3429, "step": 758 }, { "epoch": 0.0013475428177297628, "grad_norm": 0.470703125, "learning_rate": 0.001518, "loss": 0.4573, "step": 760 }, { "epoch": 0.0013510889830395778, "grad_norm": 0.84375, "learning_rate": 0.0015220000000000001, "loss": 0.5393, "step": 762 }, { "epoch": 0.001354635148349393, "grad_norm": 0.9453125, "learning_rate": 0.001526, "loss": 0.3999, "step": 764 }, { "epoch": 0.0013581813136592082, "grad_norm": 0.447265625, "learning_rate": 0.0015300000000000001, "loss": 0.4177, "step": 766 }, { "epoch": 0.0013617274789690234, "grad_norm": 1.1796875, "learning_rate": 0.001534, "loss": 0.7588, "step": 768 }, { "epoch": 0.0013652736442788386, "grad_norm": 0.66015625, "learning_rate": 0.001538, "loss": 0.4386, "step": 770 }, { "epoch": 0.0013688198095886538, "grad_norm": 1.3359375, "learning_rate": 0.001542, "loss": 0.3225, "step": 772 }, { "epoch": 0.0013723659748984688, "grad_norm": 1.671875, "learning_rate": 0.001546, "loss": 0.4429, "step": 774 }, { "epoch": 0.001375912140208284, "grad_norm": 0.625, "learning_rate": 0.0015500000000000002, "loss": 0.4224, "step": 776 }, { "epoch": 0.0013794583055180992, "grad_norm": 1.0703125, "learning_rate": 0.001554, "loss": 0.4154, "step": 778 }, { "epoch": 0.0013830044708279144, "grad_norm": 0.6015625, "learning_rate": 0.0015580000000000001, "loss": 0.3761, "step": 780 }, { "epoch": 0.0013865506361377296, "grad_norm": 0.515625, "learning_rate": 0.001562, "loss": 0.3326, "step": 782 }, { "epoch": 0.0013900968014475448, "grad_norm": 0.54296875, "learning_rate": 0.0015660000000000001, "loss": 0.4234, "step": 784 }, { "epoch": 0.0013936429667573598, "grad_norm": 0.87109375, "learning_rate": 0.00157, "loss": 0.3707, "step": 786 }, { "epoch": 0.001397189132067175, "grad_norm": 2.421875, "learning_rate": 0.001574, "loss": 0.4198, "step": 788 }, { "epoch": 0.0014007352973769902, "grad_norm": 1.6015625, "learning_rate": 0.0015780000000000002, "loss": 0.6297, "step": 790 }, { "epoch": 0.0014042814626868054, "grad_norm": 2.15625, "learning_rate": 0.001582, "loss": 0.437, "step": 792 }, { "epoch": 0.0014078276279966206, "grad_norm": 0.333984375, "learning_rate": 0.0015860000000000002, "loss": 0.3658, "step": 794 }, { "epoch": 0.0014113737933064356, "grad_norm": 0.4921875, "learning_rate": 0.00159, "loss": 0.423, "step": 796 }, { "epoch": 0.0014149199586162508, "grad_norm": 0.322265625, "learning_rate": 0.0015940000000000001, "loss": 0.353, "step": 798 }, { "epoch": 0.001418466123926066, "grad_norm": 2.28125, "learning_rate": 0.001598, "loss": 0.4482, "step": 800 }, { "epoch": 0.0014220122892358812, "grad_norm": 0.357421875, "learning_rate": 0.0016020000000000001, "loss": 0.5871, "step": 802 }, { "epoch": 0.0014255584545456964, "grad_norm": 0.62109375, "learning_rate": 0.0016060000000000002, "loss": 0.4032, "step": 804 }, { "epoch": 0.0014291046198555116, "grad_norm": 0.7109375, "learning_rate": 0.00161, "loss": 0.3488, "step": 806 }, { "epoch": 0.0014326507851653266, "grad_norm": 1.3828125, "learning_rate": 0.0016140000000000002, "loss": 0.4464, "step": 808 }, { "epoch": 0.0014361969504751418, "grad_norm": 0.70703125, "learning_rate": 0.001618, "loss": 0.427, "step": 810 }, { "epoch": 0.001439743115784957, "grad_norm": 0.796875, "learning_rate": 0.0016220000000000002, "loss": 0.3566, "step": 812 }, { "epoch": 0.0014432892810947722, "grad_norm": 0.3359375, "learning_rate": 0.0016259999999999998, "loss": 0.3749, "step": 814 }, { "epoch": 0.0014468354464045874, "grad_norm": 1.046875, "learning_rate": 0.00163, "loss": 0.4408, "step": 816 }, { "epoch": 0.0014503816117144026, "grad_norm": 0.435546875, "learning_rate": 0.001634, "loss": 0.4197, "step": 818 }, { "epoch": 0.0014539277770242176, "grad_norm": 0.5234375, "learning_rate": 0.001638, "loss": 0.4298, "step": 820 }, { "epoch": 0.0014574739423340328, "grad_norm": 0.59765625, "learning_rate": 0.001642, "loss": 0.3424, "step": 822 }, { "epoch": 0.001461020107643848, "grad_norm": 0.466796875, "learning_rate": 0.001646, "loss": 0.4055, "step": 824 }, { "epoch": 0.0014645662729536632, "grad_norm": 0.3203125, "learning_rate": 0.00165, "loss": 0.387, "step": 826 }, { "epoch": 0.0014681124382634784, "grad_norm": 0.4453125, "learning_rate": 0.0016539999999999999, "loss": 0.4673, "step": 828 }, { "epoch": 0.0014716586035732934, "grad_norm": 1.34375, "learning_rate": 0.001658, "loss": 0.884, "step": 830 }, { "epoch": 0.0014752047688831086, "grad_norm": 0.3828125, "learning_rate": 0.0016619999999999998, "loss": 0.3106, "step": 832 }, { "epoch": 0.0014787509341929238, "grad_norm": 1.09375, "learning_rate": 0.001666, "loss": 0.4596, "step": 834 }, { "epoch": 0.001482297099502739, "grad_norm": 0.875, "learning_rate": 0.00167, "loss": 0.5467, "step": 836 }, { "epoch": 0.0014858432648125542, "grad_norm": 0.68359375, "learning_rate": 0.001674, "loss": 0.3769, "step": 838 }, { "epoch": 0.0014893894301223694, "grad_norm": 0.73828125, "learning_rate": 0.001678, "loss": 0.4614, "step": 840 }, { "epoch": 0.0014929355954321844, "grad_norm": 0.6015625, "learning_rate": 0.001682, "loss": 0.4218, "step": 842 }, { "epoch": 0.0014964817607419996, "grad_norm": 0.98828125, "learning_rate": 0.001686, "loss": 0.3143, "step": 844 }, { "epoch": 0.0015000279260518148, "grad_norm": 1.703125, "learning_rate": 0.0016899999999999999, "loss": 0.6092, "step": 846 }, { "epoch": 0.00150357409136163, "grad_norm": 0.416015625, "learning_rate": 0.001694, "loss": 0.3918, "step": 848 }, { "epoch": 0.0015071202566714452, "grad_norm": 0.47265625, "learning_rate": 0.001698, "loss": 0.3889, "step": 850 }, { "epoch": 0.0015106664219812602, "grad_norm": 0.8515625, "learning_rate": 0.001702, "loss": 0.4317, "step": 852 }, { "epoch": 0.0015142125872910754, "grad_norm": 0.5546875, "learning_rate": 0.001706, "loss": 0.433, "step": 854 }, { "epoch": 0.0015177587526008906, "grad_norm": 0.5, "learning_rate": 0.00171, "loss": 0.402, "step": 856 }, { "epoch": 0.0015213049179107058, "grad_norm": 0.984375, "learning_rate": 0.001714, "loss": 0.3781, "step": 858 }, { "epoch": 0.001524851083220521, "grad_norm": 1.125, "learning_rate": 0.001718, "loss": 0.4915, "step": 860 }, { "epoch": 0.0015283972485303362, "grad_norm": 0.48046875, "learning_rate": 0.001722, "loss": 0.485, "step": 862 }, { "epoch": 0.0015319434138401512, "grad_norm": 0.54296875, "learning_rate": 0.001726, "loss": 0.5497, "step": 864 }, { "epoch": 0.0015354895791499664, "grad_norm": 1.046875, "learning_rate": 0.00173, "loss": 0.4267, "step": 866 }, { "epoch": 0.0015390357444597816, "grad_norm": 2.890625, "learning_rate": 0.001734, "loss": 0.4916, "step": 868 }, { "epoch": 0.0015425819097695968, "grad_norm": 0.7265625, "learning_rate": 0.001738, "loss": 0.4525, "step": 870 }, { "epoch": 0.001546128075079412, "grad_norm": 0.734375, "learning_rate": 0.001742, "loss": 0.466, "step": 872 }, { "epoch": 0.0015496742403892272, "grad_norm": 0.6484375, "learning_rate": 0.001746, "loss": 0.4172, "step": 874 }, { "epoch": 0.0015532204056990422, "grad_norm": 2.1875, "learning_rate": 0.00175, "loss": 0.4612, "step": 876 }, { "epoch": 0.0015567665710088574, "grad_norm": 0.51953125, "learning_rate": 0.0017540000000000001, "loss": 0.3634, "step": 878 }, { "epoch": 0.0015603127363186726, "grad_norm": 0.43359375, "learning_rate": 0.001758, "loss": 0.3293, "step": 880 }, { "epoch": 0.0015638589016284878, "grad_norm": 0.45703125, "learning_rate": 0.0017620000000000001, "loss": 0.3595, "step": 882 }, { "epoch": 0.001567405066938303, "grad_norm": 0.58203125, "learning_rate": 0.001766, "loss": 0.3994, "step": 884 }, { "epoch": 0.001570951232248118, "grad_norm": 0.8359375, "learning_rate": 0.00177, "loss": 0.4265, "step": 886 }, { "epoch": 0.0015744973975579332, "grad_norm": 0.62109375, "learning_rate": 0.001774, "loss": 0.3768, "step": 888 }, { "epoch": 0.0015780435628677484, "grad_norm": 0.75, "learning_rate": 0.001778, "loss": 0.3474, "step": 890 }, { "epoch": 0.0015815897281775636, "grad_norm": 0.337890625, "learning_rate": 0.0017820000000000002, "loss": 0.4295, "step": 892 }, { "epoch": 0.0015851358934873788, "grad_norm": 0.5859375, "learning_rate": 0.001786, "loss": 0.4619, "step": 894 }, { "epoch": 0.001588682058797194, "grad_norm": 2.328125, "learning_rate": 0.0017900000000000001, "loss": 0.3903, "step": 896 }, { "epoch": 0.001592228224107009, "grad_norm": 1.6484375, "learning_rate": 0.001794, "loss": 0.4117, "step": 898 }, { "epoch": 0.0015957743894168242, "grad_norm": 1.0, "learning_rate": 0.0017980000000000001, "loss": 0.4776, "step": 900 }, { "epoch": 0.0015993205547266394, "grad_norm": 0.62890625, "learning_rate": 0.001802, "loss": 0.3239, "step": 902 }, { "epoch": 0.0016028667200364546, "grad_norm": 0.439453125, "learning_rate": 0.001806, "loss": 0.4453, "step": 904 }, { "epoch": 0.0016064128853462698, "grad_norm": 0.421875, "learning_rate": 0.0018100000000000002, "loss": 0.3475, "step": 906 }, { "epoch": 0.001609959050656085, "grad_norm": 4.34375, "learning_rate": 0.001814, "loss": 0.5284, "step": 908 }, { "epoch": 0.0016135052159659, "grad_norm": 0.56640625, "learning_rate": 0.0018180000000000002, "loss": 0.3544, "step": 910 }, { "epoch": 0.0016170513812757152, "grad_norm": 0.8828125, "learning_rate": 0.001822, "loss": 0.3681, "step": 912 }, { "epoch": 0.0016205975465855304, "grad_norm": 1.1953125, "learning_rate": 0.0018260000000000001, "loss": 0.3847, "step": 914 }, { "epoch": 0.0016241437118953456, "grad_norm": 0.68359375, "learning_rate": 0.00183, "loss": 0.5371, "step": 916 }, { "epoch": 0.0016276898772051608, "grad_norm": 0.9453125, "learning_rate": 0.0018340000000000001, "loss": 0.4194, "step": 918 }, { "epoch": 0.0016312360425149758, "grad_norm": 3.125, "learning_rate": 0.0018380000000000002, "loss": 0.6421, "step": 920 }, { "epoch": 0.001634782207824791, "grad_norm": 0.51953125, "learning_rate": 0.001842, "loss": 0.4111, "step": 922 }, { "epoch": 0.0016383283731346062, "grad_norm": 1.5546875, "learning_rate": 0.0018460000000000002, "loss": 0.4762, "step": 924 }, { "epoch": 0.0016418745384444214, "grad_norm": 0.466796875, "learning_rate": 0.00185, "loss": 0.4302, "step": 926 }, { "epoch": 0.0016454207037542366, "grad_norm": 0.5703125, "learning_rate": 0.0018540000000000002, "loss": 0.41, "step": 928 }, { "epoch": 0.0016489668690640518, "grad_norm": 0.404296875, "learning_rate": 0.001858, "loss": 0.5211, "step": 930 }, { "epoch": 0.0016525130343738668, "grad_norm": 0.48828125, "learning_rate": 0.0018620000000000002, "loss": 0.5331, "step": 932 }, { "epoch": 0.001656059199683682, "grad_norm": 1.6484375, "learning_rate": 0.001866, "loss": 0.8214, "step": 934 }, { "epoch": 0.0016596053649934972, "grad_norm": 1.875, "learning_rate": 0.0018700000000000001, "loss": 0.3674, "step": 936 }, { "epoch": 0.0016631515303033124, "grad_norm": 17.0, "learning_rate": 0.0018740000000000002, "loss": 0.5836, "step": 938 }, { "epoch": 0.0016666976956131276, "grad_norm": 1.375, "learning_rate": 0.001878, "loss": 0.4125, "step": 940 }, { "epoch": 0.0016702438609229428, "grad_norm": 0.9140625, "learning_rate": 0.001882, "loss": 0.4659, "step": 942 }, { "epoch": 0.0016737900262327578, "grad_norm": 0.478515625, "learning_rate": 0.0018859999999999999, "loss": 0.38, "step": 944 }, { "epoch": 0.001677336191542573, "grad_norm": 0.6484375, "learning_rate": 0.00189, "loss": 0.4455, "step": 946 }, { "epoch": 0.0016808823568523882, "grad_norm": 0.322265625, "learning_rate": 0.0018939999999999999, "loss": 0.3341, "step": 948 }, { "epoch": 0.0016844285221622034, "grad_norm": 0.392578125, "learning_rate": 0.001898, "loss": 0.4781, "step": 950 }, { "epoch": 0.0016879746874720186, "grad_norm": 0.72265625, "learning_rate": 0.001902, "loss": 0.5476, "step": 952 }, { "epoch": 0.0016915208527818336, "grad_norm": 1.2734375, "learning_rate": 0.001906, "loss": 0.476, "step": 954 }, { "epoch": 0.0016950670180916488, "grad_norm": 0.27734375, "learning_rate": 0.00191, "loss": 0.3611, "step": 956 }, { "epoch": 0.001698613183401464, "grad_norm": 1.859375, "learning_rate": 0.001914, "loss": 0.6158, "step": 958 }, { "epoch": 0.0017021593487112792, "grad_norm": 1.703125, "learning_rate": 0.001918, "loss": 0.4304, "step": 960 }, { "epoch": 0.0017057055140210944, "grad_norm": 0.431640625, "learning_rate": 0.0019219999999999999, "loss": 0.3029, "step": 962 }, { "epoch": 0.0017092516793309096, "grad_norm": 0.63671875, "learning_rate": 0.001926, "loss": 0.5024, "step": 964 }, { "epoch": 0.0017127978446407246, "grad_norm": 10.375, "learning_rate": 0.00193, "loss": 0.4969, "step": 966 }, { "epoch": 0.0017163440099505398, "grad_norm": 0.55859375, "learning_rate": 0.001934, "loss": 0.7104, "step": 968 }, { "epoch": 0.001719890175260355, "grad_norm": 0.57421875, "learning_rate": 0.001938, "loss": 0.5163, "step": 970 }, { "epoch": 0.0017234363405701702, "grad_norm": 0.376953125, "learning_rate": 0.001942, "loss": 0.4006, "step": 972 }, { "epoch": 0.0017269825058799854, "grad_norm": 0.453125, "learning_rate": 0.001946, "loss": 0.3615, "step": 974 }, { "epoch": 0.0017305286711898006, "grad_norm": 0.3828125, "learning_rate": 0.00195, "loss": 0.3554, "step": 976 }, { "epoch": 0.0017340748364996156, "grad_norm": 0.56640625, "learning_rate": 0.001954, "loss": 0.4427, "step": 978 }, { "epoch": 0.0017376210018094308, "grad_norm": 0.9921875, "learning_rate": 0.001958, "loss": 0.4424, "step": 980 }, { "epoch": 0.001741167167119246, "grad_norm": 1.4609375, "learning_rate": 0.001962, "loss": 0.5594, "step": 982 }, { "epoch": 0.0017447133324290612, "grad_norm": 1.0625, "learning_rate": 0.001966, "loss": 0.4296, "step": 984 }, { "epoch": 0.0017482594977388764, "grad_norm": 1.4765625, "learning_rate": 0.00197, "loss": 0.4107, "step": 986 }, { "epoch": 0.0017518056630486914, "grad_norm": 0.3984375, "learning_rate": 0.001974, "loss": 0.38, "step": 988 }, { "epoch": 0.0017553518283585066, "grad_norm": 0.68359375, "learning_rate": 0.001978, "loss": 0.4456, "step": 990 }, { "epoch": 0.0017588979936683218, "grad_norm": 0.33203125, "learning_rate": 0.001982, "loss": 0.4618, "step": 992 }, { "epoch": 0.001762444158978137, "grad_norm": 0.33984375, "learning_rate": 0.001986, "loss": 0.3707, "step": 994 }, { "epoch": 0.0017659903242879522, "grad_norm": 0.255859375, "learning_rate": 0.00199, "loss": 0.3779, "step": 996 }, { "epoch": 0.0017695364895977674, "grad_norm": 0.2734375, "learning_rate": 0.001994, "loss": 0.4004, "step": 998 }, { "epoch": 0.0017730826549075824, "grad_norm": 2.25, "learning_rate": 0.001998, "loss": 0.3643, "step": 1000 }, { "epoch": 0.0017766288202173976, "grad_norm": 0.48046875, "learning_rate": 0.0019999999995079904, "loss": 0.3971, "step": 1002 }, { "epoch": 0.0017801749855272128, "grad_norm": 0.78515625, "learning_rate": 0.0019999999955719132, "loss": 0.3017, "step": 1004 }, { "epoch": 0.001783721150837028, "grad_norm": 0.470703125, "learning_rate": 0.001999999987699759, "loss": 0.3235, "step": 1006 }, { "epoch": 0.0017872673161468432, "grad_norm": 0.90234375, "learning_rate": 0.0019999999758915274, "loss": 0.4187, "step": 1008 }, { "epoch": 0.0017908134814566582, "grad_norm": 1.6484375, "learning_rate": 0.0019999999601472184, "loss": 0.5903, "step": 1010 }, { "epoch": 0.0017943596467664734, "grad_norm": 0.384765625, "learning_rate": 0.0019999999404668326, "loss": 0.6096, "step": 1012 }, { "epoch": 0.0017979058120762886, "grad_norm": 1.3515625, "learning_rate": 0.00199999991685037, "loss": 0.3594, "step": 1014 }, { "epoch": 0.0018014519773861038, "grad_norm": 0.388671875, "learning_rate": 0.001999999889297831, "loss": 0.541, "step": 1016 }, { "epoch": 0.001804998142695919, "grad_norm": 0.40234375, "learning_rate": 0.001999999857809214, "loss": 0.3586, "step": 1018 }, { "epoch": 0.0018085443080057342, "grad_norm": 1.015625, "learning_rate": 0.0019999998223845213, "loss": 0.507, "step": 1020 }, { "epoch": 0.0018120904733155492, "grad_norm": 0.91796875, "learning_rate": 0.001999999783023752, "loss": 0.4054, "step": 1022 }, { "epoch": 0.0018156366386253644, "grad_norm": 0.84375, "learning_rate": 0.0019999997397269066, "loss": 0.3707, "step": 1024 }, { "epoch": 0.0018191828039351796, "grad_norm": 1.8359375, "learning_rate": 0.0019999996924939846, "loss": 0.6278, "step": 1026 }, { "epoch": 0.0018227289692449948, "grad_norm": 0.34765625, "learning_rate": 0.001999999641324987, "loss": 0.3858, "step": 1028 }, { "epoch": 0.00182627513455481, "grad_norm": 0.5859375, "learning_rate": 0.001999999586219914, "loss": 0.3753, "step": 1030 }, { "epoch": 0.0018298212998646252, "grad_norm": 0.6171875, "learning_rate": 0.0019999995271787656, "loss": 0.3808, "step": 1032 }, { "epoch": 0.0018333674651744402, "grad_norm": 0.390625, "learning_rate": 0.0019999994642015415, "loss": 0.2981, "step": 1034 }, { "epoch": 0.0018369136304842554, "grad_norm": 0.734375, "learning_rate": 0.0019999993972882432, "loss": 0.4874, "step": 1036 }, { "epoch": 0.0018404597957940706, "grad_norm": 0.349609375, "learning_rate": 0.00199999932643887, "loss": 0.3889, "step": 1038 }, { "epoch": 0.0018440059611038858, "grad_norm": 0.375, "learning_rate": 0.0019999992516534226, "loss": 0.4473, "step": 1040 }, { "epoch": 0.001847552126413701, "grad_norm": 0.333984375, "learning_rate": 0.001999999172931901, "loss": 0.3383, "step": 1042 }, { "epoch": 0.001851098291723516, "grad_norm": 0.87109375, "learning_rate": 0.001999999090274306, "loss": 0.4581, "step": 1044 }, { "epoch": 0.0018546444570333312, "grad_norm": 0.5234375, "learning_rate": 0.001999999003680638, "loss": 0.3222, "step": 1046 }, { "epoch": 0.0018581906223431464, "grad_norm": 0.62109375, "learning_rate": 0.0019999989131508963, "loss": 0.3686, "step": 1048 }, { "epoch": 0.0018617367876529616, "grad_norm": 0.90234375, "learning_rate": 0.001999998818685083, "loss": 0.5504, "step": 1050 }, { "epoch": 0.0018652829529627768, "grad_norm": 0.3125, "learning_rate": 0.0019999987202831975, "loss": 0.3204, "step": 1052 }, { "epoch": 0.001868829118272592, "grad_norm": 0.361328125, "learning_rate": 0.0019999986179452403, "loss": 0.3595, "step": 1054 }, { "epoch": 0.001872375283582407, "grad_norm": 0.3359375, "learning_rate": 0.0019999985116712117, "loss": 0.3356, "step": 1056 }, { "epoch": 0.0018759214488922222, "grad_norm": 0.40625, "learning_rate": 0.0019999984014611124, "loss": 0.4235, "step": 1058 }, { "epoch": 0.0018794676142020374, "grad_norm": 0.390625, "learning_rate": 0.0019999982873149433, "loss": 0.3783, "step": 1060 }, { "epoch": 0.0018830137795118526, "grad_norm": 0.330078125, "learning_rate": 0.0019999981692327045, "loss": 0.4312, "step": 1062 }, { "epoch": 0.0018865599448216678, "grad_norm": 0.287109375, "learning_rate": 0.001999998047214396, "loss": 0.3751, "step": 1064 }, { "epoch": 0.001890106110131483, "grad_norm": 0.498046875, "learning_rate": 0.0019999979212600187, "loss": 0.4861, "step": 1066 }, { "epoch": 0.001893652275441298, "grad_norm": 0.4921875, "learning_rate": 0.001999997791369574, "loss": 0.4154, "step": 1068 }, { "epoch": 0.0018971984407511132, "grad_norm": 0.392578125, "learning_rate": 0.001999997657543061, "loss": 0.3348, "step": 1070 }, { "epoch": 0.0019007446060609284, "grad_norm": 1.4140625, "learning_rate": 0.0019999975197804816, "loss": 0.5562, "step": 1072 }, { "epoch": 0.0019042907713707436, "grad_norm": 3.984375, "learning_rate": 0.0019999973780818353, "loss": 0.5703, "step": 1074 }, { "epoch": 0.0019078369366805588, "grad_norm": 0.31640625, "learning_rate": 0.0019999972324471235, "loss": 0.3959, "step": 1076 }, { "epoch": 0.0019113831019903738, "grad_norm": 0.43359375, "learning_rate": 0.0019999970828763467, "loss": 0.3247, "step": 1078 }, { "epoch": 0.001914929267300189, "grad_norm": 0.6796875, "learning_rate": 0.0019999969293695054, "loss": 0.439, "step": 1080 }, { "epoch": 0.0019184754326100042, "grad_norm": 0.380859375, "learning_rate": 0.0019999967719266003, "loss": 0.6608, "step": 1082 }, { "epoch": 0.0019220215979198194, "grad_norm": 1.375, "learning_rate": 0.001999996610547632, "loss": 0.4712, "step": 1084 }, { "epoch": 0.0019255677632296346, "grad_norm": 0.8671875, "learning_rate": 0.0019999964452326016, "loss": 0.606, "step": 1086 }, { "epoch": 0.0019291139285394499, "grad_norm": 1.359375, "learning_rate": 0.0019999962759815093, "loss": 0.5051, "step": 1088 }, { "epoch": 0.0019326600938492648, "grad_norm": 0.30078125, "learning_rate": 0.001999996102794356, "loss": 0.3872, "step": 1090 }, { "epoch": 0.00193620625915908, "grad_norm": 0.42578125, "learning_rate": 0.0019999959256711427, "loss": 0.3313, "step": 1092 }, { "epoch": 0.0019397524244688952, "grad_norm": 0.99609375, "learning_rate": 0.0019999957446118696, "loss": 0.4841, "step": 1094 }, { "epoch": 0.0019432985897787104, "grad_norm": 0.92578125, "learning_rate": 0.001999995559616538, "loss": 0.5517, "step": 1096 }, { "epoch": 0.0019468447550885257, "grad_norm": 0.443359375, "learning_rate": 0.0019999953706851493, "loss": 0.3948, "step": 1098 }, { "epoch": 0.0019503909203983409, "grad_norm": 0.451171875, "learning_rate": 0.001999995177817703, "loss": 0.5052, "step": 1100 }, { "epoch": 0.001953937085708156, "grad_norm": 1.3828125, "learning_rate": 0.0019999949810142006, "loss": 0.4857, "step": 1102 }, { "epoch": 0.001957483251017971, "grad_norm": 0.369140625, "learning_rate": 0.0019999947802746432, "loss": 0.3922, "step": 1104 }, { "epoch": 0.001961029416327786, "grad_norm": 2.859375, "learning_rate": 0.0019999945755990313, "loss": 0.4307, "step": 1106 }, { "epoch": 0.0019645755816376015, "grad_norm": 0.62890625, "learning_rate": 0.0019999943669873656, "loss": 0.4164, "step": 1108 }, { "epoch": 0.0019681217469474164, "grad_norm": 0.45703125, "learning_rate": 0.0019999941544396474, "loss": 0.3841, "step": 1110 }, { "epoch": 0.001971667912257232, "grad_norm": 4.0625, "learning_rate": 0.001999993937955878, "loss": 0.5836, "step": 1112 }, { "epoch": 0.001975214077567047, "grad_norm": 0.5625, "learning_rate": 0.0019999937175360577, "loss": 0.3593, "step": 1114 }, { "epoch": 0.0019787602428768623, "grad_norm": 0.380859375, "learning_rate": 0.0019999934931801875, "loss": 0.351, "step": 1116 }, { "epoch": 0.0019823064081866773, "grad_norm": 0.515625, "learning_rate": 0.0019999932648882687, "loss": 0.3285, "step": 1118 }, { "epoch": 0.0019858525734964922, "grad_norm": 0.59375, "learning_rate": 0.001999993032660302, "loss": 0.5993, "step": 1120 }, { "epoch": 0.0019893987388063077, "grad_norm": 0.6875, "learning_rate": 0.0019999927964962885, "loss": 0.3257, "step": 1122 }, { "epoch": 0.0019929449041161226, "grad_norm": 0.6640625, "learning_rate": 0.0019999925563962294, "loss": 0.4925, "step": 1124 }, { "epoch": 0.001996491069425938, "grad_norm": 0.404296875, "learning_rate": 0.001999992312360126, "loss": 0.3838, "step": 1126 }, { "epoch": 0.002000037234735753, "grad_norm": 0.47265625, "learning_rate": 0.001999992064387978, "loss": 0.3545, "step": 1128 }, { "epoch": 0.002003583400045568, "grad_norm": 0.63671875, "learning_rate": 0.0019999918124797883, "loss": 0.4683, "step": 1130 }, { "epoch": 0.0020071295653553835, "grad_norm": 0.76953125, "learning_rate": 0.0019999915566355575, "loss": 0.413, "step": 1132 }, { "epoch": 0.0020106757306651984, "grad_norm": 0.65234375, "learning_rate": 0.0019999912968552856, "loss": 0.4569, "step": 1134 }, { "epoch": 0.002014221895975014, "grad_norm": 0.375, "learning_rate": 0.0019999910331389746, "loss": 0.3477, "step": 1136 }, { "epoch": 0.002017768061284829, "grad_norm": 0.52734375, "learning_rate": 0.001999990765486626, "loss": 0.3038, "step": 1138 }, { "epoch": 0.002021314226594644, "grad_norm": 0.59765625, "learning_rate": 0.00199999049389824, "loss": 0.3498, "step": 1140 }, { "epoch": 0.0020248603919044593, "grad_norm": 0.45703125, "learning_rate": 0.001999990218373819, "loss": 0.563, "step": 1142 }, { "epoch": 0.0020284065572142742, "grad_norm": 0.470703125, "learning_rate": 0.0019999899389133635, "loss": 0.7518, "step": 1144 }, { "epoch": 0.0020319527225240897, "grad_norm": 2.0625, "learning_rate": 0.001999989655516875, "loss": 0.5289, "step": 1146 }, { "epoch": 0.0020354988878339047, "grad_norm": 0.423828125, "learning_rate": 0.001999989368184354, "loss": 0.3241, "step": 1148 }, { "epoch": 0.00203904505314372, "grad_norm": 1.0625, "learning_rate": 0.001999989076915802, "loss": 0.527, "step": 1150 }, { "epoch": 0.002042591218453535, "grad_norm": 1.71875, "learning_rate": 0.001999988781711221, "loss": 0.5642, "step": 1152 }, { "epoch": 0.00204613738376335, "grad_norm": 0.921875, "learning_rate": 0.0019999884825706122, "loss": 0.4935, "step": 1154 }, { "epoch": 0.0020496835490731655, "grad_norm": 0.91796875, "learning_rate": 0.001999988179493976, "loss": 0.3964, "step": 1156 }, { "epoch": 0.0020532297143829805, "grad_norm": 2.359375, "learning_rate": 0.001999987872481314, "loss": 0.4454, "step": 1158 }, { "epoch": 0.002056775879692796, "grad_norm": 0.625, "learning_rate": 0.001999987561532629, "loss": 0.4465, "step": 1160 }, { "epoch": 0.002060322045002611, "grad_norm": 1.0234375, "learning_rate": 0.00199998724664792, "loss": 0.4312, "step": 1162 }, { "epoch": 0.002063868210312426, "grad_norm": 0.427734375, "learning_rate": 0.00199998692782719, "loss": 0.4251, "step": 1164 }, { "epoch": 0.0020674143756222413, "grad_norm": 0.3828125, "learning_rate": 0.00199998660507044, "loss": 0.4131, "step": 1166 }, { "epoch": 0.0020709605409320563, "grad_norm": 0.88671875, "learning_rate": 0.0019999862783776716, "loss": 0.3982, "step": 1168 }, { "epoch": 0.0020745067062418717, "grad_norm": 0.443359375, "learning_rate": 0.0019999859477488856, "loss": 0.3609, "step": 1170 }, { "epoch": 0.0020780528715516867, "grad_norm": 0.447265625, "learning_rate": 0.001999985613184084, "loss": 0.3846, "step": 1172 }, { "epoch": 0.0020815990368615016, "grad_norm": 0.984375, "learning_rate": 0.001999985274683268, "loss": 0.4326, "step": 1174 }, { "epoch": 0.002085145202171317, "grad_norm": 0.26953125, "learning_rate": 0.001999984932246439, "loss": 0.3229, "step": 1176 }, { "epoch": 0.002088691367481132, "grad_norm": 0.298828125, "learning_rate": 0.0019999845858735994, "loss": 0.3905, "step": 1178 }, { "epoch": 0.0020922375327909475, "grad_norm": 0.48828125, "learning_rate": 0.0019999842355647494, "loss": 0.5283, "step": 1180 }, { "epoch": 0.0020957836981007625, "grad_norm": 0.24609375, "learning_rate": 0.001999983881319891, "loss": 0.2918, "step": 1182 }, { "epoch": 0.002099329863410578, "grad_norm": 0.5546875, "learning_rate": 0.0019999835231390263, "loss": 0.3663, "step": 1184 }, { "epoch": 0.002102876028720393, "grad_norm": 0.283203125, "learning_rate": 0.0019999831610221564, "loss": 0.3811, "step": 1186 }, { "epoch": 0.002106422194030208, "grad_norm": 6.90625, "learning_rate": 0.0019999827949692827, "loss": 0.68, "step": 1188 }, { "epoch": 0.0021099683593400233, "grad_norm": 0.7109375, "learning_rate": 0.001999982424980407, "loss": 0.3522, "step": 1190 }, { "epoch": 0.0021135145246498383, "grad_norm": 0.45703125, "learning_rate": 0.0019999820510555313, "loss": 0.3751, "step": 1192 }, { "epoch": 0.0021170606899596537, "grad_norm": 2.09375, "learning_rate": 0.0019999816731946563, "loss": 0.4795, "step": 1194 }, { "epoch": 0.0021206068552694687, "grad_norm": 1.2421875, "learning_rate": 0.0019999812913977844, "loss": 0.3934, "step": 1196 }, { "epoch": 0.0021241530205792837, "grad_norm": 0.38671875, "learning_rate": 0.001999980905664918, "loss": 0.549, "step": 1198 }, { "epoch": 0.002127699185889099, "grad_norm": 1.3125, "learning_rate": 0.001999980515996057, "loss": 0.4363, "step": 1200 }, { "epoch": 0.002131245351198914, "grad_norm": 0.38671875, "learning_rate": 0.001999980122391204, "loss": 0.4437, "step": 1202 }, { "epoch": 0.0021347915165087295, "grad_norm": 0.439453125, "learning_rate": 0.001999979724850361, "loss": 0.3607, "step": 1204 }, { "epoch": 0.0021383376818185445, "grad_norm": 0.345703125, "learning_rate": 0.001999979323373529, "loss": 0.3687, "step": 1206 }, { "epoch": 0.0021418838471283595, "grad_norm": 0.5234375, "learning_rate": 0.001999978917960711, "loss": 0.3193, "step": 1208 }, { "epoch": 0.002145430012438175, "grad_norm": 0.51171875, "learning_rate": 0.0019999785086119073, "loss": 0.4807, "step": 1210 }, { "epoch": 0.00214897617774799, "grad_norm": 0.3359375, "learning_rate": 0.0019999780953271207, "loss": 0.3487, "step": 1212 }, { "epoch": 0.0021525223430578053, "grad_norm": 0.4296875, "learning_rate": 0.0019999776781063523, "loss": 0.4667, "step": 1214 }, { "epoch": 0.0021560685083676203, "grad_norm": 0.26171875, "learning_rate": 0.001999977256949605, "loss": 0.3317, "step": 1216 }, { "epoch": 0.0021596146736774357, "grad_norm": 0.361328125, "learning_rate": 0.001999976831856879, "loss": 0.3814, "step": 1218 }, { "epoch": 0.0021631608389872507, "grad_norm": 0.77734375, "learning_rate": 0.001999976402828178, "loss": 0.3466, "step": 1220 }, { "epoch": 0.0021667070042970657, "grad_norm": 0.62890625, "learning_rate": 0.001999975969863502, "loss": 0.382, "step": 1222 }, { "epoch": 0.002170253169606881, "grad_norm": 0.7109375, "learning_rate": 0.001999975532962855, "loss": 0.5704, "step": 1224 }, { "epoch": 0.002173799334916696, "grad_norm": 3.5625, "learning_rate": 0.0019999750921262374, "loss": 0.6261, "step": 1226 }, { "epoch": 0.0021773455002265115, "grad_norm": 0.921875, "learning_rate": 0.001999974647353651, "loss": 0.408, "step": 1228 }, { "epoch": 0.0021808916655363265, "grad_norm": 0.51953125, "learning_rate": 0.001999974198645099, "loss": 0.3717, "step": 1230 }, { "epoch": 0.0021844378308461415, "grad_norm": 0.287109375, "learning_rate": 0.0019999737460005824, "loss": 0.303, "step": 1232 }, { "epoch": 0.002187983996155957, "grad_norm": 0.62109375, "learning_rate": 0.001999973289420103, "loss": 0.3554, "step": 1234 }, { "epoch": 0.002191530161465772, "grad_norm": 0.326171875, "learning_rate": 0.0019999728289036636, "loss": 0.292, "step": 1236 }, { "epoch": 0.0021950763267755873, "grad_norm": 0.322265625, "learning_rate": 0.0019999723644512656, "loss": 0.3423, "step": 1238 }, { "epoch": 0.0021986224920854023, "grad_norm": 0.72265625, "learning_rate": 0.0019999718960629115, "loss": 0.4001, "step": 1240 }, { "epoch": 0.0022021686573952173, "grad_norm": 0.255859375, "learning_rate": 0.001999971423738603, "loss": 0.4699, "step": 1242 }, { "epoch": 0.0022057148227050327, "grad_norm": 0.306640625, "learning_rate": 0.001999970947478342, "loss": 0.3121, "step": 1244 }, { "epoch": 0.0022092609880148477, "grad_norm": 0.4765625, "learning_rate": 0.0019999704672821307, "loss": 0.3609, "step": 1246 }, { "epoch": 0.002212807153324663, "grad_norm": 0.279296875, "learning_rate": 0.001999969983149972, "loss": 0.3226, "step": 1248 }, { "epoch": 0.002216353318634478, "grad_norm": 0.2734375, "learning_rate": 0.001999969495081867, "loss": 0.3121, "step": 1250 }, { "epoch": 0.0022198994839442935, "grad_norm": 0.408203125, "learning_rate": 0.0019999690030778183, "loss": 0.3878, "step": 1252 }, { "epoch": 0.0022234456492541085, "grad_norm": 0.462890625, "learning_rate": 0.001999968507137828, "loss": 0.3017, "step": 1254 }, { "epoch": 0.0022269918145639235, "grad_norm": 0.69140625, "learning_rate": 0.0019999680072618977, "loss": 0.3342, "step": 1256 }, { "epoch": 0.002230537979873739, "grad_norm": 0.462890625, "learning_rate": 0.001999967503450031, "loss": 0.3295, "step": 1258 }, { "epoch": 0.002234084145183554, "grad_norm": 0.33984375, "learning_rate": 0.0019999669957022283, "loss": 0.3612, "step": 1260 }, { "epoch": 0.0022376303104933693, "grad_norm": 0.404296875, "learning_rate": 0.001999966484018493, "loss": 0.4064, "step": 1262 }, { "epoch": 0.0022411764758031843, "grad_norm": 0.97265625, "learning_rate": 0.0019999659683988275, "loss": 0.3753, "step": 1264 }, { "epoch": 0.0022447226411129993, "grad_norm": 0.5703125, "learning_rate": 0.0019999654488432332, "loss": 0.3581, "step": 1266 }, { "epoch": 0.0022482688064228147, "grad_norm": 0.328125, "learning_rate": 0.0019999649253517127, "loss": 0.3033, "step": 1268 }, { "epoch": 0.0022518149717326297, "grad_norm": 0.59765625, "learning_rate": 0.0019999643979242685, "loss": 0.3, "step": 1270 }, { "epoch": 0.002255361137042445, "grad_norm": 0.7578125, "learning_rate": 0.001999963866560903, "loss": 0.3704, "step": 1272 }, { "epoch": 0.00225890730235226, "grad_norm": 0.310546875, "learning_rate": 0.001999963331261618, "loss": 0.3202, "step": 1274 }, { "epoch": 0.002262453467662075, "grad_norm": 0.83984375, "learning_rate": 0.0019999627920264163, "loss": 0.4499, "step": 1276 }, { "epoch": 0.0022659996329718905, "grad_norm": 1.640625, "learning_rate": 0.0019999622488553, "loss": 0.4349, "step": 1278 }, { "epoch": 0.0022695457982817055, "grad_norm": 0.427734375, "learning_rate": 0.0019999617017482717, "loss": 0.4296, "step": 1280 }, { "epoch": 0.002273091963591521, "grad_norm": 1.0546875, "learning_rate": 0.001999961150705334, "loss": 0.3572, "step": 1282 }, { "epoch": 0.002276638128901336, "grad_norm": 0.515625, "learning_rate": 0.0019999605957264884, "loss": 0.3237, "step": 1284 }, { "epoch": 0.0022801842942111513, "grad_norm": 0.451171875, "learning_rate": 0.0019999600368117384, "loss": 0.3838, "step": 1286 }, { "epoch": 0.0022837304595209663, "grad_norm": 0.466796875, "learning_rate": 0.0019999594739610856, "loss": 0.3879, "step": 1288 }, { "epoch": 0.0022872766248307813, "grad_norm": 0.3046875, "learning_rate": 0.0019999589071745326, "loss": 0.4766, "step": 1290 }, { "epoch": 0.0022908227901405967, "grad_norm": 0.275390625, "learning_rate": 0.001999958336452083, "loss": 0.3154, "step": 1292 }, { "epoch": 0.0022943689554504117, "grad_norm": 0.79296875, "learning_rate": 0.0019999577617937376, "loss": 0.2976, "step": 1294 }, { "epoch": 0.002297915120760227, "grad_norm": 0.255859375, "learning_rate": 0.0019999571831995, "loss": 0.3841, "step": 1296 }, { "epoch": 0.002301461286070042, "grad_norm": 0.56640625, "learning_rate": 0.001999956600669372, "loss": 0.3736, "step": 1298 }, { "epoch": 0.002305007451379857, "grad_norm": 0.8046875, "learning_rate": 0.0019999560142033575, "loss": 0.4563, "step": 1300 }, { "epoch": 0.0023085536166896725, "grad_norm": 0.5078125, "learning_rate": 0.0019999554238014573, "loss": 0.3575, "step": 1302 }, { "epoch": 0.0023120997819994875, "grad_norm": 1.7734375, "learning_rate": 0.0019999548294636752, "loss": 0.465, "step": 1304 }, { "epoch": 0.002315645947309303, "grad_norm": 0.412109375, "learning_rate": 0.0019999542311900133, "loss": 0.3361, "step": 1306 }, { "epoch": 0.002319192112619118, "grad_norm": 0.65234375, "learning_rate": 0.0019999536289804745, "loss": 0.3126, "step": 1308 }, { "epoch": 0.002322738277928933, "grad_norm": 0.44140625, "learning_rate": 0.001999953022835061, "loss": 0.4039, "step": 1310 }, { "epoch": 0.0023262844432387483, "grad_norm": 0.51171875, "learning_rate": 0.001999952412753776, "loss": 0.3648, "step": 1312 }, { "epoch": 0.0023298306085485633, "grad_norm": 0.65625, "learning_rate": 0.0019999517987366222, "loss": 0.3008, "step": 1314 }, { "epoch": 0.0023333767738583787, "grad_norm": 2.546875, "learning_rate": 0.0019999511807836014, "loss": 0.3931, "step": 1316 }, { "epoch": 0.0023369229391681937, "grad_norm": 0.64453125, "learning_rate": 0.0019999505588947173, "loss": 0.3711, "step": 1318 }, { "epoch": 0.002340469104478009, "grad_norm": 0.220703125, "learning_rate": 0.0019999499330699715, "loss": 0.2766, "step": 1320 }, { "epoch": 0.002344015269787824, "grad_norm": 0.349609375, "learning_rate": 0.0019999493033093685, "loss": 0.3488, "step": 1322 }, { "epoch": 0.002347561435097639, "grad_norm": 1.9296875, "learning_rate": 0.001999948669612909, "loss": 0.4454, "step": 1324 }, { "epoch": 0.0023511076004074545, "grad_norm": 0.87109375, "learning_rate": 0.0019999480319805975, "loss": 0.5258, "step": 1326 }, { "epoch": 0.0023546537657172695, "grad_norm": 0.8203125, "learning_rate": 0.001999947390412436, "loss": 0.3523, "step": 1328 }, { "epoch": 0.002358199931027085, "grad_norm": 1.1875, "learning_rate": 0.001999946744908427, "loss": 0.3534, "step": 1330 }, { "epoch": 0.0023617460963369, "grad_norm": 0.56640625, "learning_rate": 0.001999946095468574, "loss": 0.3965, "step": 1332 }, { "epoch": 0.002365292261646715, "grad_norm": 0.86328125, "learning_rate": 0.001999945442092879, "loss": 0.5747, "step": 1334 }, { "epoch": 0.0023688384269565303, "grad_norm": 0.44921875, "learning_rate": 0.001999944784781346, "loss": 0.3435, "step": 1336 }, { "epoch": 0.0023723845922663453, "grad_norm": 1.921875, "learning_rate": 0.001999944123533977, "loss": 0.4215, "step": 1338 }, { "epoch": 0.0023759307575761607, "grad_norm": 1.3671875, "learning_rate": 0.0019999434583507754, "loss": 0.3861, "step": 1340 }, { "epoch": 0.0023794769228859757, "grad_norm": 1.9765625, "learning_rate": 0.0019999427892317435, "loss": 0.4901, "step": 1342 }, { "epoch": 0.0023830230881957907, "grad_norm": 0.44140625, "learning_rate": 0.0019999421161768847, "loss": 0.3331, "step": 1344 }, { "epoch": 0.002386569253505606, "grad_norm": 0.34765625, "learning_rate": 0.001999941439186202, "loss": 0.5539, "step": 1346 }, { "epoch": 0.002390115418815421, "grad_norm": 0.484375, "learning_rate": 0.0019999407582596976, "loss": 0.3413, "step": 1348 }, { "epoch": 0.0023936615841252365, "grad_norm": 1.7578125, "learning_rate": 0.001999940073397376, "loss": 0.46, "step": 1350 }, { "epoch": 0.0023972077494350515, "grad_norm": 1.0078125, "learning_rate": 0.001999939384599239, "loss": 0.5004, "step": 1352 }, { "epoch": 0.002400753914744867, "grad_norm": 0.3828125, "learning_rate": 0.0019999386918652896, "loss": 0.4681, "step": 1354 }, { "epoch": 0.002404300080054682, "grad_norm": 0.431640625, "learning_rate": 0.001999937995195531, "loss": 0.4008, "step": 1356 }, { "epoch": 0.002407846245364497, "grad_norm": 0.39453125, "learning_rate": 0.0019999372945899664, "loss": 0.3076, "step": 1358 }, { "epoch": 0.0024113924106743123, "grad_norm": 0.369140625, "learning_rate": 0.0019999365900485993, "loss": 0.3904, "step": 1360 }, { "epoch": 0.0024149385759841273, "grad_norm": 0.23828125, "learning_rate": 0.0019999358815714317, "loss": 0.3989, "step": 1362 }, { "epoch": 0.0024184847412939427, "grad_norm": 0.5, "learning_rate": 0.0019999351691584676, "loss": 0.3566, "step": 1364 }, { "epoch": 0.0024220309066037577, "grad_norm": 0.66015625, "learning_rate": 0.0019999344528097095, "loss": 0.3267, "step": 1366 }, { "epoch": 0.0024255770719135727, "grad_norm": 0.65234375, "learning_rate": 0.0019999337325251614, "loss": 0.3624, "step": 1368 }, { "epoch": 0.002429123237223388, "grad_norm": 1.296875, "learning_rate": 0.0019999330083048258, "loss": 0.3912, "step": 1370 }, { "epoch": 0.002432669402533203, "grad_norm": 1.078125, "learning_rate": 0.0019999322801487057, "loss": 0.4825, "step": 1372 }, { "epoch": 0.0024362155678430185, "grad_norm": 0.66796875, "learning_rate": 0.0019999315480568047, "loss": 0.3519, "step": 1374 }, { "epoch": 0.0024397617331528335, "grad_norm": 0.6875, "learning_rate": 0.001999930812029126, "loss": 0.4869, "step": 1376 }, { "epoch": 0.0024433078984626485, "grad_norm": 5.1875, "learning_rate": 0.001999930072065672, "loss": 0.6554, "step": 1378 }, { "epoch": 0.002446854063772464, "grad_norm": 0.859375, "learning_rate": 0.0019999293281664473, "loss": 0.4527, "step": 1380 }, { "epoch": 0.002450400229082279, "grad_norm": 0.4140625, "learning_rate": 0.001999928580331454, "loss": 0.4871, "step": 1382 }, { "epoch": 0.0024539463943920943, "grad_norm": 0.255859375, "learning_rate": 0.0019999278285606965, "loss": 0.3651, "step": 1384 }, { "epoch": 0.0024574925597019093, "grad_norm": 0.265625, "learning_rate": 0.0019999270728541766, "loss": 0.3327, "step": 1386 }, { "epoch": 0.0024610387250117243, "grad_norm": 0.59765625, "learning_rate": 0.001999926313211899, "loss": 0.4196, "step": 1388 }, { "epoch": 0.0024645848903215397, "grad_norm": 0.330078125, "learning_rate": 0.0019999255496338663, "loss": 0.3441, "step": 1390 }, { "epoch": 0.0024681310556313547, "grad_norm": 0.61328125, "learning_rate": 0.001999924782120082, "loss": 0.7735, "step": 1392 }, { "epoch": 0.00247167722094117, "grad_norm": 0.19140625, "learning_rate": 0.0019999240106705492, "loss": 0.5843, "step": 1394 }, { "epoch": 0.002475223386250985, "grad_norm": 0.72265625, "learning_rate": 0.0019999232352852715, "loss": 0.4501, "step": 1396 }, { "epoch": 0.0024787695515608005, "grad_norm": 0.453125, "learning_rate": 0.001999922455964253, "loss": 0.4135, "step": 1398 }, { "epoch": 0.0024823157168706155, "grad_norm": 0.478515625, "learning_rate": 0.0019999216727074956, "loss": 0.5371, "step": 1400 }, { "epoch": 0.0024858618821804305, "grad_norm": 1.796875, "learning_rate": 0.001999920885515004, "loss": 0.5403, "step": 1402 }, { "epoch": 0.002489408047490246, "grad_norm": 0.3515625, "learning_rate": 0.0019999200943867806, "loss": 0.4323, "step": 1404 }, { "epoch": 0.002492954212800061, "grad_norm": 0.6875, "learning_rate": 0.00199991929932283, "loss": 0.4454, "step": 1406 }, { "epoch": 0.0024965003781098763, "grad_norm": 0.75390625, "learning_rate": 0.001999918500323155, "loss": 0.339, "step": 1408 }, { "epoch": 0.0025000465434196913, "grad_norm": 0.60546875, "learning_rate": 0.0019999176973877594, "loss": 0.4297, "step": 1410 }, { "epoch": 0.0025035927087295063, "grad_norm": 1.1171875, "learning_rate": 0.001999916890516646, "loss": 0.3729, "step": 1412 }, { "epoch": 0.0025071388740393217, "grad_norm": 1.1875, "learning_rate": 0.0019999160797098195, "loss": 0.346, "step": 1414 }, { "epoch": 0.0025106850393491367, "grad_norm": 0.5859375, "learning_rate": 0.0019999152649672826, "loss": 0.4238, "step": 1416 }, { "epoch": 0.002514231204658952, "grad_norm": 1.1640625, "learning_rate": 0.001999914446289039, "loss": 0.494, "step": 1418 }, { "epoch": 0.002517777369968767, "grad_norm": 0.41796875, "learning_rate": 0.0019999136236750923, "loss": 0.3822, "step": 1420 }, { "epoch": 0.002521323535278582, "grad_norm": 0.46875, "learning_rate": 0.001999912797125446, "loss": 0.4121, "step": 1422 }, { "epoch": 0.0025248697005883975, "grad_norm": 0.56640625, "learning_rate": 0.001999911966640104, "loss": 0.3484, "step": 1424 }, { "epoch": 0.0025284158658982125, "grad_norm": 1.0625, "learning_rate": 0.00199991113221907, "loss": 0.5026, "step": 1426 }, { "epoch": 0.002531962031208028, "grad_norm": 0.97265625, "learning_rate": 0.001999910293862347, "loss": 0.4649, "step": 1428 }, { "epoch": 0.002535508196517843, "grad_norm": 0.458984375, "learning_rate": 0.0019999094515699392, "loss": 0.262, "step": 1430 }, { "epoch": 0.0025390543618276583, "grad_norm": 1.1328125, "learning_rate": 0.0019999086053418503, "loss": 0.3965, "step": 1432 }, { "epoch": 0.0025426005271374733, "grad_norm": 2.046875, "learning_rate": 0.001999907755178084, "loss": 0.5808, "step": 1434 }, { "epoch": 0.0025461466924472883, "grad_norm": 0.56640625, "learning_rate": 0.001999906901078644, "loss": 0.3706, "step": 1436 }, { "epoch": 0.0025496928577571037, "grad_norm": 1.2890625, "learning_rate": 0.0019999060430435337, "loss": 0.309, "step": 1438 }, { "epoch": 0.0025532390230669187, "grad_norm": 0.486328125, "learning_rate": 0.0019999051810727572, "loss": 0.354, "step": 1440 }, { "epoch": 0.002556785188376734, "grad_norm": 0.33984375, "learning_rate": 0.0019999043151663182, "loss": 0.3341, "step": 1442 }, { "epoch": 0.002560331353686549, "grad_norm": 0.796875, "learning_rate": 0.00199990344532422, "loss": 0.3325, "step": 1444 }, { "epoch": 0.002563877518996364, "grad_norm": 1.0703125, "learning_rate": 0.0019999025715464673, "loss": 0.4343, "step": 1446 }, { "epoch": 0.0025674236843061795, "grad_norm": 4.28125, "learning_rate": 0.0019999016938330636, "loss": 0.5948, "step": 1448 }, { "epoch": 0.0025709698496159945, "grad_norm": 0.376953125, "learning_rate": 0.0019999008121840125, "loss": 0.3183, "step": 1450 }, { "epoch": 0.00257451601492581, "grad_norm": 0.482421875, "learning_rate": 0.001999899926599318, "loss": 0.3859, "step": 1452 }, { "epoch": 0.002578062180235625, "grad_norm": 0.388671875, "learning_rate": 0.0019998990370789834, "loss": 0.3776, "step": 1454 }, { "epoch": 0.00258160834554544, "grad_norm": 0.44140625, "learning_rate": 0.0019998981436230136, "loss": 0.3878, "step": 1456 }, { "epoch": 0.0025851545108552553, "grad_norm": 0.65234375, "learning_rate": 0.001999897246231412, "loss": 0.3084, "step": 1458 }, { "epoch": 0.0025887006761650703, "grad_norm": 0.390625, "learning_rate": 0.0019998963449041826, "loss": 0.342, "step": 1460 }, { "epoch": 0.0025922468414748857, "grad_norm": 2.34375, "learning_rate": 0.0019998954396413296, "loss": 0.6345, "step": 1462 }, { "epoch": 0.0025957930067847007, "grad_norm": 0.4609375, "learning_rate": 0.001999894530442856, "loss": 0.3512, "step": 1464 }, { "epoch": 0.002599339172094516, "grad_norm": 0.59765625, "learning_rate": 0.001999893617308767, "loss": 0.3512, "step": 1466 }, { "epoch": 0.002602885337404331, "grad_norm": 0.392578125, "learning_rate": 0.001999892700239066, "loss": 0.2984, "step": 1468 }, { "epoch": 0.002606431502714146, "grad_norm": 0.75390625, "learning_rate": 0.0019998917792337567, "loss": 0.3737, "step": 1470 }, { "epoch": 0.0026099776680239615, "grad_norm": 1.28125, "learning_rate": 0.0019998908542928438, "loss": 0.4438, "step": 1472 }, { "epoch": 0.0026135238333337765, "grad_norm": 0.97265625, "learning_rate": 0.001999889925416331, "loss": 0.3816, "step": 1474 }, { "epoch": 0.002617069998643592, "grad_norm": 0.76171875, "learning_rate": 0.001999888992604222, "loss": 0.335, "step": 1476 }, { "epoch": 0.002620616163953407, "grad_norm": 0.80078125, "learning_rate": 0.0019998880558565217, "loss": 0.4098, "step": 1478 }, { "epoch": 0.002624162329263222, "grad_norm": 0.38671875, "learning_rate": 0.0019998871151732335, "loss": 0.3347, "step": 1480 }, { "epoch": 0.0026277084945730373, "grad_norm": 0.640625, "learning_rate": 0.0019998861705543616, "loss": 0.3202, "step": 1482 }, { "epoch": 0.0026312546598828523, "grad_norm": 1.5859375, "learning_rate": 0.001999885221999911, "loss": 0.4216, "step": 1484 }, { "epoch": 0.0026348008251926677, "grad_norm": 0.8203125, "learning_rate": 0.0019998842695098844, "loss": 0.3309, "step": 1486 }, { "epoch": 0.0026383469905024827, "grad_norm": 0.6015625, "learning_rate": 0.0019998833130842873, "loss": 0.4202, "step": 1488 }, { "epoch": 0.0026418931558122977, "grad_norm": 0.578125, "learning_rate": 0.001999882352723123, "loss": 0.418, "step": 1490 }, { "epoch": 0.002645439321122113, "grad_norm": 0.98828125, "learning_rate": 0.001999881388426396, "loss": 0.4025, "step": 1492 }, { "epoch": 0.002648985486431928, "grad_norm": 1.9296875, "learning_rate": 0.0019998804201941107, "loss": 0.3504, "step": 1494 }, { "epoch": 0.0026525316517417435, "grad_norm": 2.203125, "learning_rate": 0.001999879448026271, "loss": 0.3986, "step": 1496 }, { "epoch": 0.0026560778170515585, "grad_norm": 0.5234375, "learning_rate": 0.0019998784719228815, "loss": 0.3664, "step": 1498 }, { "epoch": 0.002659623982361374, "grad_norm": 0.9296875, "learning_rate": 0.001999877491883946, "loss": 0.4292, "step": 1500 }, { "epoch": 0.002663170147671189, "grad_norm": 0.953125, "learning_rate": 0.0019998765079094695, "loss": 0.4353, "step": 1502 }, { "epoch": 0.002666716312981004, "grad_norm": 2.15625, "learning_rate": 0.0019998755199994553, "loss": 0.4917, "step": 1504 }, { "epoch": 0.0026702624782908193, "grad_norm": 0.412109375, "learning_rate": 0.0019998745281539086, "loss": 0.3018, "step": 1506 }, { "epoch": 0.0026738086436006343, "grad_norm": 0.2890625, "learning_rate": 0.0019998735323728334, "loss": 0.4769, "step": 1508 }, { "epoch": 0.0026773548089104498, "grad_norm": 0.72265625, "learning_rate": 0.001999872532656234, "loss": 0.3584, "step": 1510 }, { "epoch": 0.0026809009742202647, "grad_norm": 5.96875, "learning_rate": 0.0019998715290041147, "loss": 0.3792, "step": 1512 }, { "epoch": 0.0026844471395300797, "grad_norm": 0.6328125, "learning_rate": 0.0019998705214164803, "loss": 0.3475, "step": 1514 }, { "epoch": 0.002687993304839895, "grad_norm": 0.53125, "learning_rate": 0.0019998695098933346, "loss": 0.4474, "step": 1516 }, { "epoch": 0.00269153947014971, "grad_norm": 0.69921875, "learning_rate": 0.0019998684944346826, "loss": 0.3354, "step": 1518 }, { "epoch": 0.0026950856354595256, "grad_norm": 1.4296875, "learning_rate": 0.001999867475040529, "loss": 0.4186, "step": 1520 }, { "epoch": 0.0026986318007693405, "grad_norm": 0.423828125, "learning_rate": 0.001999866451710877, "loss": 0.3646, "step": 1522 }, { "epoch": 0.0027021779660791555, "grad_norm": 1.0546875, "learning_rate": 0.0019998654244457324, "loss": 0.2748, "step": 1524 }, { "epoch": 0.002705724131388971, "grad_norm": 0.59375, "learning_rate": 0.001999864393245099, "loss": 0.4799, "step": 1526 }, { "epoch": 0.002709270296698786, "grad_norm": 0.65234375, "learning_rate": 0.001999863358108981, "loss": 0.4214, "step": 1528 }, { "epoch": 0.0027128164620086014, "grad_norm": 0.435546875, "learning_rate": 0.001999862319037384, "loss": 0.424, "step": 1530 }, { "epoch": 0.0027163626273184163, "grad_norm": 0.9453125, "learning_rate": 0.0019998612760303114, "loss": 0.3688, "step": 1532 }, { "epoch": 0.0027199087926282318, "grad_norm": 0.921875, "learning_rate": 0.0019998602290877687, "loss": 0.4423, "step": 1534 }, { "epoch": 0.0027234549579380467, "grad_norm": 0.69921875, "learning_rate": 0.00199985917820976, "loss": 0.3279, "step": 1536 }, { "epoch": 0.0027270011232478617, "grad_norm": 0.388671875, "learning_rate": 0.0019998581233962895, "loss": 0.3472, "step": 1538 }, { "epoch": 0.002730547288557677, "grad_norm": 0.39453125, "learning_rate": 0.001999857064647363, "loss": 0.2892, "step": 1540 }, { "epoch": 0.002734093453867492, "grad_norm": 0.71484375, "learning_rate": 0.0019998560019629835, "loss": 0.4923, "step": 1542 }, { "epoch": 0.0027376396191773076, "grad_norm": 2.0625, "learning_rate": 0.0019998549353431572, "loss": 0.3521, "step": 1544 }, { "epoch": 0.0027411857844871225, "grad_norm": 1.359375, "learning_rate": 0.0019998538647878882, "loss": 0.4604, "step": 1546 }, { "epoch": 0.0027447319497969375, "grad_norm": 2.28125, "learning_rate": 0.001999852790297181, "loss": 0.4689, "step": 1548 }, { "epoch": 0.002748278115106753, "grad_norm": 4.46875, "learning_rate": 0.00199985171187104, "loss": 0.6894, "step": 1550 }, { "epoch": 0.002751824280416568, "grad_norm": 0.4765625, "learning_rate": 0.0019998506295094707, "loss": 0.3231, "step": 1552 }, { "epoch": 0.0027553704457263834, "grad_norm": 1.7421875, "learning_rate": 0.0019998495432124773, "loss": 0.5036, "step": 1554 }, { "epoch": 0.0027589166110361983, "grad_norm": 0.52734375, "learning_rate": 0.0019998484529800643, "loss": 0.4204, "step": 1556 }, { "epoch": 0.0027624627763460133, "grad_norm": 0.9609375, "learning_rate": 0.0019998473588122376, "loss": 0.4533, "step": 1558 }, { "epoch": 0.0027660089416558288, "grad_norm": 0.80859375, "learning_rate": 0.001999846260709001, "loss": 0.4233, "step": 1560 }, { "epoch": 0.0027695551069656437, "grad_norm": 0.478515625, "learning_rate": 0.001999845158670359, "loss": 0.4601, "step": 1562 }, { "epoch": 0.002773101272275459, "grad_norm": 0.58203125, "learning_rate": 0.0019998440526963175, "loss": 0.3775, "step": 1564 }, { "epoch": 0.002776647437585274, "grad_norm": 0.8984375, "learning_rate": 0.0019998429427868806, "loss": 0.4076, "step": 1566 }, { "epoch": 0.0027801936028950896, "grad_norm": 0.419921875, "learning_rate": 0.0019998418289420535, "loss": 0.3472, "step": 1568 }, { "epoch": 0.0027837397682049046, "grad_norm": 2.078125, "learning_rate": 0.0019998407111618413, "loss": 0.4329, "step": 1570 }, { "epoch": 0.0027872859335147195, "grad_norm": 0.84375, "learning_rate": 0.001999839589446248, "loss": 0.6181, "step": 1572 }, { "epoch": 0.002790832098824535, "grad_norm": 2.671875, "learning_rate": 0.001999838463795279, "loss": 0.4774, "step": 1574 }, { "epoch": 0.00279437826413435, "grad_norm": 2.28125, "learning_rate": 0.0019998373342089396, "loss": 0.4125, "step": 1576 }, { "epoch": 0.0027979244294441654, "grad_norm": 1.4140625, "learning_rate": 0.0019998362006872343, "loss": 0.4688, "step": 1578 }, { "epoch": 0.0028014705947539804, "grad_norm": 0.796875, "learning_rate": 0.001999835063230168, "loss": 0.3709, "step": 1580 }, { "epoch": 0.0028050167600637953, "grad_norm": 0.875, "learning_rate": 0.001999833921837746, "loss": 0.4181, "step": 1582 }, { "epoch": 0.0028085629253736108, "grad_norm": 0.51953125, "learning_rate": 0.0019998327765099726, "loss": 0.3191, "step": 1584 }, { "epoch": 0.0028121090906834257, "grad_norm": 0.38671875, "learning_rate": 0.001999831627246854, "loss": 0.3258, "step": 1586 }, { "epoch": 0.002815655255993241, "grad_norm": 3.21875, "learning_rate": 0.001999830474048394, "loss": 0.6824, "step": 1588 }, { "epoch": 0.002819201421303056, "grad_norm": 0.34375, "learning_rate": 0.0019998293169145986, "loss": 0.3697, "step": 1590 }, { "epoch": 0.002822747586612871, "grad_norm": 0.376953125, "learning_rate": 0.0019998281558454723, "loss": 0.3423, "step": 1592 }, { "epoch": 0.0028262937519226866, "grad_norm": 0.578125, "learning_rate": 0.00199982699084102, "loss": 0.3911, "step": 1594 }, { "epoch": 0.0028298399172325015, "grad_norm": 0.349609375, "learning_rate": 0.0019998258219012474, "loss": 0.4432, "step": 1596 }, { "epoch": 0.002833386082542317, "grad_norm": 1.140625, "learning_rate": 0.0019998246490261595, "loss": 0.4584, "step": 1598 }, { "epoch": 0.002836932247852132, "grad_norm": 0.439453125, "learning_rate": 0.0019998234722157613, "loss": 0.285, "step": 1600 }, { "epoch": 0.0028404784131619474, "grad_norm": 0.87109375, "learning_rate": 0.0019998222914700573, "loss": 0.3189, "step": 1602 }, { "epoch": 0.0028440245784717624, "grad_norm": 0.294921875, "learning_rate": 0.0019998211067890543, "loss": 0.3265, "step": 1604 }, { "epoch": 0.0028475707437815773, "grad_norm": 0.609375, "learning_rate": 0.0019998199181727556, "loss": 0.5614, "step": 1606 }, { "epoch": 0.0028511169090913928, "grad_norm": 0.333984375, "learning_rate": 0.0019998187256211673, "loss": 0.3572, "step": 1608 }, { "epoch": 0.0028546630744012078, "grad_norm": 0.64453125, "learning_rate": 0.001999817529134295, "loss": 0.3555, "step": 1610 }, { "epoch": 0.002858209239711023, "grad_norm": 0.236328125, "learning_rate": 0.001999816328712143, "loss": 0.8434, "step": 1612 }, { "epoch": 0.002861755405020838, "grad_norm": 0.427734375, "learning_rate": 0.0019998151243547178, "loss": 0.3892, "step": 1614 }, { "epoch": 0.002865301570330653, "grad_norm": 0.439453125, "learning_rate": 0.0019998139160620236, "loss": 0.3229, "step": 1616 }, { "epoch": 0.0028688477356404686, "grad_norm": 0.5234375, "learning_rate": 0.001999812703834066, "loss": 0.3374, "step": 1618 }, { "epoch": 0.0028723939009502836, "grad_norm": 0.208984375, "learning_rate": 0.0019998114876708497, "loss": 0.349, "step": 1620 }, { "epoch": 0.002875940066260099, "grad_norm": 0.2578125, "learning_rate": 0.0019998102675723812, "loss": 0.3285, "step": 1622 }, { "epoch": 0.002879486231569914, "grad_norm": 0.39453125, "learning_rate": 0.0019998090435386653, "loss": 0.3325, "step": 1624 }, { "epoch": 0.002883032396879729, "grad_norm": 0.6328125, "learning_rate": 0.001999807815569707, "loss": 0.3392, "step": 1626 }, { "epoch": 0.0028865785621895444, "grad_norm": 2.421875, "learning_rate": 0.0019998065836655124, "loss": 0.3521, "step": 1628 }, { "epoch": 0.0028901247274993594, "grad_norm": 0.97265625, "learning_rate": 0.001999805347826086, "loss": 0.4679, "step": 1630 }, { "epoch": 0.0028936708928091748, "grad_norm": 1.015625, "learning_rate": 0.001999804108051434, "loss": 0.3713, "step": 1632 }, { "epoch": 0.0028972170581189898, "grad_norm": 4.90625, "learning_rate": 0.0019998028643415616, "loss": 0.4651, "step": 1634 }, { "epoch": 0.002900763223428805, "grad_norm": 0.5703125, "learning_rate": 0.0019998016166964737, "loss": 0.4644, "step": 1636 }, { "epoch": 0.00290430938873862, "grad_norm": 0.796875, "learning_rate": 0.001999800365116177, "loss": 0.3905, "step": 1638 }, { "epoch": 0.002907855554048435, "grad_norm": 0.71875, "learning_rate": 0.0019997991096006753, "loss": 0.3075, "step": 1640 }, { "epoch": 0.0029114017193582506, "grad_norm": 0.51953125, "learning_rate": 0.0019997978501499754, "loss": 0.3528, "step": 1642 }, { "epoch": 0.0029149478846680656, "grad_norm": 0.515625, "learning_rate": 0.001999796586764082, "loss": 0.6004, "step": 1644 }, { "epoch": 0.002918494049977881, "grad_norm": 0.56640625, "learning_rate": 0.0019997953194430015, "loss": 0.2975, "step": 1646 }, { "epoch": 0.002922040215287696, "grad_norm": 0.49609375, "learning_rate": 0.0019997940481867385, "loss": 0.3195, "step": 1648 }, { "epoch": 0.002925586380597511, "grad_norm": 0.263671875, "learning_rate": 0.001999792772995299, "loss": 0.3221, "step": 1650 }, { "epoch": 0.0029291325459073264, "grad_norm": 0.37890625, "learning_rate": 0.001999791493868689, "loss": 0.294, "step": 1652 }, { "epoch": 0.0029326787112171414, "grad_norm": 0.47265625, "learning_rate": 0.0019997902108069136, "loss": 0.4062, "step": 1654 }, { "epoch": 0.002936224876526957, "grad_norm": 0.98828125, "learning_rate": 0.001999788923809978, "loss": 0.3383, "step": 1656 }, { "epoch": 0.0029397710418367718, "grad_norm": 0.52734375, "learning_rate": 0.001999787632877889, "loss": 0.4175, "step": 1658 }, { "epoch": 0.0029433172071465868, "grad_norm": 0.58203125, "learning_rate": 0.001999786338010651, "loss": 0.2959, "step": 1660 }, { "epoch": 0.002946863372456402, "grad_norm": 0.423828125, "learning_rate": 0.0019997850392082703, "loss": 0.3044, "step": 1662 }, { "epoch": 0.002950409537766217, "grad_norm": 1.3046875, "learning_rate": 0.0019997837364707526, "loss": 0.4403, "step": 1664 }, { "epoch": 0.0029539557030760326, "grad_norm": 0.4921875, "learning_rate": 0.0019997824297981032, "loss": 0.2723, "step": 1666 }, { "epoch": 0.0029575018683858476, "grad_norm": 0.326171875, "learning_rate": 0.0019997811191903286, "loss": 0.3622, "step": 1668 }, { "epoch": 0.0029610480336956626, "grad_norm": 0.498046875, "learning_rate": 0.0019997798046474335, "loss": 0.3541, "step": 1670 }, { "epoch": 0.002964594199005478, "grad_norm": 0.86328125, "learning_rate": 0.001999778486169425, "loss": 0.2949, "step": 1672 }, { "epoch": 0.002968140364315293, "grad_norm": 1.125, "learning_rate": 0.0019997771637563075, "loss": 0.5068, "step": 1674 }, { "epoch": 0.0029716865296251084, "grad_norm": 0.44140625, "learning_rate": 0.0019997758374080874, "loss": 0.3588, "step": 1676 }, { "epoch": 0.0029752326949349234, "grad_norm": 0.3203125, "learning_rate": 0.0019997745071247703, "loss": 0.3341, "step": 1678 }, { "epoch": 0.002978778860244739, "grad_norm": 0.4453125, "learning_rate": 0.0019997731729063622, "loss": 0.4023, "step": 1680 }, { "epoch": 0.0029823250255545538, "grad_norm": 0.263671875, "learning_rate": 0.0019997718347528693, "loss": 0.3561, "step": 1682 }, { "epoch": 0.0029858711908643688, "grad_norm": 0.65234375, "learning_rate": 0.0019997704926642966, "loss": 0.4754, "step": 1684 }, { "epoch": 0.002989417356174184, "grad_norm": 0.68359375, "learning_rate": 0.0019997691466406503, "loss": 0.435, "step": 1686 }, { "epoch": 0.002992963521483999, "grad_norm": 0.6953125, "learning_rate": 0.001999767796681937, "loss": 0.3984, "step": 1688 }, { "epoch": 0.0029965096867938146, "grad_norm": 0.4296875, "learning_rate": 0.0019997664427881616, "loss": 0.3234, "step": 1690 }, { "epoch": 0.0030000558521036296, "grad_norm": 0.349609375, "learning_rate": 0.0019997650849593305, "loss": 0.3247, "step": 1692 }, { "epoch": 0.0030036020174134446, "grad_norm": 0.302734375, "learning_rate": 0.001999763723195449, "loss": 0.3037, "step": 1694 }, { "epoch": 0.00300714818272326, "grad_norm": 1.0703125, "learning_rate": 0.0019997623574965246, "loss": 0.3523, "step": 1696 }, { "epoch": 0.003010694348033075, "grad_norm": 0.50390625, "learning_rate": 0.001999760987862562, "loss": 0.3611, "step": 1698 }, { "epoch": 0.0030142405133428904, "grad_norm": 0.4296875, "learning_rate": 0.001999759614293567, "loss": 0.285, "step": 1700 }, { "epoch": 0.0030177866786527054, "grad_norm": 0.482421875, "learning_rate": 0.0019997582367895462, "loss": 0.3267, "step": 1702 }, { "epoch": 0.0030213328439625204, "grad_norm": 0.3671875, "learning_rate": 0.0019997568553505062, "loss": 0.3262, "step": 1704 }, { "epoch": 0.003024879009272336, "grad_norm": 0.279296875, "learning_rate": 0.0019997554699764516, "loss": 0.3819, "step": 1706 }, { "epoch": 0.0030284251745821508, "grad_norm": 0.6953125, "learning_rate": 0.0019997540806673897, "loss": 0.4152, "step": 1708 }, { "epoch": 0.003031971339891966, "grad_norm": 0.52734375, "learning_rate": 0.0019997526874233258, "loss": 0.3476, "step": 1710 }, { "epoch": 0.003035517505201781, "grad_norm": 0.7265625, "learning_rate": 0.0019997512902442667, "loss": 0.3321, "step": 1712 }, { "epoch": 0.0030390636705115966, "grad_norm": 0.84375, "learning_rate": 0.0019997498891302177, "loss": 0.5933, "step": 1714 }, { "epoch": 0.0030426098358214116, "grad_norm": 0.3984375, "learning_rate": 0.001999748484081185, "loss": 0.3428, "step": 1716 }, { "epoch": 0.0030461560011312266, "grad_norm": 0.94140625, "learning_rate": 0.0019997470750971755, "loss": 0.5058, "step": 1718 }, { "epoch": 0.003049702166441042, "grad_norm": 0.427734375, "learning_rate": 0.001999745662178195, "loss": 0.2842, "step": 1720 }, { "epoch": 0.003053248331750857, "grad_norm": 0.6484375, "learning_rate": 0.0019997442453242495, "loss": 0.4308, "step": 1722 }, { "epoch": 0.0030567944970606724, "grad_norm": 0.5234375, "learning_rate": 0.0019997428245353455, "loss": 0.4701, "step": 1724 }, { "epoch": 0.0030603406623704874, "grad_norm": 0.79296875, "learning_rate": 0.001999741399811489, "loss": 0.4284, "step": 1726 }, { "epoch": 0.0030638868276803024, "grad_norm": 0.39453125, "learning_rate": 0.001999739971152686, "loss": 0.3371, "step": 1728 }, { "epoch": 0.003067432992990118, "grad_norm": 0.40625, "learning_rate": 0.001999738538558943, "loss": 0.3227, "step": 1730 }, { "epoch": 0.0030709791582999328, "grad_norm": 0.54296875, "learning_rate": 0.0019997371020302663, "loss": 0.404, "step": 1732 }, { "epoch": 0.003074525323609748, "grad_norm": 0.310546875, "learning_rate": 0.001999735661566662, "loss": 0.331, "step": 1734 }, { "epoch": 0.003078071488919563, "grad_norm": 0.435546875, "learning_rate": 0.0019997342171681367, "loss": 0.3667, "step": 1736 }, { "epoch": 0.003081617654229378, "grad_norm": 2.296875, "learning_rate": 0.0019997327688346966, "loss": 0.3231, "step": 1738 }, { "epoch": 0.0030851638195391936, "grad_norm": 1.6640625, "learning_rate": 0.001999731316566348, "loss": 0.5626, "step": 1740 }, { "epoch": 0.0030887099848490086, "grad_norm": 0.359375, "learning_rate": 0.001999729860363097, "loss": 0.3205, "step": 1742 }, { "epoch": 0.003092256150158824, "grad_norm": 0.84375, "learning_rate": 0.001999728400224951, "loss": 0.4312, "step": 1744 }, { "epoch": 0.003095802315468639, "grad_norm": 0.4453125, "learning_rate": 0.0019997269361519145, "loss": 0.4246, "step": 1746 }, { "epoch": 0.0030993484807784544, "grad_norm": 1.84375, "learning_rate": 0.0019997254681439957, "loss": 0.5008, "step": 1748 }, { "epoch": 0.0031028946460882694, "grad_norm": 0.34375, "learning_rate": 0.0019997239962012, "loss": 0.3183, "step": 1750 }, { "epoch": 0.0031064408113980844, "grad_norm": 0.287109375, "learning_rate": 0.0019997225203235345, "loss": 0.3441, "step": 1752 }, { "epoch": 0.0031099869767079, "grad_norm": 3.25, "learning_rate": 0.001999721040511005, "loss": 0.3552, "step": 1754 }, { "epoch": 0.003113533142017715, "grad_norm": 0.9375, "learning_rate": 0.0019997195567636188, "loss": 0.3398, "step": 1756 }, { "epoch": 0.00311707930732753, "grad_norm": 0.365234375, "learning_rate": 0.0019997180690813814, "loss": 0.3092, "step": 1758 }, { "epoch": 0.003120625472637345, "grad_norm": 0.365234375, "learning_rate": 0.0019997165774643, "loss": 0.3321, "step": 1760 }, { "epoch": 0.00312417163794716, "grad_norm": 0.6640625, "learning_rate": 0.0019997150819123805, "loss": 0.3145, "step": 1762 }, { "epoch": 0.0031277178032569756, "grad_norm": 0.462890625, "learning_rate": 0.0019997135824256305, "loss": 0.3727, "step": 1764 }, { "epoch": 0.0031312639685667906, "grad_norm": 0.5234375, "learning_rate": 0.0019997120790040553, "loss": 0.3252, "step": 1766 }, { "epoch": 0.003134810133876606, "grad_norm": 0.396484375, "learning_rate": 0.0019997105716476624, "loss": 0.3073, "step": 1768 }, { "epoch": 0.003138356299186421, "grad_norm": 1.296875, "learning_rate": 0.0019997090603564582, "loss": 0.4655, "step": 1770 }, { "epoch": 0.003141902464496236, "grad_norm": 0.478515625, "learning_rate": 0.001999707545130449, "loss": 0.2859, "step": 1772 }, { "epoch": 0.0031454486298060514, "grad_norm": 4.625, "learning_rate": 0.0019997060259696418, "loss": 0.318, "step": 1774 }, { "epoch": 0.0031489947951158664, "grad_norm": 0.703125, "learning_rate": 0.0019997045028740425, "loss": 0.3593, "step": 1776 }, { "epoch": 0.003152540960425682, "grad_norm": 1.8359375, "learning_rate": 0.001999702975843659, "loss": 0.3372, "step": 1778 }, { "epoch": 0.003156087125735497, "grad_norm": 0.61328125, "learning_rate": 0.001999701444878497, "loss": 0.362, "step": 1780 }, { "epoch": 0.003159633291045312, "grad_norm": 1.25, "learning_rate": 0.001999699909978563, "loss": 0.491, "step": 1782 }, { "epoch": 0.003163179456355127, "grad_norm": 0.6640625, "learning_rate": 0.001999698371143865, "loss": 0.3574, "step": 1784 }, { "epoch": 0.003166725621664942, "grad_norm": 8.1875, "learning_rate": 0.0019996968283744086, "loss": 0.44, "step": 1786 }, { "epoch": 0.0031702717869747576, "grad_norm": 2.765625, "learning_rate": 0.0019996952816702007, "loss": 0.4284, "step": 1788 }, { "epoch": 0.0031738179522845726, "grad_norm": 0.47265625, "learning_rate": 0.0019996937310312484, "loss": 0.3777, "step": 1790 }, { "epoch": 0.003177364117594388, "grad_norm": 0.322265625, "learning_rate": 0.0019996921764575586, "loss": 0.3025, "step": 1792 }, { "epoch": 0.003180910282904203, "grad_norm": 0.6875, "learning_rate": 0.0019996906179491373, "loss": 0.3498, "step": 1794 }, { "epoch": 0.003184456448214018, "grad_norm": 0.98046875, "learning_rate": 0.001999689055505992, "loss": 0.2658, "step": 1796 }, { "epoch": 0.0031880026135238334, "grad_norm": 0.408203125, "learning_rate": 0.00199968748912813, "loss": 0.3541, "step": 1798 }, { "epoch": 0.0031915487788336484, "grad_norm": 1.0703125, "learning_rate": 0.001999685918815557, "loss": 0.3026, "step": 1800 }, { "epoch": 0.003195094944143464, "grad_norm": 0.27734375, "learning_rate": 0.00199968434456828, "loss": 0.3607, "step": 1802 }, { "epoch": 0.003198641109453279, "grad_norm": 0.75390625, "learning_rate": 0.001999682766386307, "loss": 0.8249, "step": 1804 }, { "epoch": 0.003202187274763094, "grad_norm": 0.240234375, "learning_rate": 0.001999681184269644, "loss": 0.3065, "step": 1806 }, { "epoch": 0.003205733440072909, "grad_norm": 0.515625, "learning_rate": 0.0019996795982182977, "loss": 0.31, "step": 1808 }, { "epoch": 0.003209279605382724, "grad_norm": 1.1171875, "learning_rate": 0.0019996780082322755, "loss": 0.4118, "step": 1810 }, { "epoch": 0.0032128257706925396, "grad_norm": 0.357421875, "learning_rate": 0.0019996764143115848, "loss": 0.3238, "step": 1812 }, { "epoch": 0.0032163719360023546, "grad_norm": 0.359375, "learning_rate": 0.0019996748164562315, "loss": 0.3518, "step": 1814 }, { "epoch": 0.00321991810131217, "grad_norm": 1.1484375, "learning_rate": 0.0019996732146662236, "loss": 0.4987, "step": 1816 }, { "epoch": 0.003223464266621985, "grad_norm": 0.48828125, "learning_rate": 0.001999671608941567, "loss": 0.3448, "step": 1818 }, { "epoch": 0.0032270104319318, "grad_norm": 0.26171875, "learning_rate": 0.00199966999928227, "loss": 0.3469, "step": 1820 }, { "epoch": 0.0032305565972416154, "grad_norm": 1.0234375, "learning_rate": 0.001999668385688339, "loss": 0.356, "step": 1822 }, { "epoch": 0.0032341027625514304, "grad_norm": 0.375, "learning_rate": 0.0019996667681597808, "loss": 0.3797, "step": 1824 }, { "epoch": 0.003237648927861246, "grad_norm": 0.4140625, "learning_rate": 0.0019996651466966026, "loss": 0.3039, "step": 1826 }, { "epoch": 0.003241195093171061, "grad_norm": 4.25, "learning_rate": 0.001999663521298812, "loss": 0.3366, "step": 1828 }, { "epoch": 0.003244741258480876, "grad_norm": 0.30078125, "learning_rate": 0.001999661891966416, "loss": 0.3459, "step": 1830 }, { "epoch": 0.0032482874237906912, "grad_norm": 0.56640625, "learning_rate": 0.0019996602586994207, "loss": 0.3181, "step": 1832 }, { "epoch": 0.003251833589100506, "grad_norm": 0.36328125, "learning_rate": 0.0019996586214978346, "loss": 0.2824, "step": 1834 }, { "epoch": 0.0032553797544103216, "grad_norm": 0.28125, "learning_rate": 0.001999656980361664, "loss": 0.2619, "step": 1836 }, { "epoch": 0.0032589259197201366, "grad_norm": 4.125, "learning_rate": 0.0019996553352909165, "loss": 0.5336, "step": 1838 }, { "epoch": 0.0032624720850299516, "grad_norm": 0.37109375, "learning_rate": 0.0019996536862855988, "loss": 0.2924, "step": 1840 }, { "epoch": 0.003266018250339767, "grad_norm": 0.7578125, "learning_rate": 0.0019996520333457187, "loss": 0.3617, "step": 1842 }, { "epoch": 0.003269564415649582, "grad_norm": 0.8046875, "learning_rate": 0.0019996503764712834, "loss": 0.3407, "step": 1844 }, { "epoch": 0.0032731105809593974, "grad_norm": 0.66015625, "learning_rate": 0.0019996487156622995, "loss": 0.3611, "step": 1846 }, { "epoch": 0.0032766567462692124, "grad_norm": 0.41015625, "learning_rate": 0.001999647050918775, "loss": 0.3965, "step": 1848 }, { "epoch": 0.003280202911579028, "grad_norm": 0.55859375, "learning_rate": 0.001999645382240717, "loss": 0.392, "step": 1850 }, { "epoch": 0.003283749076888843, "grad_norm": 0.67578125, "learning_rate": 0.0019996437096281325, "loss": 0.2987, "step": 1852 }, { "epoch": 0.003287295242198658, "grad_norm": 0.373046875, "learning_rate": 0.0019996420330810286, "loss": 0.374, "step": 1854 }, { "epoch": 0.0032908414075084732, "grad_norm": 0.361328125, "learning_rate": 0.0019996403525994137, "loss": 0.3721, "step": 1856 }, { "epoch": 0.003294387572818288, "grad_norm": 1.375, "learning_rate": 0.001999638668183294, "loss": 0.269, "step": 1858 }, { "epoch": 0.0032979337381281036, "grad_norm": 0.484375, "learning_rate": 0.0019996369798326776, "loss": 0.397, "step": 1860 }, { "epoch": 0.0033014799034379186, "grad_norm": 3.0, "learning_rate": 0.0019996352875475717, "loss": 0.3565, "step": 1862 }, { "epoch": 0.0033050260687477336, "grad_norm": 0.61328125, "learning_rate": 0.0019996335913279833, "loss": 0.3263, "step": 1864 }, { "epoch": 0.003308572234057549, "grad_norm": 0.58984375, "learning_rate": 0.0019996318911739202, "loss": 0.3105, "step": 1866 }, { "epoch": 0.003312118399367364, "grad_norm": 0.65234375, "learning_rate": 0.00199963018708539, "loss": 0.3022, "step": 1868 }, { "epoch": 0.0033156645646771794, "grad_norm": 0.5234375, "learning_rate": 0.0019996284790624, "loss": 0.3436, "step": 1870 }, { "epoch": 0.0033192107299869944, "grad_norm": 0.62890625, "learning_rate": 0.0019996267671049573, "loss": 0.3547, "step": 1872 }, { "epoch": 0.0033227568952968094, "grad_norm": 1.140625, "learning_rate": 0.00199962505121307, "loss": 0.4605, "step": 1874 }, { "epoch": 0.003326303060606625, "grad_norm": 1.15625, "learning_rate": 0.0019996233313867447, "loss": 0.3832, "step": 1876 }, { "epoch": 0.00332984922591644, "grad_norm": 0.302734375, "learning_rate": 0.00199962160762599, "loss": 0.3048, "step": 1878 }, { "epoch": 0.0033333953912262552, "grad_norm": 0.53515625, "learning_rate": 0.001999619879930813, "loss": 0.3729, "step": 1880 }, { "epoch": 0.0033369415565360702, "grad_norm": 0.84375, "learning_rate": 0.001999618148301221, "loss": 0.3066, "step": 1882 }, { "epoch": 0.0033404877218458856, "grad_norm": 0.64453125, "learning_rate": 0.0019996164127372216, "loss": 0.367, "step": 1884 }, { "epoch": 0.0033440338871557006, "grad_norm": 0.326171875, "learning_rate": 0.0019996146732388226, "loss": 0.3087, "step": 1886 }, { "epoch": 0.0033475800524655156, "grad_norm": 0.39453125, "learning_rate": 0.001999612929806032, "loss": 0.3728, "step": 1888 }, { "epoch": 0.003351126217775331, "grad_norm": 1.8125, "learning_rate": 0.0019996111824388567, "loss": 0.378, "step": 1890 }, { "epoch": 0.003354672383085146, "grad_norm": 0.5234375, "learning_rate": 0.001999609431137305, "loss": 0.3537, "step": 1892 }, { "epoch": 0.0033582185483949614, "grad_norm": 0.75390625, "learning_rate": 0.0019996076759013835, "loss": 0.3804, "step": 1894 }, { "epoch": 0.0033617647137047764, "grad_norm": 0.609375, "learning_rate": 0.0019996059167311014, "loss": 0.2966, "step": 1896 }, { "epoch": 0.0033653108790145914, "grad_norm": 0.2890625, "learning_rate": 0.001999604153626465, "loss": 0.4085, "step": 1898 }, { "epoch": 0.003368857044324407, "grad_norm": 0.734375, "learning_rate": 0.0019996023865874826, "loss": 0.3374, "step": 1900 }, { "epoch": 0.003372403209634222, "grad_norm": 1.2109375, "learning_rate": 0.0019996006156141623, "loss": 0.3316, "step": 1902 }, { "epoch": 0.0033759493749440372, "grad_norm": 0.392578125, "learning_rate": 0.001999598840706511, "loss": 0.356, "step": 1904 }, { "epoch": 0.0033794955402538522, "grad_norm": 0.265625, "learning_rate": 0.0019995970618645375, "loss": 0.3072, "step": 1906 }, { "epoch": 0.003383041705563667, "grad_norm": 2.484375, "learning_rate": 0.0019995952790882484, "loss": 0.2967, "step": 1908 }, { "epoch": 0.0033865878708734826, "grad_norm": 0.3984375, "learning_rate": 0.0019995934923776526, "loss": 0.2774, "step": 1910 }, { "epoch": 0.0033901340361832976, "grad_norm": 1.7578125, "learning_rate": 0.0019995917017327568, "loss": 0.566, "step": 1912 }, { "epoch": 0.003393680201493113, "grad_norm": 0.98046875, "learning_rate": 0.00199958990715357, "loss": 0.3883, "step": 1914 }, { "epoch": 0.003397226366802928, "grad_norm": 0.734375, "learning_rate": 0.001999588108640099, "loss": 0.3311, "step": 1916 }, { "epoch": 0.0034007725321127434, "grad_norm": 0.53515625, "learning_rate": 0.0019995863061923525, "loss": 0.3409, "step": 1918 }, { "epoch": 0.0034043186974225584, "grad_norm": 11.0, "learning_rate": 0.001999584499810338, "loss": 0.3615, "step": 1920 }, { "epoch": 0.0034078648627323734, "grad_norm": 0.484375, "learning_rate": 0.0019995826894940636, "loss": 0.3205, "step": 1922 }, { "epoch": 0.003411411028042189, "grad_norm": 4.96875, "learning_rate": 0.001999580875243537, "loss": 0.5128, "step": 1924 }, { "epoch": 0.003414957193352004, "grad_norm": 0.90234375, "learning_rate": 0.0019995790570587657, "loss": 0.5527, "step": 1926 }, { "epoch": 0.0034185033586618192, "grad_norm": 0.52734375, "learning_rate": 0.0019995772349397584, "loss": 0.2791, "step": 1928 }, { "epoch": 0.0034220495239716342, "grad_norm": 0.62109375, "learning_rate": 0.001999575408886523, "loss": 0.2989, "step": 1930 }, { "epoch": 0.0034255956892814492, "grad_norm": 1.0078125, "learning_rate": 0.0019995735788990673, "loss": 0.3708, "step": 1932 }, { "epoch": 0.0034291418545912646, "grad_norm": 0.4375, "learning_rate": 0.0019995717449773995, "loss": 0.5294, "step": 1934 }, { "epoch": 0.0034326880199010796, "grad_norm": 1.3046875, "learning_rate": 0.001999569907121527, "loss": 0.5341, "step": 1936 }, { "epoch": 0.003436234185210895, "grad_norm": 4.8125, "learning_rate": 0.0019995680653314583, "loss": 0.5079, "step": 1938 }, { "epoch": 0.00343978035052071, "grad_norm": 0.796875, "learning_rate": 0.0019995662196072017, "loss": 0.336, "step": 1940 }, { "epoch": 0.003443326515830525, "grad_norm": 0.40625, "learning_rate": 0.001999564369948765, "loss": 0.3488, "step": 1942 }, { "epoch": 0.0034468726811403404, "grad_norm": 0.3984375, "learning_rate": 0.0019995625163561556, "loss": 0.3103, "step": 1944 }, { "epoch": 0.0034504188464501554, "grad_norm": 0.484375, "learning_rate": 0.001999560658829383, "loss": 0.277, "step": 1946 }, { "epoch": 0.003453965011759971, "grad_norm": 1.7109375, "learning_rate": 0.001999558797368454, "loss": 0.3605, "step": 1948 }, { "epoch": 0.003457511177069786, "grad_norm": 0.515625, "learning_rate": 0.001999556931973378, "loss": 0.3346, "step": 1950 }, { "epoch": 0.0034610573423796013, "grad_norm": 6.0625, "learning_rate": 0.001999555062644162, "loss": 0.4588, "step": 1952 }, { "epoch": 0.0034646035076894162, "grad_norm": 1.171875, "learning_rate": 0.001999553189380815, "loss": 0.2776, "step": 1954 }, { "epoch": 0.0034681496729992312, "grad_norm": 1.9296875, "learning_rate": 0.0019995513121833447, "loss": 0.3906, "step": 1956 }, { "epoch": 0.0034716958383090466, "grad_norm": 0.462890625, "learning_rate": 0.0019995494310517596, "loss": 0.3159, "step": 1958 }, { "epoch": 0.0034752420036188616, "grad_norm": 0.26953125, "learning_rate": 0.001999547545986068, "loss": 0.2552, "step": 1960 }, { "epoch": 0.003478788168928677, "grad_norm": 1.1015625, "learning_rate": 0.001999545656986277, "loss": 0.3181, "step": 1962 }, { "epoch": 0.003482334334238492, "grad_norm": 0.494140625, "learning_rate": 0.0019995437640523968, "loss": 0.3241, "step": 1964 }, { "epoch": 0.003485880499548307, "grad_norm": 0.52734375, "learning_rate": 0.0019995418671844346, "loss": 0.4052, "step": 1966 }, { "epoch": 0.0034894266648581224, "grad_norm": 1.4609375, "learning_rate": 0.0019995399663823984, "loss": 0.3428, "step": 1968 }, { "epoch": 0.0034929728301679374, "grad_norm": 1.1796875, "learning_rate": 0.0019995380616462974, "loss": 0.3386, "step": 1970 }, { "epoch": 0.003496518995477753, "grad_norm": 0.55859375, "learning_rate": 0.001999536152976139, "loss": 0.2762, "step": 1972 }, { "epoch": 0.003500065160787568, "grad_norm": 0.5078125, "learning_rate": 0.0019995342403719323, "loss": 0.3334, "step": 1974 }, { "epoch": 0.003503611326097383, "grad_norm": 1.171875, "learning_rate": 0.0019995323238336847, "loss": 0.3141, "step": 1976 }, { "epoch": 0.0035071574914071982, "grad_norm": 0.81640625, "learning_rate": 0.001999530403361406, "loss": 0.4356, "step": 1978 }, { "epoch": 0.0035107036567170132, "grad_norm": 0.326171875, "learning_rate": 0.0019995284789551034, "loss": 0.3265, "step": 1980 }, { "epoch": 0.0035142498220268287, "grad_norm": 0.498046875, "learning_rate": 0.001999526550614786, "loss": 0.6316, "step": 1982 }, { "epoch": 0.0035177959873366436, "grad_norm": 0.34375, "learning_rate": 0.0019995246183404617, "loss": 0.322, "step": 1984 }, { "epoch": 0.0035213421526464586, "grad_norm": 1.4375, "learning_rate": 0.001999522682132139, "loss": 0.3813, "step": 1986 }, { "epoch": 0.003524888317956274, "grad_norm": 1.2890625, "learning_rate": 0.0019995207419898275, "loss": 0.2774, "step": 1988 }, { "epoch": 0.003528434483266089, "grad_norm": 3.953125, "learning_rate": 0.001999518797913534, "loss": 0.6197, "step": 1990 }, { "epoch": 0.0035319806485759045, "grad_norm": 0.3125, "learning_rate": 0.001999516849903268, "loss": 0.2728, "step": 1992 }, { "epoch": 0.0035355268138857194, "grad_norm": 0.314453125, "learning_rate": 0.001999514897959038, "loss": 0.3735, "step": 1994 }, { "epoch": 0.003539072979195535, "grad_norm": 3.046875, "learning_rate": 0.001999512942080852, "loss": 0.3896, "step": 1996 }, { "epoch": 0.00354261914450535, "grad_norm": 0.6328125, "learning_rate": 0.001999510982268719, "loss": 0.4228, "step": 1998 }, { "epoch": 0.003546165309815165, "grad_norm": 0.9375, "learning_rate": 0.0019995090185226474, "loss": 0.3514, "step": 2000 }, { "epoch": 0.0035497114751249803, "grad_norm": 0.2890625, "learning_rate": 0.0019995070508426463, "loss": 0.2548, "step": 2002 }, { "epoch": 0.0035532576404347952, "grad_norm": 0.92578125, "learning_rate": 0.0019995050792287234, "loss": 0.3597, "step": 2004 }, { "epoch": 0.0035568038057446107, "grad_norm": 0.6328125, "learning_rate": 0.001999503103680888, "loss": 0.2934, "step": 2006 }, { "epoch": 0.0035603499710544257, "grad_norm": 0.52734375, "learning_rate": 0.0019995011241991482, "loss": 0.363, "step": 2008 }, { "epoch": 0.0035638961363642406, "grad_norm": 0.875, "learning_rate": 0.0019994991407835134, "loss": 0.4091, "step": 2010 }, { "epoch": 0.003567442301674056, "grad_norm": 6.375, "learning_rate": 0.001999497153433991, "loss": 0.4613, "step": 2012 }, { "epoch": 0.003570988466983871, "grad_norm": 0.60546875, "learning_rate": 0.001999495162150591, "loss": 0.2809, "step": 2014 }, { "epoch": 0.0035745346322936865, "grad_norm": 0.458984375, "learning_rate": 0.0019994931669333224, "loss": 0.3114, "step": 2016 }, { "epoch": 0.0035780807976035015, "grad_norm": 0.376953125, "learning_rate": 0.0019994911677821926, "loss": 0.3755, "step": 2018 }, { "epoch": 0.0035816269629133164, "grad_norm": 1.703125, "learning_rate": 0.001999489164697211, "loss": 0.359, "step": 2020 }, { "epoch": 0.003585173128223132, "grad_norm": 0.73828125, "learning_rate": 0.001999487157678386, "loss": 0.4394, "step": 2022 }, { "epoch": 0.003588719293532947, "grad_norm": 0.5625, "learning_rate": 0.0019994851467257267, "loss": 0.368, "step": 2024 }, { "epoch": 0.0035922654588427623, "grad_norm": 0.9453125, "learning_rate": 0.001999483131839242, "loss": 0.5545, "step": 2026 }, { "epoch": 0.0035958116241525773, "grad_norm": 0.8359375, "learning_rate": 0.0019994811130189406, "loss": 0.3449, "step": 2028 }, { "epoch": 0.0035993577894623927, "grad_norm": 1.28125, "learning_rate": 0.0019994790902648314, "loss": 0.2781, "step": 2030 }, { "epoch": 0.0036029039547722077, "grad_norm": 1.296875, "learning_rate": 0.0019994770635769227, "loss": 0.3364, "step": 2032 }, { "epoch": 0.0036064501200820226, "grad_norm": 2.8125, "learning_rate": 0.001999475032955224, "loss": 0.4186, "step": 2034 }, { "epoch": 0.003609996285391838, "grad_norm": 1.2109375, "learning_rate": 0.0019994729983997444, "loss": 0.3297, "step": 2036 }, { "epoch": 0.003613542450701653, "grad_norm": 0.52734375, "learning_rate": 0.001999470959910492, "loss": 0.4499, "step": 2038 }, { "epoch": 0.0036170886160114685, "grad_norm": 1.0703125, "learning_rate": 0.001999468917487476, "loss": 0.3226, "step": 2040 }, { "epoch": 0.0036206347813212835, "grad_norm": 1.5390625, "learning_rate": 0.001999466871130706, "loss": 0.3072, "step": 2042 }, { "epoch": 0.0036241809466310984, "grad_norm": 1.6875, "learning_rate": 0.0019994648208401895, "loss": 0.4056, "step": 2044 }, { "epoch": 0.003627727111940914, "grad_norm": 0.453125, "learning_rate": 0.001999462766615937, "loss": 0.3269, "step": 2046 }, { "epoch": 0.003631273277250729, "grad_norm": 1.3671875, "learning_rate": 0.001999460708457956, "loss": 0.3396, "step": 2048 }, { "epoch": 0.0036348194425605443, "grad_norm": 0.4375, "learning_rate": 0.0019994586463662574, "loss": 0.347, "step": 2050 }, { "epoch": 0.0036383656078703593, "grad_norm": 0.388671875, "learning_rate": 0.0019994565803408484, "loss": 0.3198, "step": 2052 }, { "epoch": 0.0036419117731801742, "grad_norm": 1.0703125, "learning_rate": 0.0019994545103817394, "loss": 0.3497, "step": 2054 }, { "epoch": 0.0036454579384899897, "grad_norm": 0.49609375, "learning_rate": 0.0019994524364889384, "loss": 0.3626, "step": 2056 }, { "epoch": 0.0036490041037998047, "grad_norm": 0.296875, "learning_rate": 0.0019994503586624555, "loss": 0.2683, "step": 2058 }, { "epoch": 0.00365255026910962, "grad_norm": 2.125, "learning_rate": 0.0019994482769022984, "loss": 0.3466, "step": 2060 }, { "epoch": 0.003656096434419435, "grad_norm": 0.353515625, "learning_rate": 0.001999446191208477, "loss": 0.3426, "step": 2062 }, { "epoch": 0.0036596425997292505, "grad_norm": 0.375, "learning_rate": 0.0019994441015810014, "loss": 0.3127, "step": 2064 }, { "epoch": 0.0036631887650390655, "grad_norm": 0.302734375, "learning_rate": 0.0019994420080198787, "loss": 0.2981, "step": 2066 }, { "epoch": 0.0036667349303488805, "grad_norm": 0.41796875, "learning_rate": 0.0019994399105251197, "loss": 0.3194, "step": 2068 }, { "epoch": 0.003670281095658696, "grad_norm": 0.486328125, "learning_rate": 0.0019994378090967325, "loss": 0.3509, "step": 2070 }, { "epoch": 0.003673827260968511, "grad_norm": 1.375, "learning_rate": 0.001999435703734727, "loss": 0.4092, "step": 2072 }, { "epoch": 0.0036773734262783263, "grad_norm": 0.625, "learning_rate": 0.0019994335944391126, "loss": 0.4644, "step": 2074 }, { "epoch": 0.0036809195915881413, "grad_norm": 0.21875, "learning_rate": 0.0019994314812098977, "loss": 0.2814, "step": 2076 }, { "epoch": 0.0036844657568979563, "grad_norm": 1.171875, "learning_rate": 0.001999429364047092, "loss": 0.3216, "step": 2078 }, { "epoch": 0.0036880119222077717, "grad_norm": 7.21875, "learning_rate": 0.001999427242950705, "loss": 0.3708, "step": 2080 }, { "epoch": 0.0036915580875175867, "grad_norm": 0.58203125, "learning_rate": 0.0019994251179207456, "loss": 0.2891, "step": 2082 }, { "epoch": 0.003695104252827402, "grad_norm": 0.5546875, "learning_rate": 0.001999422988957223, "loss": 0.2936, "step": 2084 }, { "epoch": 0.003698650418137217, "grad_norm": 0.4140625, "learning_rate": 0.0019994208560601464, "loss": 0.3614, "step": 2086 }, { "epoch": 0.003702196583447032, "grad_norm": 0.92578125, "learning_rate": 0.0019994187192295255, "loss": 0.3889, "step": 2088 }, { "epoch": 0.0037057427487568475, "grad_norm": 0.6953125, "learning_rate": 0.00199941657846537, "loss": 0.3378, "step": 2090 }, { "epoch": 0.0037092889140666625, "grad_norm": 0.275390625, "learning_rate": 0.0019994144337676886, "loss": 0.3008, "step": 2092 }, { "epoch": 0.003712835079376478, "grad_norm": 0.390625, "learning_rate": 0.001999412285136491, "loss": 0.3034, "step": 2094 }, { "epoch": 0.003716381244686293, "grad_norm": 0.65625, "learning_rate": 0.0019994101325717865, "loss": 0.3028, "step": 2096 }, { "epoch": 0.0037199274099961083, "grad_norm": 2.390625, "learning_rate": 0.0019994079760735847, "loss": 0.4911, "step": 2098 }, { "epoch": 0.0037234735753059233, "grad_norm": 0.515625, "learning_rate": 0.0019994058156418944, "loss": 0.3719, "step": 2100 }, { "epoch": 0.0037270197406157383, "grad_norm": 0.39453125, "learning_rate": 0.001999403651276726, "loss": 0.3539, "step": 2102 }, { "epoch": 0.0037305659059255537, "grad_norm": 0.21875, "learning_rate": 0.001999401482978088, "loss": 0.5215, "step": 2104 }, { "epoch": 0.0037341120712353687, "grad_norm": 0.359375, "learning_rate": 0.0019993993107459904, "loss": 0.3986, "step": 2106 }, { "epoch": 0.003737658236545184, "grad_norm": 1.390625, "learning_rate": 0.001999397134580443, "loss": 0.4519, "step": 2108 }, { "epoch": 0.003741204401854999, "grad_norm": 0.8515625, "learning_rate": 0.0019993949544814546, "loss": 0.4013, "step": 2110 }, { "epoch": 0.003744750567164814, "grad_norm": 0.89453125, "learning_rate": 0.0019993927704490353, "loss": 0.3381, "step": 2112 }, { "epoch": 0.0037482967324746295, "grad_norm": 1.3671875, "learning_rate": 0.0019993905824831943, "loss": 0.3385, "step": 2114 }, { "epoch": 0.0037518428977844445, "grad_norm": 2.703125, "learning_rate": 0.0019993883905839414, "loss": 0.4074, "step": 2116 }, { "epoch": 0.00375538906309426, "grad_norm": 0.5703125, "learning_rate": 0.001999386194751286, "loss": 0.3746, "step": 2118 }, { "epoch": 0.003758935228404075, "grad_norm": 0.46875, "learning_rate": 0.0019993839949852383, "loss": 0.3088, "step": 2120 }, { "epoch": 0.00376248139371389, "grad_norm": 1.0625, "learning_rate": 0.0019993817912858066, "loss": 0.4044, "step": 2122 }, { "epoch": 0.0037660275590237053, "grad_norm": 0.546875, "learning_rate": 0.001999379583653002, "loss": 0.3365, "step": 2124 }, { "epoch": 0.0037695737243335203, "grad_norm": 0.640625, "learning_rate": 0.0019993773720868332, "loss": 0.3101, "step": 2126 }, { "epoch": 0.0037731198896433357, "grad_norm": 0.4609375, "learning_rate": 0.0019993751565873107, "loss": 0.3699, "step": 2128 }, { "epoch": 0.0037766660549531507, "grad_norm": 0.376953125, "learning_rate": 0.001999372937154443, "loss": 0.3317, "step": 2130 }, { "epoch": 0.003780212220262966, "grad_norm": 0.80859375, "learning_rate": 0.001999370713788241, "loss": 0.3458, "step": 2132 }, { "epoch": 0.003783758385572781, "grad_norm": 0.34375, "learning_rate": 0.001999368486488714, "loss": 0.4345, "step": 2134 }, { "epoch": 0.003787304550882596, "grad_norm": 0.546875, "learning_rate": 0.001999366255255871, "loss": 0.2812, "step": 2136 }, { "epoch": 0.0037908507161924115, "grad_norm": 0.35546875, "learning_rate": 0.001999364020089723, "loss": 0.2801, "step": 2138 }, { "epoch": 0.0037943968815022265, "grad_norm": 0.3125, "learning_rate": 0.0019993617809902792, "loss": 0.2712, "step": 2140 }, { "epoch": 0.003797943046812042, "grad_norm": 0.33203125, "learning_rate": 0.0019993595379575488, "loss": 0.2721, "step": 2142 }, { "epoch": 0.003801489212121857, "grad_norm": 0.28515625, "learning_rate": 0.0019993572909915427, "loss": 0.2625, "step": 2144 }, { "epoch": 0.003805035377431672, "grad_norm": 0.392578125, "learning_rate": 0.00199935504009227, "loss": 0.4411, "step": 2146 }, { "epoch": 0.0038085815427414873, "grad_norm": 0.2890625, "learning_rate": 0.001999352785259741, "loss": 0.3142, "step": 2148 }, { "epoch": 0.0038121277080513023, "grad_norm": 0.7265625, "learning_rate": 0.001999350526493965, "loss": 0.3174, "step": 2150 }, { "epoch": 0.0038156738733611177, "grad_norm": 1.1484375, "learning_rate": 0.0019993482637949526, "loss": 0.419, "step": 2152 }, { "epoch": 0.0038192200386709327, "grad_norm": 0.400390625, "learning_rate": 0.001999345997162713, "loss": 0.3591, "step": 2154 }, { "epoch": 0.0038227662039807477, "grad_norm": 0.3203125, "learning_rate": 0.0019993437265972565, "loss": 0.2762, "step": 2156 }, { "epoch": 0.003826312369290563, "grad_norm": 0.75, "learning_rate": 0.001999341452098593, "loss": 0.3903, "step": 2158 }, { "epoch": 0.003829858534600378, "grad_norm": 0.8984375, "learning_rate": 0.001999339173666732, "loss": 0.3826, "step": 2160 }, { "epoch": 0.0038334046999101935, "grad_norm": 0.22265625, "learning_rate": 0.0019993368913016844, "loss": 0.2625, "step": 2162 }, { "epoch": 0.0038369508652200085, "grad_norm": 0.498046875, "learning_rate": 0.0019993346050034594, "loss": 0.3452, "step": 2164 }, { "epoch": 0.003840497030529824, "grad_norm": 0.4375, "learning_rate": 0.0019993323147720673, "loss": 0.3819, "step": 2166 }, { "epoch": 0.003844043195839639, "grad_norm": 0.9375, "learning_rate": 0.001999330020607518, "loss": 0.2753, "step": 2168 }, { "epoch": 0.003847589361149454, "grad_norm": 1.2578125, "learning_rate": 0.0019993277225098215, "loss": 0.4099, "step": 2170 }, { "epoch": 0.0038511355264592693, "grad_norm": 0.3828125, "learning_rate": 0.001999325420478988, "loss": 0.4247, "step": 2172 }, { "epoch": 0.0038546816917690843, "grad_norm": 0.48828125, "learning_rate": 0.0019993231145150276, "loss": 0.3804, "step": 2174 }, { "epoch": 0.0038582278570788997, "grad_norm": 1.6875, "learning_rate": 0.00199932080461795, "loss": 0.4339, "step": 2176 }, { "epoch": 0.0038617740223887147, "grad_norm": 0.451171875, "learning_rate": 0.001999318490787766, "loss": 0.33, "step": 2178 }, { "epoch": 0.0038653201876985297, "grad_norm": 0.2470703125, "learning_rate": 0.001999316173024485, "loss": 0.3843, "step": 2180 }, { "epoch": 0.003868866353008345, "grad_norm": 0.5, "learning_rate": 0.0019993138513281173, "loss": 0.2957, "step": 2182 }, { "epoch": 0.00387241251831816, "grad_norm": 0.2392578125, "learning_rate": 0.0019993115256986735, "loss": 0.345, "step": 2184 }, { "epoch": 0.0038759586836279755, "grad_norm": 0.3125, "learning_rate": 0.001999309196136163, "loss": 0.2717, "step": 2186 }, { "epoch": 0.0038795048489377905, "grad_norm": 0.55859375, "learning_rate": 0.001999306862640597, "loss": 0.2542, "step": 2188 }, { "epoch": 0.0038830510142476055, "grad_norm": 1.1328125, "learning_rate": 0.0019993045252119854, "loss": 0.3122, "step": 2190 }, { "epoch": 0.003886597179557421, "grad_norm": 3.46875, "learning_rate": 0.0019993021838503374, "loss": 0.4333, "step": 2192 }, { "epoch": 0.003890143344867236, "grad_norm": 0.64453125, "learning_rate": 0.0019992998385556645, "loss": 0.3802, "step": 2194 }, { "epoch": 0.0038936895101770513, "grad_norm": 1.421875, "learning_rate": 0.0019992974893279767, "loss": 0.3688, "step": 2196 }, { "epoch": 0.0038972356754868663, "grad_norm": 0.35546875, "learning_rate": 0.001999295136167284, "loss": 0.3194, "step": 2198 }, { "epoch": 0.0039007818407966817, "grad_norm": 0.337890625, "learning_rate": 0.001999292779073596, "loss": 0.266, "step": 2200 }, { "epoch": 0.0039043280061064967, "grad_norm": 1.0078125, "learning_rate": 0.0019992904180469243, "loss": 0.3561, "step": 2202 }, { "epoch": 0.003907874171416312, "grad_norm": 0.3671875, "learning_rate": 0.0019992880530872786, "loss": 0.3423, "step": 2204 }, { "epoch": 0.003911420336726127, "grad_norm": 0.52734375, "learning_rate": 0.001999285684194669, "loss": 0.2538, "step": 2206 }, { "epoch": 0.003914966502035942, "grad_norm": 0.486328125, "learning_rate": 0.0019992833113691073, "loss": 0.3364, "step": 2208 }, { "epoch": 0.0039185126673457575, "grad_norm": 0.546875, "learning_rate": 0.001999280934610602, "loss": 0.3101, "step": 2210 }, { "epoch": 0.003922058832655572, "grad_norm": 0.4765625, "learning_rate": 0.001999278553919164, "loss": 0.337, "step": 2212 }, { "epoch": 0.0039256049979653875, "grad_norm": 1.1796875, "learning_rate": 0.0019992761692948042, "loss": 0.34, "step": 2214 }, { "epoch": 0.003929151163275203, "grad_norm": 0.5078125, "learning_rate": 0.001999273780737533, "loss": 0.3095, "step": 2216 }, { "epoch": 0.003932697328585018, "grad_norm": 0.5078125, "learning_rate": 0.0019992713882473604, "loss": 0.4177, "step": 2218 }, { "epoch": 0.003936243493894833, "grad_norm": 0.470703125, "learning_rate": 0.0019992689918242974, "loss": 0.3188, "step": 2220 }, { "epoch": 0.003939789659204648, "grad_norm": 0.6171875, "learning_rate": 0.001999266591468354, "loss": 0.327, "step": 2222 }, { "epoch": 0.003943335824514464, "grad_norm": 0.314453125, "learning_rate": 0.0019992641871795407, "loss": 0.3127, "step": 2224 }, { "epoch": 0.003946881989824278, "grad_norm": 0.890625, "learning_rate": 0.0019992617789578683, "loss": 0.274, "step": 2226 }, { "epoch": 0.003950428155134094, "grad_norm": 3.5625, "learning_rate": 0.001999259366803347, "loss": 0.3542, "step": 2228 }, { "epoch": 0.003953974320443909, "grad_norm": 1.6640625, "learning_rate": 0.001999256950715988, "loss": 0.4139, "step": 2230 }, { "epoch": 0.0039575204857537245, "grad_norm": 0.267578125, "learning_rate": 0.0019992545306958013, "loss": 0.3966, "step": 2232 }, { "epoch": 0.003961066651063539, "grad_norm": 0.98046875, "learning_rate": 0.0019992521067427977, "loss": 0.2843, "step": 2234 }, { "epoch": 0.0039646128163733545, "grad_norm": 1.046875, "learning_rate": 0.001999249678856988, "loss": 0.3295, "step": 2236 }, { "epoch": 0.00396815898168317, "grad_norm": 0.375, "learning_rate": 0.001999247247038382, "loss": 0.3355, "step": 2238 }, { "epoch": 0.0039717051469929845, "grad_norm": 0.302734375, "learning_rate": 0.001999244811286991, "loss": 0.2689, "step": 2240 }, { "epoch": 0.0039752513123028, "grad_norm": 0.99609375, "learning_rate": 0.0019992423716028262, "loss": 0.285, "step": 2242 }, { "epoch": 0.003978797477612615, "grad_norm": 0.30859375, "learning_rate": 0.0019992399279858968, "loss": 0.3874, "step": 2244 }, { "epoch": 0.00398234364292243, "grad_norm": 0.64453125, "learning_rate": 0.0019992374804362143, "loss": 0.624, "step": 2246 }, { "epoch": 0.003985889808232245, "grad_norm": 0.30859375, "learning_rate": 0.0019992350289537897, "loss": 0.3005, "step": 2248 }, { "epoch": 0.003989435973542061, "grad_norm": 1.4375, "learning_rate": 0.0019992325735386338, "loss": 0.5908, "step": 2250 }, { "epoch": 0.003992982138851876, "grad_norm": 0.6953125, "learning_rate": 0.001999230114190756, "loss": 0.4056, "step": 2252 }, { "epoch": 0.003996528304161691, "grad_norm": 1.0234375, "learning_rate": 0.0019992276509101688, "loss": 0.3623, "step": 2254 }, { "epoch": 0.004000074469471506, "grad_norm": 1.375, "learning_rate": 0.001999225183696882, "loss": 0.2732, "step": 2256 }, { "epoch": 0.0040036206347813215, "grad_norm": 0.2890625, "learning_rate": 0.0019992227125509065, "loss": 0.2714, "step": 2258 }, { "epoch": 0.004007166800091136, "grad_norm": 0.2734375, "learning_rate": 0.001999220237472253, "loss": 0.3026, "step": 2260 }, { "epoch": 0.0040107129654009515, "grad_norm": 2.21875, "learning_rate": 0.001999217758460933, "loss": 0.6627, "step": 2262 }, { "epoch": 0.004014259130710767, "grad_norm": 0.4609375, "learning_rate": 0.0019992152755169564, "loss": 0.2845, "step": 2264 }, { "epoch": 0.004017805296020582, "grad_norm": 0.291015625, "learning_rate": 0.0019992127886403347, "loss": 0.2716, "step": 2266 }, { "epoch": 0.004021351461330397, "grad_norm": 1.4375, "learning_rate": 0.0019992102978310788, "loss": 0.3575, "step": 2268 }, { "epoch": 0.004024897626640212, "grad_norm": 1.109375, "learning_rate": 0.001999207803089199, "loss": 0.4475, "step": 2270 }, { "epoch": 0.004028443791950028, "grad_norm": 0.734375, "learning_rate": 0.0019992053044147065, "loss": 0.5932, "step": 2272 }, { "epoch": 0.004031989957259842, "grad_norm": 1.03125, "learning_rate": 0.0019992028018076128, "loss": 0.4675, "step": 2274 }, { "epoch": 0.004035536122569658, "grad_norm": 0.6953125, "learning_rate": 0.001999200295267928, "loss": 0.3746, "step": 2276 }, { "epoch": 0.004039082287879473, "grad_norm": 0.431640625, "learning_rate": 0.0019991977847956637, "loss": 0.3107, "step": 2278 }, { "epoch": 0.004042628453189288, "grad_norm": 0.546875, "learning_rate": 0.0019991952703908304, "loss": 0.2674, "step": 2280 }, { "epoch": 0.004046174618499103, "grad_norm": 0.83203125, "learning_rate": 0.0019991927520534394, "loss": 0.3235, "step": 2282 }, { "epoch": 0.0040497207838089185, "grad_norm": 1.2109375, "learning_rate": 0.0019991902297835018, "loss": 0.3161, "step": 2284 }, { "epoch": 0.004053266949118734, "grad_norm": 0.369140625, "learning_rate": 0.001999187703581028, "loss": 0.3673, "step": 2286 }, { "epoch": 0.0040568131144285485, "grad_norm": 1.453125, "learning_rate": 0.00199918517344603, "loss": 0.2897, "step": 2288 }, { "epoch": 0.004060359279738364, "grad_norm": 3.078125, "learning_rate": 0.001999182639378518, "loss": 0.4174, "step": 2290 }, { "epoch": 0.004063905445048179, "grad_norm": 0.322265625, "learning_rate": 0.0019991801013785034, "loss": 0.296, "step": 2292 }, { "epoch": 0.004067451610357994, "grad_norm": 0.431640625, "learning_rate": 0.0019991775594459973, "loss": 0.307, "step": 2294 }, { "epoch": 0.004070997775667809, "grad_norm": 2.171875, "learning_rate": 0.0019991750135810115, "loss": 0.4551, "step": 2296 }, { "epoch": 0.004074543940977625, "grad_norm": 0.9921875, "learning_rate": 0.001999172463783556, "loss": 0.4069, "step": 2298 }, { "epoch": 0.00407809010628744, "grad_norm": 0.79296875, "learning_rate": 0.001999169910053642, "loss": 0.3585, "step": 2300 }, { "epoch": 0.004081636271597255, "grad_norm": 0.5703125, "learning_rate": 0.001999167352391282, "loss": 0.2759, "step": 2302 }, { "epoch": 0.00408518243690707, "grad_norm": 0.7421875, "learning_rate": 0.0019991647907964855, "loss": 0.2841, "step": 2304 }, { "epoch": 0.0040887286022168855, "grad_norm": 0.53515625, "learning_rate": 0.001999162225269265, "loss": 0.4041, "step": 2306 }, { "epoch": 0.0040922747675267, "grad_norm": 0.69921875, "learning_rate": 0.0019991596558096313, "loss": 0.5127, "step": 2308 }, { "epoch": 0.0040958209328365155, "grad_norm": 2.125, "learning_rate": 0.0019991570824175954, "loss": 0.414, "step": 2310 }, { "epoch": 0.004099367098146331, "grad_norm": 0.5, "learning_rate": 0.0019991545050931686, "loss": 0.3223, "step": 2312 }, { "epoch": 0.0041029132634561455, "grad_norm": 0.6953125, "learning_rate": 0.0019991519238363622, "loss": 0.3266, "step": 2314 }, { "epoch": 0.004106459428765961, "grad_norm": 0.82421875, "learning_rate": 0.001999149338647188, "loss": 0.4117, "step": 2316 }, { "epoch": 0.004110005594075776, "grad_norm": 0.44921875, "learning_rate": 0.001999146749525656, "loss": 0.2709, "step": 2318 }, { "epoch": 0.004113551759385592, "grad_norm": 0.3125, "learning_rate": 0.0019991441564717796, "loss": 0.3172, "step": 2320 }, { "epoch": 0.004117097924695406, "grad_norm": 0.7109375, "learning_rate": 0.0019991415594855684, "loss": 0.2935, "step": 2322 }, { "epoch": 0.004120644090005222, "grad_norm": 0.470703125, "learning_rate": 0.001999138958567034, "loss": 0.3147, "step": 2324 }, { "epoch": 0.004124190255315037, "grad_norm": 1.0546875, "learning_rate": 0.0019991363537161886, "loss": 0.2743, "step": 2326 }, { "epoch": 0.004127736420624852, "grad_norm": 0.3828125, "learning_rate": 0.001999133744933043, "loss": 0.2952, "step": 2328 }, { "epoch": 0.004131282585934667, "grad_norm": 0.3828125, "learning_rate": 0.0019991311322176083, "loss": 0.2564, "step": 2330 }, { "epoch": 0.0041348287512444825, "grad_norm": 2.125, "learning_rate": 0.0019991285155698964, "loss": 0.309, "step": 2332 }, { "epoch": 0.004138374916554298, "grad_norm": 0.26953125, "learning_rate": 0.0019991258949899186, "loss": 0.2408, "step": 2334 }, { "epoch": 0.0041419210818641125, "grad_norm": 0.578125, "learning_rate": 0.0019991232704776865, "loss": 0.2607, "step": 2336 }, { "epoch": 0.004145467247173928, "grad_norm": 0.69140625, "learning_rate": 0.001999120642033212, "loss": 0.3421, "step": 2338 }, { "epoch": 0.004149013412483743, "grad_norm": 0.3515625, "learning_rate": 0.0019991180096565054, "loss": 0.4084, "step": 2340 }, { "epoch": 0.004152559577793558, "grad_norm": 0.3515625, "learning_rate": 0.001999115373347579, "loss": 0.2662, "step": 2342 }, { "epoch": 0.004156105743103373, "grad_norm": 0.25, "learning_rate": 0.0019991127331064444, "loss": 0.267, "step": 2344 }, { "epoch": 0.004159651908413189, "grad_norm": 0.47265625, "learning_rate": 0.001999110088933113, "loss": 0.2545, "step": 2346 }, { "epoch": 0.004163198073723003, "grad_norm": 3.28125, "learning_rate": 0.001999107440827596, "loss": 0.358, "step": 2348 }, { "epoch": 0.004166744239032819, "grad_norm": 0.3125, "learning_rate": 0.0019991047887899056, "loss": 0.2937, "step": 2350 }, { "epoch": 0.004170290404342634, "grad_norm": 0.490234375, "learning_rate": 0.0019991021328200536, "loss": 0.2853, "step": 2352 }, { "epoch": 0.0041738365696524496, "grad_norm": 0.55078125, "learning_rate": 0.0019990994729180505, "loss": 0.3396, "step": 2354 }, { "epoch": 0.004177382734962264, "grad_norm": 0.4453125, "learning_rate": 0.0019990968090839085, "loss": 0.4468, "step": 2356 }, { "epoch": 0.0041809289002720795, "grad_norm": 0.427734375, "learning_rate": 0.0019990941413176398, "loss": 0.3123, "step": 2358 }, { "epoch": 0.004184475065581895, "grad_norm": 0.36328125, "learning_rate": 0.001999091469619255, "loss": 0.5217, "step": 2360 }, { "epoch": 0.0041880212308917095, "grad_norm": 0.384765625, "learning_rate": 0.0019990887939887667, "loss": 0.3404, "step": 2362 }, { "epoch": 0.004191567396201525, "grad_norm": 0.33984375, "learning_rate": 0.001999086114426186, "loss": 0.3254, "step": 2364 }, { "epoch": 0.00419511356151134, "grad_norm": 3.875, "learning_rate": 0.0019990834309315253, "loss": 0.5084, "step": 2366 }, { "epoch": 0.004198659726821156, "grad_norm": 0.671875, "learning_rate": 0.001999080743504796, "loss": 0.332, "step": 2368 }, { "epoch": 0.00420220589213097, "grad_norm": 0.6328125, "learning_rate": 0.001999078052146009, "loss": 0.3747, "step": 2370 }, { "epoch": 0.004205752057440786, "grad_norm": 0.7265625, "learning_rate": 0.0019990753568551777, "loss": 0.3267, "step": 2372 }, { "epoch": 0.004209298222750601, "grad_norm": 0.392578125, "learning_rate": 0.0019990726576323125, "loss": 0.3301, "step": 2374 }, { "epoch": 0.004212844388060416, "grad_norm": 0.296875, "learning_rate": 0.001999069954477426, "loss": 0.3264, "step": 2376 }, { "epoch": 0.004216390553370231, "grad_norm": 0.310546875, "learning_rate": 0.0019990672473905297, "loss": 0.2649, "step": 2378 }, { "epoch": 0.0042199367186800466, "grad_norm": 0.6328125, "learning_rate": 0.0019990645363716355, "loss": 0.3235, "step": 2380 }, { "epoch": 0.004223482883989861, "grad_norm": 0.58984375, "learning_rate": 0.001999061821420755, "loss": 0.348, "step": 2382 }, { "epoch": 0.0042270290492996765, "grad_norm": 0.734375, "learning_rate": 0.001999059102537901, "loss": 0.3561, "step": 2384 }, { "epoch": 0.004230575214609492, "grad_norm": 0.298828125, "learning_rate": 0.001999056379723084, "loss": 0.2547, "step": 2386 }, { "epoch": 0.004234121379919307, "grad_norm": 0.546875, "learning_rate": 0.001999053652976317, "loss": 0.326, "step": 2388 }, { "epoch": 0.004237667545229122, "grad_norm": 0.8203125, "learning_rate": 0.0019990509222976116, "loss": 0.37, "step": 2390 }, { "epoch": 0.004241213710538937, "grad_norm": 3.0625, "learning_rate": 0.0019990481876869795, "loss": 0.3745, "step": 2392 }, { "epoch": 0.004244759875848753, "grad_norm": 0.56640625, "learning_rate": 0.001999045449144433, "loss": 0.3181, "step": 2394 }, { "epoch": 0.004248306041158567, "grad_norm": 0.482421875, "learning_rate": 0.0019990427066699837, "loss": 0.2918, "step": 2396 }, { "epoch": 0.004251852206468383, "grad_norm": 0.55078125, "learning_rate": 0.001999039960263644, "loss": 0.2809, "step": 2398 }, { "epoch": 0.004255398371778198, "grad_norm": 0.43359375, "learning_rate": 0.0019990372099254255, "loss": 0.3693, "step": 2400 }, { "epoch": 0.004258944537088014, "grad_norm": 1.0078125, "learning_rate": 0.001999034455655341, "loss": 0.3528, "step": 2402 }, { "epoch": 0.004262490702397828, "grad_norm": 0.2294921875, "learning_rate": 0.0019990316974534015, "loss": 0.2559, "step": 2404 }, { "epoch": 0.0042660368677076435, "grad_norm": 0.267578125, "learning_rate": 0.00199902893531962, "loss": 0.245, "step": 2406 }, { "epoch": 0.004269583033017459, "grad_norm": 0.3515625, "learning_rate": 0.001999026169254008, "loss": 0.343, "step": 2408 }, { "epoch": 0.0042731291983272735, "grad_norm": 0.2890625, "learning_rate": 0.0019990233992565774, "loss": 0.2537, "step": 2410 }, { "epoch": 0.004276675363637089, "grad_norm": 0.44140625, "learning_rate": 0.001999020625327341, "loss": 0.2417, "step": 2412 }, { "epoch": 0.004280221528946904, "grad_norm": 2.484375, "learning_rate": 0.0019990178474663107, "loss": 0.4874, "step": 2414 }, { "epoch": 0.004283767694256719, "grad_norm": 0.5234375, "learning_rate": 0.0019990150656734986, "loss": 0.2428, "step": 2416 }, { "epoch": 0.004287313859566534, "grad_norm": 0.337890625, "learning_rate": 0.001999012279948916, "loss": 0.2781, "step": 2418 }, { "epoch": 0.00429086002487635, "grad_norm": 0.298828125, "learning_rate": 0.001999009490292577, "loss": 0.3514, "step": 2420 }, { "epoch": 0.004294406190186165, "grad_norm": 0.3671875, "learning_rate": 0.001999006696704492, "loss": 0.3882, "step": 2422 }, { "epoch": 0.00429795235549598, "grad_norm": 2.59375, "learning_rate": 0.0019990038991846743, "loss": 0.3699, "step": 2424 }, { "epoch": 0.004301498520805795, "grad_norm": 0.267578125, "learning_rate": 0.0019990010977331356, "loss": 0.3473, "step": 2426 }, { "epoch": 0.0043050446861156106, "grad_norm": 0.5, "learning_rate": 0.0019989982923498883, "loss": 0.2783, "step": 2428 }, { "epoch": 0.004308590851425425, "grad_norm": 2.34375, "learning_rate": 0.0019989954830349444, "loss": 0.4462, "step": 2430 }, { "epoch": 0.0043121370167352405, "grad_norm": 0.439453125, "learning_rate": 0.0019989926697883166, "loss": 0.357, "step": 2432 }, { "epoch": 0.004315683182045056, "grad_norm": 0.333984375, "learning_rate": 0.0019989898526100174, "loss": 0.3523, "step": 2434 }, { "epoch": 0.004319229347354871, "grad_norm": 0.30078125, "learning_rate": 0.0019989870315000585, "loss": 0.2859, "step": 2436 }, { "epoch": 0.004322775512664686, "grad_norm": 0.2412109375, "learning_rate": 0.0019989842064584524, "loss": 0.3051, "step": 2438 }, { "epoch": 0.004326321677974501, "grad_norm": 0.2265625, "learning_rate": 0.0019989813774852123, "loss": 0.2355, "step": 2440 }, { "epoch": 0.004329867843284317, "grad_norm": 0.482421875, "learning_rate": 0.001998978544580349, "loss": 0.4937, "step": 2442 }, { "epoch": 0.004333414008594131, "grad_norm": 0.91015625, "learning_rate": 0.001998975707743876, "loss": 0.4194, "step": 2444 }, { "epoch": 0.004336960173903947, "grad_norm": 0.330078125, "learning_rate": 0.0019989728669758053, "loss": 0.3299, "step": 2446 }, { "epoch": 0.004340506339213762, "grad_norm": 0.41796875, "learning_rate": 0.0019989700222761498, "loss": 0.2959, "step": 2448 }, { "epoch": 0.004344052504523577, "grad_norm": 0.8046875, "learning_rate": 0.0019989671736449213, "loss": 0.345, "step": 2450 }, { "epoch": 0.004347598669833392, "grad_norm": 1.046875, "learning_rate": 0.001998964321082133, "loss": 0.32, "step": 2452 }, { "epoch": 0.0043511448351432076, "grad_norm": 0.4453125, "learning_rate": 0.0019989614645877962, "loss": 0.318, "step": 2454 }, { "epoch": 0.004354691000453023, "grad_norm": 2.46875, "learning_rate": 0.001998958604161925, "loss": 0.3437, "step": 2456 }, { "epoch": 0.0043582371657628375, "grad_norm": 0.365234375, "learning_rate": 0.0019989557398045303, "loss": 0.3221, "step": 2458 }, { "epoch": 0.004361783331072653, "grad_norm": 2.65625, "learning_rate": 0.0019989528715156257, "loss": 0.4671, "step": 2460 }, { "epoch": 0.004365329496382468, "grad_norm": 2.015625, "learning_rate": 0.0019989499992952233, "loss": 0.4874, "step": 2462 }, { "epoch": 0.004368875661692283, "grad_norm": 0.486328125, "learning_rate": 0.001998947123143336, "loss": 0.3161, "step": 2464 }, { "epoch": 0.004372421827002098, "grad_norm": 0.91015625, "learning_rate": 0.001998944243059976, "loss": 0.5244, "step": 2466 }, { "epoch": 0.004375967992311914, "grad_norm": 0.63671875, "learning_rate": 0.0019989413590451558, "loss": 0.5607, "step": 2468 }, { "epoch": 0.004379514157621729, "grad_norm": 0.41015625, "learning_rate": 0.0019989384710988886, "loss": 0.2798, "step": 2470 }, { "epoch": 0.004383060322931544, "grad_norm": 0.51953125, "learning_rate": 0.001998935579221187, "loss": 0.3108, "step": 2472 }, { "epoch": 0.004386606488241359, "grad_norm": 2.90625, "learning_rate": 0.0019989326834120628, "loss": 0.4146, "step": 2474 }, { "epoch": 0.004390152653551175, "grad_norm": 0.41015625, "learning_rate": 0.001998929783671529, "loss": 0.3229, "step": 2476 }, { "epoch": 0.004393698818860989, "grad_norm": 0.431640625, "learning_rate": 0.001998926879999599, "loss": 0.3949, "step": 2478 }, { "epoch": 0.0043972449841708046, "grad_norm": 2.015625, "learning_rate": 0.0019989239723962847, "loss": 0.4253, "step": 2480 }, { "epoch": 0.00440079114948062, "grad_norm": 2.0, "learning_rate": 0.0019989210608615995, "loss": 0.503, "step": 2482 }, { "epoch": 0.0044043373147904345, "grad_norm": 1.8828125, "learning_rate": 0.0019989181453955555, "loss": 0.3762, "step": 2484 }, { "epoch": 0.00440788348010025, "grad_norm": 2.046875, "learning_rate": 0.001998915225998166, "loss": 0.4607, "step": 2486 }, { "epoch": 0.004411429645410065, "grad_norm": 0.314453125, "learning_rate": 0.0019989123026694427, "loss": 0.3172, "step": 2488 }, { "epoch": 0.004414975810719881, "grad_norm": 0.4375, "learning_rate": 0.0019989093754094, "loss": 0.2745, "step": 2490 }, { "epoch": 0.004418521976029695, "grad_norm": 0.2734375, "learning_rate": 0.001998906444218049, "loss": 0.3414, "step": 2492 }, { "epoch": 0.004422068141339511, "grad_norm": 0.71484375, "learning_rate": 0.001998903509095404, "loss": 0.434, "step": 2494 }, { "epoch": 0.004425614306649326, "grad_norm": 0.326171875, "learning_rate": 0.0019989005700414768, "loss": 0.4045, "step": 2496 }, { "epoch": 0.004429160471959141, "grad_norm": 0.59375, "learning_rate": 0.0019988976270562805, "loss": 0.2992, "step": 2498 }, { "epoch": 0.004432706637268956, "grad_norm": 1.546875, "learning_rate": 0.0019988946801398288, "loss": 0.4373, "step": 2500 }, { "epoch": 0.004436252802578772, "grad_norm": 0.5078125, "learning_rate": 0.0019988917292921332, "loss": 0.3932, "step": 2502 }, { "epoch": 0.004439798967888587, "grad_norm": 0.34765625, "learning_rate": 0.001998888774513208, "loss": 0.336, "step": 2504 }, { "epoch": 0.0044433451331984015, "grad_norm": 0.62890625, "learning_rate": 0.001998885815803065, "loss": 0.3264, "step": 2506 }, { "epoch": 0.004446891298508217, "grad_norm": 1.3984375, "learning_rate": 0.0019988828531617175, "loss": 0.3519, "step": 2508 }, { "epoch": 0.004450437463818032, "grad_norm": 1.125, "learning_rate": 0.0019988798865891787, "loss": 0.2821, "step": 2510 }, { "epoch": 0.004453983629127847, "grad_norm": 1.3828125, "learning_rate": 0.0019988769160854615, "loss": 0.3554, "step": 2512 }, { "epoch": 0.004457529794437662, "grad_norm": 1.0859375, "learning_rate": 0.0019988739416505787, "loss": 0.4213, "step": 2514 }, { "epoch": 0.004461075959747478, "grad_norm": 3.515625, "learning_rate": 0.0019988709632845435, "loss": 0.5323, "step": 2516 }, { "epoch": 0.004464622125057292, "grad_norm": 0.828125, "learning_rate": 0.0019988679809873687, "loss": 0.3411, "step": 2518 }, { "epoch": 0.004468168290367108, "grad_norm": 0.36328125, "learning_rate": 0.001998864994759067, "loss": 0.4354, "step": 2520 }, { "epoch": 0.004471714455676923, "grad_norm": 0.36328125, "learning_rate": 0.001998862004599653, "loss": 0.2528, "step": 2522 }, { "epoch": 0.004475260620986739, "grad_norm": 0.8125, "learning_rate": 0.0019988590105091382, "loss": 0.3286, "step": 2524 }, { "epoch": 0.004478806786296553, "grad_norm": 0.380859375, "learning_rate": 0.001998856012487536, "loss": 0.8415, "step": 2526 }, { "epoch": 0.004482352951606369, "grad_norm": 1.6171875, "learning_rate": 0.00199885301053486, "loss": 0.3309, "step": 2528 }, { "epoch": 0.004485899116916184, "grad_norm": 1.109375, "learning_rate": 0.0019988500046511227, "loss": 0.3953, "step": 2530 }, { "epoch": 0.0044894452822259985, "grad_norm": 0.44921875, "learning_rate": 0.0019988469948363377, "loss": 0.3209, "step": 2532 }, { "epoch": 0.004492991447535814, "grad_norm": 0.58984375, "learning_rate": 0.0019988439810905184, "loss": 0.3589, "step": 2534 }, { "epoch": 0.004496537612845629, "grad_norm": 0.498046875, "learning_rate": 0.001998840963413678, "loss": 0.3395, "step": 2536 }, { "epoch": 0.004500083778155445, "grad_norm": 0.4609375, "learning_rate": 0.001998837941805828, "loss": 0.2942, "step": 2538 }, { "epoch": 0.004503629943465259, "grad_norm": 0.373046875, "learning_rate": 0.001998834916266984, "loss": 0.3399, "step": 2540 }, { "epoch": 0.004507176108775075, "grad_norm": 2.40625, "learning_rate": 0.001998831886797158, "loss": 0.3492, "step": 2542 }, { "epoch": 0.00451072227408489, "grad_norm": 0.2734375, "learning_rate": 0.0019988288533963634, "loss": 0.2839, "step": 2544 }, { "epoch": 0.004514268439394705, "grad_norm": 0.37109375, "learning_rate": 0.001998825816064613, "loss": 0.2991, "step": 2546 }, { "epoch": 0.00451781460470452, "grad_norm": 0.1953125, "learning_rate": 0.0019988227748019213, "loss": 0.2732, "step": 2548 }, { "epoch": 0.004521360770014336, "grad_norm": 0.96484375, "learning_rate": 0.0019988197296083, "loss": 0.411, "step": 2550 }, { "epoch": 0.00452490693532415, "grad_norm": 0.51953125, "learning_rate": 0.0019988166804837644, "loss": 0.5054, "step": 2552 }, { "epoch": 0.0045284531006339656, "grad_norm": 0.35546875, "learning_rate": 0.001998813627428326, "loss": 0.2754, "step": 2554 }, { "epoch": 0.004531999265943781, "grad_norm": 0.2109375, "learning_rate": 0.0019988105704419994, "loss": 0.271, "step": 2556 }, { "epoch": 0.004535545431253596, "grad_norm": 0.625, "learning_rate": 0.001998807509524797, "loss": 0.3684, "step": 2558 }, { "epoch": 0.004539091596563411, "grad_norm": 0.49609375, "learning_rate": 0.001998804444676733, "loss": 0.3516, "step": 2560 }, { "epoch": 0.004542637761873226, "grad_norm": 0.40625, "learning_rate": 0.0019988013758978204, "loss": 0.3089, "step": 2562 }, { "epoch": 0.004546183927183042, "grad_norm": 0.447265625, "learning_rate": 0.0019987983031880723, "loss": 0.3493, "step": 2564 }, { "epoch": 0.004549730092492856, "grad_norm": 0.365234375, "learning_rate": 0.001998795226547503, "loss": 0.2243, "step": 2566 }, { "epoch": 0.004553276257802672, "grad_norm": 4.3125, "learning_rate": 0.001998792145976125, "loss": 0.413, "step": 2568 }, { "epoch": 0.004556822423112487, "grad_norm": 0.60546875, "learning_rate": 0.001998789061473952, "loss": 0.3219, "step": 2570 }, { "epoch": 0.004560368588422303, "grad_norm": 0.388671875, "learning_rate": 0.0019987859730409984, "loss": 0.2752, "step": 2572 }, { "epoch": 0.004563914753732117, "grad_norm": 0.6328125, "learning_rate": 0.001998782880677277, "loss": 0.4357, "step": 2574 }, { "epoch": 0.004567460919041933, "grad_norm": 0.5390625, "learning_rate": 0.001998779784382801, "loss": 0.3183, "step": 2576 }, { "epoch": 0.004571007084351748, "grad_norm": 0.9609375, "learning_rate": 0.001998776684157584, "loss": 0.4608, "step": 2578 }, { "epoch": 0.0045745532496615626, "grad_norm": 0.337890625, "learning_rate": 0.0019987735800016406, "loss": 0.371, "step": 2580 }, { "epoch": 0.004578099414971378, "grad_norm": 1.4453125, "learning_rate": 0.0019987704719149836, "loss": 0.2915, "step": 2582 }, { "epoch": 0.004581645580281193, "grad_norm": 1.0625, "learning_rate": 0.0019987673598976263, "loss": 0.4373, "step": 2584 }, { "epoch": 0.004585191745591008, "grad_norm": 0.265625, "learning_rate": 0.0019987642439495824, "loss": 0.272, "step": 2586 }, { "epoch": 0.004588737910900823, "grad_norm": 1.53125, "learning_rate": 0.001998761124070866, "loss": 0.484, "step": 2588 }, { "epoch": 0.004592284076210639, "grad_norm": 1.390625, "learning_rate": 0.0019987580002614907, "loss": 0.3142, "step": 2590 }, { "epoch": 0.004595830241520454, "grad_norm": 0.453125, "learning_rate": 0.0019987548725214697, "loss": 0.3708, "step": 2592 }, { "epoch": 0.004599376406830269, "grad_norm": 0.376953125, "learning_rate": 0.0019987517408508173, "loss": 0.3678, "step": 2594 }, { "epoch": 0.004602922572140084, "grad_norm": 1.796875, "learning_rate": 0.001998748605249546, "loss": 0.3708, "step": 2596 }, { "epoch": 0.0046064687374499, "grad_norm": 0.54296875, "learning_rate": 0.001998745465717671, "loss": 0.244, "step": 2598 }, { "epoch": 0.004610014902759714, "grad_norm": 1.1796875, "learning_rate": 0.0019987423222552056, "loss": 0.3241, "step": 2600 }, { "epoch": 0.00461356106806953, "grad_norm": 1.5625, "learning_rate": 0.001998739174862163, "loss": 0.3932, "step": 2602 }, { "epoch": 0.004617107233379345, "grad_norm": 0.3984375, "learning_rate": 0.0019987360235385575, "loss": 0.2386, "step": 2604 }, { "epoch": 0.00462065339868916, "grad_norm": 0.5625, "learning_rate": 0.0019987328682844027, "loss": 0.2481, "step": 2606 }, { "epoch": 0.004624199563998975, "grad_norm": 0.26953125, "learning_rate": 0.0019987297090997124, "loss": 0.3161, "step": 2608 }, { "epoch": 0.00462774572930879, "grad_norm": 1.4296875, "learning_rate": 0.0019987265459845, "loss": 0.658, "step": 2610 }, { "epoch": 0.004631291894618606, "grad_norm": 0.671875, "learning_rate": 0.00199872337893878, "loss": 0.3221, "step": 2612 }, { "epoch": 0.00463483805992842, "grad_norm": 0.9609375, "learning_rate": 0.001998720207962566, "loss": 0.4271, "step": 2614 }, { "epoch": 0.004638384225238236, "grad_norm": 0.21484375, "learning_rate": 0.0019987170330558715, "loss": 0.313, "step": 2616 }, { "epoch": 0.004641930390548051, "grad_norm": 1.046875, "learning_rate": 0.001998713854218711, "loss": 0.3507, "step": 2618 }, { "epoch": 0.004645476555857866, "grad_norm": 0.30859375, "learning_rate": 0.001998710671451098, "loss": 0.2538, "step": 2620 }, { "epoch": 0.004649022721167681, "grad_norm": 1.046875, "learning_rate": 0.001998707484753047, "loss": 0.3987, "step": 2622 }, { "epoch": 0.004652568886477497, "grad_norm": 0.54296875, "learning_rate": 0.001998704294124571, "loss": 0.3039, "step": 2624 }, { "epoch": 0.004656115051787312, "grad_norm": 0.36328125, "learning_rate": 0.001998701099565685, "loss": 0.3143, "step": 2626 }, { "epoch": 0.004659661217097127, "grad_norm": 0.34765625, "learning_rate": 0.001998697901076402, "loss": 0.3227, "step": 2628 }, { "epoch": 0.004663207382406942, "grad_norm": 0.365234375, "learning_rate": 0.0019986946986567363, "loss": 0.335, "step": 2630 }, { "epoch": 0.004666753547716757, "grad_norm": 1.6796875, "learning_rate": 0.0019986914923067027, "loss": 0.3679, "step": 2632 }, { "epoch": 0.004670299713026572, "grad_norm": 0.6328125, "learning_rate": 0.0019986882820263135, "loss": 0.3196, "step": 2634 }, { "epoch": 0.004673845878336387, "grad_norm": 0.369140625, "learning_rate": 0.0019986850678155844, "loss": 0.2708, "step": 2636 }, { "epoch": 0.004677392043646203, "grad_norm": 0.4296875, "learning_rate": 0.001998681849674529, "loss": 0.2926, "step": 2638 }, { "epoch": 0.004680938208956018, "grad_norm": 1.7109375, "learning_rate": 0.0019986786276031607, "loss": 0.2916, "step": 2640 }, { "epoch": 0.004684484374265833, "grad_norm": 0.4375, "learning_rate": 0.0019986754016014944, "loss": 0.3816, "step": 2642 }, { "epoch": 0.004688030539575648, "grad_norm": 0.33203125, "learning_rate": 0.001998672171669544, "loss": 0.4013, "step": 2644 }, { "epoch": 0.004691576704885464, "grad_norm": 0.375, "learning_rate": 0.001998668937807323, "loss": 0.3108, "step": 2646 }, { "epoch": 0.004695122870195278, "grad_norm": 0.197265625, "learning_rate": 0.0019986657000148466, "loss": 0.2387, "step": 2648 }, { "epoch": 0.004698669035505094, "grad_norm": 4.40625, "learning_rate": 0.001998662458292128, "loss": 0.3733, "step": 2650 }, { "epoch": 0.004702215200814909, "grad_norm": 0.51953125, "learning_rate": 0.001998659212639182, "loss": 0.4222, "step": 2652 }, { "epoch": 0.0047057613661247236, "grad_norm": 0.75, "learning_rate": 0.001998655963056023, "loss": 0.2702, "step": 2654 }, { "epoch": 0.004709307531434539, "grad_norm": 0.2734375, "learning_rate": 0.0019986527095426647, "loss": 0.2796, "step": 2656 }, { "epoch": 0.004712853696744354, "grad_norm": 1.46875, "learning_rate": 0.0019986494520991215, "loss": 0.3217, "step": 2658 }, { "epoch": 0.00471639986205417, "grad_norm": 1.015625, "learning_rate": 0.001998646190725407, "loss": 0.3625, "step": 2660 }, { "epoch": 0.004719946027363984, "grad_norm": 0.298828125, "learning_rate": 0.0019986429254215364, "loss": 0.2949, "step": 2662 }, { "epoch": 0.0047234921926738, "grad_norm": 0.328125, "learning_rate": 0.0019986396561875236, "loss": 0.2631, "step": 2664 }, { "epoch": 0.004727038357983615, "grad_norm": 2.890625, "learning_rate": 0.0019986363830233832, "loss": 0.4082, "step": 2666 }, { "epoch": 0.00473058452329343, "grad_norm": 0.734375, "learning_rate": 0.001998633105929129, "loss": 0.4736, "step": 2668 }, { "epoch": 0.004734130688603245, "grad_norm": 0.44921875, "learning_rate": 0.0019986298249047756, "loss": 0.329, "step": 2670 }, { "epoch": 0.004737676853913061, "grad_norm": 0.470703125, "learning_rate": 0.0019986265399503374, "loss": 0.4027, "step": 2672 }, { "epoch": 0.004741223019222876, "grad_norm": 0.5625, "learning_rate": 0.0019986232510658284, "loss": 0.2617, "step": 2674 }, { "epoch": 0.004744769184532691, "grad_norm": 0.5078125, "learning_rate": 0.001998619958251264, "loss": 0.3261, "step": 2676 }, { "epoch": 0.004748315349842506, "grad_norm": 1.3515625, "learning_rate": 0.001998616661506657, "loss": 0.396, "step": 2678 }, { "epoch": 0.004751861515152321, "grad_norm": 0.63671875, "learning_rate": 0.0019986133608320233, "loss": 0.3203, "step": 2680 }, { "epoch": 0.004755407680462136, "grad_norm": 1.3515625, "learning_rate": 0.0019986100562273765, "loss": 0.3113, "step": 2682 }, { "epoch": 0.004758953845771951, "grad_norm": 2.625, "learning_rate": 0.0019986067476927315, "loss": 0.5206, "step": 2684 }, { "epoch": 0.004762500011081767, "grad_norm": 1.515625, "learning_rate": 0.0019986034352281025, "loss": 0.4123, "step": 2686 }, { "epoch": 0.004766046176391581, "grad_norm": 0.33984375, "learning_rate": 0.001998600118833504, "loss": 0.2106, "step": 2688 }, { "epoch": 0.004769592341701397, "grad_norm": 0.48828125, "learning_rate": 0.0019985967985089504, "loss": 0.3059, "step": 2690 }, { "epoch": 0.004773138507011212, "grad_norm": 1.921875, "learning_rate": 0.0019985934742544564, "loss": 0.3459, "step": 2692 }, { "epoch": 0.004776684672321028, "grad_norm": 1.921875, "learning_rate": 0.0019985901460700365, "loss": 0.3537, "step": 2694 }, { "epoch": 0.004780230837630842, "grad_norm": 0.51171875, "learning_rate": 0.0019985868139557055, "loss": 0.3089, "step": 2696 }, { "epoch": 0.004783777002940658, "grad_norm": 1.015625, "learning_rate": 0.0019985834779114777, "loss": 0.4324, "step": 2698 }, { "epoch": 0.004787323168250473, "grad_norm": 1.8671875, "learning_rate": 0.0019985801379373675, "loss": 0.3557, "step": 2700 }, { "epoch": 0.004790869333560288, "grad_norm": 0.77734375, "learning_rate": 0.00199857679403339, "loss": 0.3592, "step": 2702 }, { "epoch": 0.004794415498870103, "grad_norm": 0.4921875, "learning_rate": 0.001998573446199559, "loss": 0.3282, "step": 2704 }, { "epoch": 0.004797961664179918, "grad_norm": 0.423828125, "learning_rate": 0.001998570094435891, "loss": 0.5474, "step": 2706 }, { "epoch": 0.004801507829489734, "grad_norm": 0.4375, "learning_rate": 0.0019985667387423978, "loss": 0.3031, "step": 2708 }, { "epoch": 0.004805053994799548, "grad_norm": 0.82421875, "learning_rate": 0.0019985633791190964, "loss": 0.3576, "step": 2710 }, { "epoch": 0.004808600160109364, "grad_norm": 1.328125, "learning_rate": 0.0019985600155660007, "loss": 0.4023, "step": 2712 }, { "epoch": 0.004812146325419179, "grad_norm": 0.98828125, "learning_rate": 0.001998556648083125, "loss": 0.2578, "step": 2714 }, { "epoch": 0.004815692490728994, "grad_norm": 0.4296875, "learning_rate": 0.001998553276670485, "loss": 0.2773, "step": 2716 }, { "epoch": 0.004819238656038809, "grad_norm": 0.31640625, "learning_rate": 0.0019985499013280947, "loss": 0.3085, "step": 2718 }, { "epoch": 0.004822784821348625, "grad_norm": 0.2890625, "learning_rate": 0.001998546522055969, "loss": 0.3116, "step": 2720 }, { "epoch": 0.004826330986658439, "grad_norm": 0.87109375, "learning_rate": 0.0019985431388541233, "loss": 0.4739, "step": 2722 }, { "epoch": 0.004829877151968255, "grad_norm": 0.69140625, "learning_rate": 0.001998539751722571, "loss": 0.3929, "step": 2724 }, { "epoch": 0.00483342331727807, "grad_norm": 1.2890625, "learning_rate": 0.0019985363606613285, "loss": 0.3235, "step": 2726 }, { "epoch": 0.0048369694825878854, "grad_norm": 0.765625, "learning_rate": 0.0019985329656704094, "loss": 0.2474, "step": 2728 }, { "epoch": 0.0048405156478977, "grad_norm": 0.6015625, "learning_rate": 0.001998529566749829, "loss": 0.354, "step": 2730 }, { "epoch": 0.004844061813207515, "grad_norm": 0.5703125, "learning_rate": 0.0019985261638996027, "loss": 0.2822, "step": 2732 }, { "epoch": 0.004847607978517331, "grad_norm": 0.5390625, "learning_rate": 0.0019985227571197445, "loss": 0.3357, "step": 2734 }, { "epoch": 0.004851154143827145, "grad_norm": 0.4296875, "learning_rate": 0.00199851934641027, "loss": 0.3095, "step": 2736 }, { "epoch": 0.004854700309136961, "grad_norm": 1.2578125, "learning_rate": 0.0019985159317711934, "loss": 0.554, "step": 2738 }, { "epoch": 0.004858246474446776, "grad_norm": 0.306640625, "learning_rate": 0.0019985125132025304, "loss": 0.3431, "step": 2740 }, { "epoch": 0.004861792639756591, "grad_norm": 0.6328125, "learning_rate": 0.001998509090704295, "loss": 0.3463, "step": 2742 }, { "epoch": 0.004865338805066406, "grad_norm": 0.302734375, "learning_rate": 0.001998505664276503, "loss": 0.298, "step": 2744 }, { "epoch": 0.004868884970376222, "grad_norm": 0.498046875, "learning_rate": 0.0019985022339191697, "loss": 0.2978, "step": 2746 }, { "epoch": 0.004872431135686037, "grad_norm": 0.74609375, "learning_rate": 0.0019984987996323087, "loss": 0.3427, "step": 2748 }, { "epoch": 0.004875977300995852, "grad_norm": 0.310546875, "learning_rate": 0.0019984953614159365, "loss": 0.3363, "step": 2750 }, { "epoch": 0.004879523466305667, "grad_norm": 0.2314453125, "learning_rate": 0.0019984919192700674, "loss": 0.2652, "step": 2752 }, { "epoch": 0.0048830696316154824, "grad_norm": 1.0625, "learning_rate": 0.0019984884731947166, "loss": 0.3352, "step": 2754 }, { "epoch": 0.004886615796925297, "grad_norm": 0.4921875, "learning_rate": 0.001998485023189899, "loss": 0.3254, "step": 2756 }, { "epoch": 0.004890161962235112, "grad_norm": 0.97265625, "learning_rate": 0.0019984815692556295, "loss": 0.4867, "step": 2758 }, { "epoch": 0.004893708127544928, "grad_norm": 0.765625, "learning_rate": 0.001998478111391924, "loss": 0.4473, "step": 2760 }, { "epoch": 0.004897254292854743, "grad_norm": 0.296875, "learning_rate": 0.0019984746495987967, "loss": 0.3472, "step": 2762 }, { "epoch": 0.004900800458164558, "grad_norm": 0.98046875, "learning_rate": 0.0019984711838762635, "loss": 0.4279, "step": 2764 }, { "epoch": 0.004904346623474373, "grad_norm": 3.09375, "learning_rate": 0.0019984677142243393, "loss": 0.3315, "step": 2766 }, { "epoch": 0.004907892788784189, "grad_norm": 0.734375, "learning_rate": 0.001998464240643039, "loss": 0.5489, "step": 2768 }, { "epoch": 0.004911438954094003, "grad_norm": 0.62890625, "learning_rate": 0.001998460763132378, "loss": 0.3425, "step": 2770 }, { "epoch": 0.004914985119403819, "grad_norm": 0.44140625, "learning_rate": 0.0019984572816923716, "loss": 0.4468, "step": 2772 }, { "epoch": 0.004918531284713634, "grad_norm": 0.3125, "learning_rate": 0.0019984537963230347, "loss": 0.2923, "step": 2774 }, { "epoch": 0.004922077450023449, "grad_norm": 0.83203125, "learning_rate": 0.001998450307024383, "loss": 0.3882, "step": 2776 }, { "epoch": 0.004925623615333264, "grad_norm": 0.78125, "learning_rate": 0.001998446813796432, "loss": 0.4952, "step": 2778 }, { "epoch": 0.004929169780643079, "grad_norm": 0.59375, "learning_rate": 0.0019984433166391957, "loss": 0.3895, "step": 2780 }, { "epoch": 0.004932715945952895, "grad_norm": 0.2890625, "learning_rate": 0.0019984398155526903, "loss": 0.2522, "step": 2782 }, { "epoch": 0.004936262111262709, "grad_norm": 0.330078125, "learning_rate": 0.001998436310536932, "loss": 0.2946, "step": 2784 }, { "epoch": 0.004939808276572525, "grad_norm": 0.72265625, "learning_rate": 0.001998432801591934, "loss": 0.3808, "step": 2786 }, { "epoch": 0.00494335444188234, "grad_norm": 0.4375, "learning_rate": 0.0019984292887177133, "loss": 0.4355, "step": 2788 }, { "epoch": 0.004946900607192155, "grad_norm": 1.984375, "learning_rate": 0.0019984257719142845, "loss": 0.4189, "step": 2790 }, { "epoch": 0.00495044677250197, "grad_norm": 0.6171875, "learning_rate": 0.0019984222511816633, "loss": 0.3627, "step": 2792 }, { "epoch": 0.004953992937811786, "grad_norm": 0.859375, "learning_rate": 0.001998418726519865, "loss": 0.3463, "step": 2794 }, { "epoch": 0.004957539103121601, "grad_norm": 0.37109375, "learning_rate": 0.0019984151979289054, "loss": 0.343, "step": 2796 }, { "epoch": 0.004961085268431416, "grad_norm": 1.1328125, "learning_rate": 0.0019984116654087995, "loss": 0.3639, "step": 2798 }, { "epoch": 0.004964631433741231, "grad_norm": 0.404296875, "learning_rate": 0.0019984081289595628, "loss": 0.3211, "step": 2800 }, { "epoch": 0.0049681775990510465, "grad_norm": 0.81640625, "learning_rate": 0.0019984045885812107, "loss": 0.4205, "step": 2802 }, { "epoch": 0.004971723764360861, "grad_norm": 0.4453125, "learning_rate": 0.0019984010442737586, "loss": 0.2752, "step": 2804 }, { "epoch": 0.004975269929670676, "grad_norm": 0.671875, "learning_rate": 0.0019983974960372224, "loss": 0.4363, "step": 2806 }, { "epoch": 0.004978816094980492, "grad_norm": 0.455078125, "learning_rate": 0.001998393943871618, "loss": 0.3035, "step": 2808 }, { "epoch": 0.004982362260290306, "grad_norm": 0.4609375, "learning_rate": 0.001998390387776959, "loss": 0.3992, "step": 2810 }, { "epoch": 0.004985908425600122, "grad_norm": 0.59765625, "learning_rate": 0.0019983868277532635, "loss": 0.4628, "step": 2812 }, { "epoch": 0.004989454590909937, "grad_norm": 0.9453125, "learning_rate": 0.001998383263800545, "loss": 0.3996, "step": 2814 }, { "epoch": 0.004993000756219753, "grad_norm": 0.255859375, "learning_rate": 0.0019983796959188206, "loss": 0.3718, "step": 2816 }, { "epoch": 0.004996546921529567, "grad_norm": 3.203125, "learning_rate": 0.001998376124108105, "loss": 0.4216, "step": 2818 }, { "epoch": 0.005000093086839383, "grad_norm": 1.2578125, "learning_rate": 0.001998372548368414, "loss": 0.3556, "step": 2820 }, { "epoch": 0.005003639252149198, "grad_norm": 0.26171875, "learning_rate": 0.001998368968699763, "loss": 0.2456, "step": 2822 }, { "epoch": 0.005007185417459013, "grad_norm": 1.7265625, "learning_rate": 0.0019983653851021687, "loss": 0.4176, "step": 2824 }, { "epoch": 0.005010731582768828, "grad_norm": 0.60546875, "learning_rate": 0.0019983617975756454, "loss": 0.3417, "step": 2826 }, { "epoch": 0.0050142777480786434, "grad_norm": 0.275390625, "learning_rate": 0.00199835820612021, "loss": 0.2935, "step": 2828 }, { "epoch": 0.005017823913388459, "grad_norm": 0.2470703125, "learning_rate": 0.001998354610735877, "loss": 0.2535, "step": 2830 }, { "epoch": 0.005021370078698273, "grad_norm": 0.6875, "learning_rate": 0.0019983510114226634, "loss": 0.2801, "step": 2832 }, { "epoch": 0.005024916244008089, "grad_norm": 0.38671875, "learning_rate": 0.001998347408180584, "loss": 0.2713, "step": 2834 }, { "epoch": 0.005028462409317904, "grad_norm": 0.54296875, "learning_rate": 0.001998343801009655, "loss": 0.3337, "step": 2836 }, { "epoch": 0.005032008574627719, "grad_norm": 0.33203125, "learning_rate": 0.001998340189909892, "loss": 0.2395, "step": 2838 }, { "epoch": 0.005035554739937534, "grad_norm": 0.388671875, "learning_rate": 0.0019983365748813104, "loss": 0.3079, "step": 2840 }, { "epoch": 0.00503910090524735, "grad_norm": 0.4765625, "learning_rate": 0.001998332955923927, "loss": 0.4155, "step": 2842 }, { "epoch": 0.005042647070557164, "grad_norm": 0.470703125, "learning_rate": 0.0019983293330377567, "loss": 0.3849, "step": 2844 }, { "epoch": 0.00504619323586698, "grad_norm": 0.373046875, "learning_rate": 0.001998325706222816, "loss": 0.3271, "step": 2846 }, { "epoch": 0.005049739401176795, "grad_norm": 0.7421875, "learning_rate": 0.00199832207547912, "loss": 0.2931, "step": 2848 }, { "epoch": 0.0050532855664866105, "grad_norm": 0.466796875, "learning_rate": 0.0019983184408066857, "loss": 0.3996, "step": 2850 }, { "epoch": 0.005056831731796425, "grad_norm": 0.63671875, "learning_rate": 0.001998314802205528, "loss": 0.2881, "step": 2852 }, { "epoch": 0.0050603778971062404, "grad_norm": 0.427734375, "learning_rate": 0.001998311159675663, "loss": 0.2577, "step": 2854 }, { "epoch": 0.005063924062416056, "grad_norm": 0.5625, "learning_rate": 0.0019983075132171068, "loss": 0.3623, "step": 2856 }, { "epoch": 0.00506747022772587, "grad_norm": 0.2294921875, "learning_rate": 0.0019983038628298756, "loss": 0.2591, "step": 2858 }, { "epoch": 0.005071016393035686, "grad_norm": 1.046875, "learning_rate": 0.001998300208513985, "loss": 0.3839, "step": 2860 }, { "epoch": 0.005074562558345501, "grad_norm": 0.267578125, "learning_rate": 0.001998296550269451, "loss": 0.3262, "step": 2862 }, { "epoch": 0.005078108723655317, "grad_norm": 0.283203125, "learning_rate": 0.0019982928880962897, "loss": 0.3527, "step": 2864 }, { "epoch": 0.005081654888965131, "grad_norm": 0.4375, "learning_rate": 0.0019982892219945173, "loss": 0.3176, "step": 2866 }, { "epoch": 0.005085201054274947, "grad_norm": 0.248046875, "learning_rate": 0.0019982855519641496, "loss": 0.2781, "step": 2868 }, { "epoch": 0.005088747219584762, "grad_norm": 0.306640625, "learning_rate": 0.0019982818780052026, "loss": 0.2864, "step": 2870 }, { "epoch": 0.005092293384894577, "grad_norm": 0.953125, "learning_rate": 0.0019982782001176924, "loss": 0.3155, "step": 2872 }, { "epoch": 0.005095839550204392, "grad_norm": 0.6015625, "learning_rate": 0.0019982745183016347, "loss": 0.4162, "step": 2874 }, { "epoch": 0.0050993857155142075, "grad_norm": 0.640625, "learning_rate": 0.001998270832557047, "loss": 0.3198, "step": 2876 }, { "epoch": 0.005102931880824022, "grad_norm": 0.546875, "learning_rate": 0.001998267142883944, "loss": 0.287, "step": 2878 }, { "epoch": 0.0051064780461338374, "grad_norm": 0.251953125, "learning_rate": 0.001998263449282342, "loss": 0.2977, "step": 2880 }, { "epoch": 0.005110024211443653, "grad_norm": 0.89453125, "learning_rate": 0.0019982597517522573, "loss": 0.321, "step": 2882 }, { "epoch": 0.005113570376753468, "grad_norm": 0.380859375, "learning_rate": 0.001998256050293707, "loss": 0.4265, "step": 2884 }, { "epoch": 0.005117116542063283, "grad_norm": 0.8203125, "learning_rate": 0.001998252344906706, "loss": 0.3992, "step": 2886 }, { "epoch": 0.005120662707373098, "grad_norm": 0.5, "learning_rate": 0.0019982486355912712, "loss": 0.3643, "step": 2888 }, { "epoch": 0.005124208872682914, "grad_norm": 0.7109375, "learning_rate": 0.0019982449223474186, "loss": 0.3402, "step": 2890 }, { "epoch": 0.005127755037992728, "grad_norm": 0.5703125, "learning_rate": 0.001998241205175164, "loss": 0.5101, "step": 2892 }, { "epoch": 0.005131301203302544, "grad_norm": 0.578125, "learning_rate": 0.001998237484074525, "loss": 0.3634, "step": 2894 }, { "epoch": 0.005134847368612359, "grad_norm": 0.58984375, "learning_rate": 0.0019982337590455164, "loss": 0.3343, "step": 2896 }, { "epoch": 0.0051383935339221745, "grad_norm": 0.40234375, "learning_rate": 0.001998230030088155, "loss": 0.4453, "step": 2898 }, { "epoch": 0.005141939699231989, "grad_norm": 1.2265625, "learning_rate": 0.0019982262972024576, "loss": 0.395, "step": 2900 }, { "epoch": 0.0051454858645418045, "grad_norm": 1.4921875, "learning_rate": 0.00199822256038844, "loss": 0.3722, "step": 2902 }, { "epoch": 0.00514903202985162, "grad_norm": 0.3046875, "learning_rate": 0.0019982188196461187, "loss": 0.3089, "step": 2904 }, { "epoch": 0.005152578195161434, "grad_norm": 0.6796875, "learning_rate": 0.00199821507497551, "loss": 0.3687, "step": 2906 }, { "epoch": 0.00515612436047125, "grad_norm": 0.373046875, "learning_rate": 0.0019982113263766303, "loss": 0.2594, "step": 2908 }, { "epoch": 0.005159670525781065, "grad_norm": 0.796875, "learning_rate": 0.001998207573849496, "loss": 0.4331, "step": 2910 }, { "epoch": 0.00516321669109088, "grad_norm": 0.220703125, "learning_rate": 0.0019982038173941234, "loss": 0.2379, "step": 2912 }, { "epoch": 0.005166762856400695, "grad_norm": 0.546875, "learning_rate": 0.001998200057010529, "loss": 0.395, "step": 2914 }, { "epoch": 0.005170309021710511, "grad_norm": 1.046875, "learning_rate": 0.0019981962926987292, "loss": 0.3564, "step": 2916 }, { "epoch": 0.005173855187020326, "grad_norm": 0.353515625, "learning_rate": 0.001998192524458741, "loss": 0.3461, "step": 2918 }, { "epoch": 0.005177401352330141, "grad_norm": 0.25390625, "learning_rate": 0.0019981887522905802, "loss": 0.298, "step": 2920 }, { "epoch": 0.005180947517639956, "grad_norm": 0.4375, "learning_rate": 0.0019981849761942635, "loss": 0.2868, "step": 2922 }, { "epoch": 0.0051844936829497715, "grad_norm": 0.890625, "learning_rate": 0.0019981811961698077, "loss": 0.3366, "step": 2924 }, { "epoch": 0.005188039848259586, "grad_norm": 0.349609375, "learning_rate": 0.001998177412217229, "loss": 0.4207, "step": 2926 }, { "epoch": 0.0051915860135694014, "grad_norm": 0.361328125, "learning_rate": 0.0019981736243365436, "loss": 0.2418, "step": 2928 }, { "epoch": 0.005195132178879217, "grad_norm": 0.52734375, "learning_rate": 0.0019981698325277687, "loss": 0.3548, "step": 2930 }, { "epoch": 0.005198678344189032, "grad_norm": 0.953125, "learning_rate": 0.0019981660367909206, "loss": 0.505, "step": 2932 }, { "epoch": 0.005202224509498847, "grad_norm": 2.6875, "learning_rate": 0.0019981622371260162, "loss": 0.3292, "step": 2934 }, { "epoch": 0.005205770674808662, "grad_norm": 0.59375, "learning_rate": 0.001998158433533072, "loss": 0.3489, "step": 2936 }, { "epoch": 0.005209316840118478, "grad_norm": 0.75, "learning_rate": 0.0019981546260121044, "loss": 0.3117, "step": 2938 }, { "epoch": 0.005212863005428292, "grad_norm": 1.0234375, "learning_rate": 0.00199815081456313, "loss": 0.4501, "step": 2940 }, { "epoch": 0.005216409170738108, "grad_norm": 0.314453125, "learning_rate": 0.001998146999186166, "loss": 0.2799, "step": 2942 }, { "epoch": 0.005219955336047923, "grad_norm": 0.49609375, "learning_rate": 0.0019981431798812284, "loss": 0.2491, "step": 2944 }, { "epoch": 0.005223501501357738, "grad_norm": 0.94921875, "learning_rate": 0.001998139356648334, "loss": 0.3046, "step": 2946 }, { "epoch": 0.005227047666667553, "grad_norm": 0.37890625, "learning_rate": 0.0019981355294875, "loss": 0.3474, "step": 2948 }, { "epoch": 0.0052305938319773685, "grad_norm": 0.60546875, "learning_rate": 0.001998131698398743, "loss": 0.3788, "step": 2950 }, { "epoch": 0.005234139997287184, "grad_norm": 0.5078125, "learning_rate": 0.0019981278633820795, "loss": 0.3671, "step": 2952 }, { "epoch": 0.0052376861625969984, "grad_norm": 0.35546875, "learning_rate": 0.0019981240244375266, "loss": 0.3066, "step": 2954 }, { "epoch": 0.005241232327906814, "grad_norm": 1.1328125, "learning_rate": 0.001998120181565101, "loss": 0.4496, "step": 2956 }, { "epoch": 0.005244778493216629, "grad_norm": 0.3515625, "learning_rate": 0.001998116334764819, "loss": 0.2959, "step": 2958 }, { "epoch": 0.005248324658526444, "grad_norm": 0.48828125, "learning_rate": 0.0019981124840366985, "loss": 0.3182, "step": 2960 }, { "epoch": 0.005251870823836259, "grad_norm": 1.125, "learning_rate": 0.001998108629380755, "loss": 0.3046, "step": 2962 }, { "epoch": 0.005255416989146075, "grad_norm": 0.97265625, "learning_rate": 0.001998104770797006, "loss": 0.3488, "step": 2964 }, { "epoch": 0.00525896315445589, "grad_norm": 0.380859375, "learning_rate": 0.0019981009082854695, "loss": 0.2864, "step": 2966 }, { "epoch": 0.005262509319765705, "grad_norm": 0.27734375, "learning_rate": 0.00199809704184616, "loss": 0.3037, "step": 2968 }, { "epoch": 0.00526605548507552, "grad_norm": 0.318359375, "learning_rate": 0.0019980931714790964, "loss": 0.3639, "step": 2970 }, { "epoch": 0.0052696016503853355, "grad_norm": 0.69921875, "learning_rate": 0.0019980892971842947, "loss": 0.3356, "step": 2972 }, { "epoch": 0.00527314781569515, "grad_norm": 1.7734375, "learning_rate": 0.0019980854189617725, "loss": 0.4104, "step": 2974 }, { "epoch": 0.0052766939810049655, "grad_norm": 0.4921875, "learning_rate": 0.001998081536811546, "loss": 0.2835, "step": 2976 }, { "epoch": 0.005280240146314781, "grad_norm": 0.2353515625, "learning_rate": 0.0019980776507336324, "loss": 0.2296, "step": 2978 }, { "epoch": 0.0052837863116245954, "grad_norm": 0.265625, "learning_rate": 0.0019980737607280486, "loss": 0.3178, "step": 2980 }, { "epoch": 0.005287332476934411, "grad_norm": 1.03125, "learning_rate": 0.0019980698667948125, "loss": 0.2921, "step": 2982 }, { "epoch": 0.005290878642244226, "grad_norm": 0.4609375, "learning_rate": 0.00199806596893394, "loss": 0.3058, "step": 2984 }, { "epoch": 0.005294424807554042, "grad_norm": 0.291015625, "learning_rate": 0.001998062067145449, "loss": 0.354, "step": 2986 }, { "epoch": 0.005297970972863856, "grad_norm": 0.388671875, "learning_rate": 0.0019980581614293556, "loss": 0.2857, "step": 2988 }, { "epoch": 0.005301517138173672, "grad_norm": 0.296875, "learning_rate": 0.001998054251785678, "loss": 0.2665, "step": 2990 }, { "epoch": 0.005305063303483487, "grad_norm": 0.828125, "learning_rate": 0.0019980503382144325, "loss": 0.3523, "step": 2992 }, { "epoch": 0.005308609468793302, "grad_norm": 0.54296875, "learning_rate": 0.0019980464207156367, "loss": 0.2823, "step": 2994 }, { "epoch": 0.005312155634103117, "grad_norm": 0.4375, "learning_rate": 0.001998042499289307, "loss": 0.2396, "step": 2996 }, { "epoch": 0.0053157017994129325, "grad_norm": 0.2890625, "learning_rate": 0.001998038573935462, "loss": 0.3331, "step": 2998 }, { "epoch": 0.005319247964722748, "grad_norm": 0.2255859375, "learning_rate": 0.001998034644654117, "loss": 0.3299, "step": 3000 }, { "epoch": 0.0053227941300325625, "grad_norm": 0.44140625, "learning_rate": 0.0019980307114452903, "loss": 0.2866, "step": 3002 }, { "epoch": 0.005326340295342378, "grad_norm": 0.369140625, "learning_rate": 0.0019980267743089993, "loss": 0.2317, "step": 3004 }, { "epoch": 0.005329886460652193, "grad_norm": 0.310546875, "learning_rate": 0.0019980228332452605, "loss": 0.3696, "step": 3006 }, { "epoch": 0.005333432625962008, "grad_norm": 0.44140625, "learning_rate": 0.001998018888254091, "loss": 0.3672, "step": 3008 }, { "epoch": 0.005336978791271823, "grad_norm": 0.3984375, "learning_rate": 0.0019980149393355093, "loss": 0.3108, "step": 3010 }, { "epoch": 0.005340524956581639, "grad_norm": 0.3828125, "learning_rate": 0.0019980109864895317, "loss": 0.7665, "step": 3012 }, { "epoch": 0.005344071121891453, "grad_norm": 3.28125, "learning_rate": 0.0019980070297161753, "loss": 0.6211, "step": 3014 }, { "epoch": 0.005347617287201269, "grad_norm": 0.337890625, "learning_rate": 0.001998003069015458, "loss": 0.3374, "step": 3016 }, { "epoch": 0.005351163452511084, "grad_norm": 0.361328125, "learning_rate": 0.0019979991043873975, "loss": 0.4111, "step": 3018 }, { "epoch": 0.0053547096178208995, "grad_norm": 0.58984375, "learning_rate": 0.0019979951358320095, "loss": 0.3796, "step": 3020 }, { "epoch": 0.005358255783130714, "grad_norm": 0.478515625, "learning_rate": 0.001997991163349313, "loss": 0.3128, "step": 3022 }, { "epoch": 0.0053618019484405295, "grad_norm": 0.86328125, "learning_rate": 0.001997987186939325, "loss": 0.5398, "step": 3024 }, { "epoch": 0.005365348113750345, "grad_norm": 0.51171875, "learning_rate": 0.001997983206602062, "loss": 0.3081, "step": 3026 }, { "epoch": 0.0053688942790601595, "grad_norm": 0.52734375, "learning_rate": 0.0019979792223375425, "loss": 0.3187, "step": 3028 }, { "epoch": 0.005372440444369975, "grad_norm": 2.3125, "learning_rate": 0.0019979752341457834, "loss": 0.5208, "step": 3030 }, { "epoch": 0.00537598660967979, "grad_norm": 0.380859375, "learning_rate": 0.001997971242026802, "loss": 0.3044, "step": 3032 }, { "epoch": 0.005379532774989606, "grad_norm": 0.265625, "learning_rate": 0.0019979672459806163, "loss": 0.301, "step": 3034 }, { "epoch": 0.00538307894029942, "grad_norm": 1.7734375, "learning_rate": 0.0019979632460072434, "loss": 0.5821, "step": 3036 }, { "epoch": 0.005386625105609236, "grad_norm": 0.2373046875, "learning_rate": 0.0019979592421067007, "loss": 0.2779, "step": 3038 }, { "epoch": 0.005390171270919051, "grad_norm": 0.38671875, "learning_rate": 0.001997955234279006, "loss": 0.297, "step": 3040 }, { "epoch": 0.005393717436228866, "grad_norm": 0.98828125, "learning_rate": 0.0019979512225241766, "loss": 0.3257, "step": 3042 }, { "epoch": 0.005397263601538681, "grad_norm": 0.228515625, "learning_rate": 0.0019979472068422303, "loss": 0.3346, "step": 3044 }, { "epoch": 0.0054008097668484965, "grad_norm": 0.474609375, "learning_rate": 0.0019979431872331845, "loss": 0.3538, "step": 3046 }, { "epoch": 0.005404355932158311, "grad_norm": 0.421875, "learning_rate": 0.0019979391636970566, "loss": 0.2874, "step": 3048 }, { "epoch": 0.0054079020974681265, "grad_norm": 0.31640625, "learning_rate": 0.0019979351362338646, "loss": 0.3763, "step": 3050 }, { "epoch": 0.005411448262777942, "grad_norm": 0.484375, "learning_rate": 0.001997931104843626, "loss": 0.4304, "step": 3052 }, { "epoch": 0.005414994428087757, "grad_norm": 0.1826171875, "learning_rate": 0.001997927069526358, "loss": 0.3974, "step": 3054 }, { "epoch": 0.005418540593397572, "grad_norm": 0.66796875, "learning_rate": 0.0019979230302820785, "loss": 0.396, "step": 3056 }, { "epoch": 0.005422086758707387, "grad_norm": 2.40625, "learning_rate": 0.0019979189871108054, "loss": 0.262, "step": 3058 }, { "epoch": 0.005425632924017203, "grad_norm": 0.416015625, "learning_rate": 0.0019979149400125564, "loss": 0.3403, "step": 3060 }, { "epoch": 0.005429179089327017, "grad_norm": 0.32421875, "learning_rate": 0.001997910888987349, "loss": 0.407, "step": 3062 }, { "epoch": 0.005432725254636833, "grad_norm": 2.859375, "learning_rate": 0.001997906834035201, "loss": 0.3674, "step": 3064 }, { "epoch": 0.005436271419946648, "grad_norm": 0.427734375, "learning_rate": 0.0019979027751561296, "loss": 0.376, "step": 3066 }, { "epoch": 0.0054398175852564635, "grad_norm": 0.2412109375, "learning_rate": 0.0019978987123501534, "loss": 0.3365, "step": 3068 }, { "epoch": 0.005443363750566278, "grad_norm": 0.53515625, "learning_rate": 0.00199789464561729, "loss": 0.3183, "step": 3070 }, { "epoch": 0.0054469099158760935, "grad_norm": 1.2578125, "learning_rate": 0.001997890574957557, "loss": 0.3286, "step": 3072 }, { "epoch": 0.005450456081185909, "grad_norm": 0.40625, "learning_rate": 0.001997886500370971, "loss": 0.2991, "step": 3074 }, { "epoch": 0.0054540022464957235, "grad_norm": 0.255859375, "learning_rate": 0.0019978824218575522, "loss": 0.2764, "step": 3076 }, { "epoch": 0.005457548411805539, "grad_norm": 0.86328125, "learning_rate": 0.001997878339417317, "loss": 0.3843, "step": 3078 }, { "epoch": 0.005461094577115354, "grad_norm": 2.578125, "learning_rate": 0.001997874253050283, "loss": 0.3829, "step": 3080 }, { "epoch": 0.005464640742425169, "grad_norm": 0.3203125, "learning_rate": 0.0019978701627564694, "loss": 0.2504, "step": 3082 }, { "epoch": 0.005468186907734984, "grad_norm": 4.03125, "learning_rate": 0.0019978660685358927, "loss": 0.372, "step": 3084 }, { "epoch": 0.0054717330730448, "grad_norm": 0.435546875, "learning_rate": 0.0019978619703885712, "loss": 0.2704, "step": 3086 }, { "epoch": 0.005475279238354615, "grad_norm": 2.328125, "learning_rate": 0.0019978578683145236, "loss": 0.3223, "step": 3088 }, { "epoch": 0.00547882540366443, "grad_norm": 0.439453125, "learning_rate": 0.0019978537623137668, "loss": 0.2812, "step": 3090 }, { "epoch": 0.005482371568974245, "grad_norm": 0.318359375, "learning_rate": 0.0019978496523863193, "loss": 0.2967, "step": 3092 }, { "epoch": 0.0054859177342840605, "grad_norm": 0.439453125, "learning_rate": 0.001997845538532199, "loss": 0.2953, "step": 3094 }, { "epoch": 0.005489463899593875, "grad_norm": 0.6875, "learning_rate": 0.001997841420751424, "loss": 0.222, "step": 3096 }, { "epoch": 0.0054930100649036905, "grad_norm": 0.48046875, "learning_rate": 0.001997837299044012, "loss": 0.3668, "step": 3098 }, { "epoch": 0.005496556230213506, "grad_norm": 0.3515625, "learning_rate": 0.001997833173409981, "loss": 0.3087, "step": 3100 }, { "epoch": 0.005500102395523321, "grad_norm": 0.443359375, "learning_rate": 0.001997829043849349, "loss": 0.3671, "step": 3102 }, { "epoch": 0.005503648560833136, "grad_norm": 0.306640625, "learning_rate": 0.001997824910362135, "loss": 0.2986, "step": 3104 }, { "epoch": 0.005507194726142951, "grad_norm": 0.369140625, "learning_rate": 0.001997820772948356, "loss": 0.3142, "step": 3106 }, { "epoch": 0.005510740891452767, "grad_norm": 1.2578125, "learning_rate": 0.0019978166316080305, "loss": 0.324, "step": 3108 }, { "epoch": 0.005514287056762581, "grad_norm": 0.5703125, "learning_rate": 0.0019978124863411764, "loss": 0.2928, "step": 3110 }, { "epoch": 0.005517833222072397, "grad_norm": 0.494140625, "learning_rate": 0.001997808337147812, "loss": 0.2638, "step": 3112 }, { "epoch": 0.005521379387382212, "grad_norm": 0.63671875, "learning_rate": 0.001997804184027956, "loss": 0.3343, "step": 3114 }, { "epoch": 0.005524925552692027, "grad_norm": 0.263671875, "learning_rate": 0.0019978000269816254, "loss": 0.2812, "step": 3116 }, { "epoch": 0.005528471718001842, "grad_norm": 0.349609375, "learning_rate": 0.001997795866008839, "loss": 0.3444, "step": 3118 }, { "epoch": 0.0055320178833116575, "grad_norm": 0.3125, "learning_rate": 0.0019977917011096153, "loss": 0.2748, "step": 3120 }, { "epoch": 0.005535564048621473, "grad_norm": 0.53515625, "learning_rate": 0.0019977875322839717, "loss": 0.3103, "step": 3122 }, { "epoch": 0.0055391102139312875, "grad_norm": 0.294921875, "learning_rate": 0.001997783359531927, "loss": 0.2796, "step": 3124 }, { "epoch": 0.005542656379241103, "grad_norm": 1.2578125, "learning_rate": 0.0019977791828535, "loss": 0.3919, "step": 3126 }, { "epoch": 0.005546202544550918, "grad_norm": 1.3515625, "learning_rate": 0.001997775002248708, "loss": 0.5206, "step": 3128 }, { "epoch": 0.005549748709860733, "grad_norm": 1.6171875, "learning_rate": 0.0019977708177175697, "loss": 0.3797, "step": 3130 }, { "epoch": 0.005553294875170548, "grad_norm": 0.8984375, "learning_rate": 0.001997766629260103, "loss": 0.2913, "step": 3132 }, { "epoch": 0.005556841040480364, "grad_norm": 0.224609375, "learning_rate": 0.001997762436876327, "loss": 0.2958, "step": 3134 }, { "epoch": 0.005560387205790179, "grad_norm": 0.5078125, "learning_rate": 0.0019977582405662593, "loss": 0.3557, "step": 3136 }, { "epoch": 0.005563933371099994, "grad_norm": 0.875, "learning_rate": 0.0019977540403299182, "loss": 0.5528, "step": 3138 }, { "epoch": 0.005567479536409809, "grad_norm": 0.625, "learning_rate": 0.001997749836167323, "loss": 0.4444, "step": 3140 }, { "epoch": 0.0055710257017196245, "grad_norm": 3.328125, "learning_rate": 0.0019977456280784915, "loss": 0.4219, "step": 3142 }, { "epoch": 0.005574571867029439, "grad_norm": 0.98828125, "learning_rate": 0.001997741416063442, "loss": 0.2498, "step": 3144 }, { "epoch": 0.0055781180323392545, "grad_norm": 0.318359375, "learning_rate": 0.0019977372001221926, "loss": 0.3011, "step": 3146 }, { "epoch": 0.00558166419764907, "grad_norm": 1.0859375, "learning_rate": 0.0019977329802547627, "loss": 0.2365, "step": 3148 }, { "epoch": 0.0055852103629588845, "grad_norm": 2.203125, "learning_rate": 0.00199772875646117, "loss": 0.6416, "step": 3150 }, { "epoch": 0.0055887565282687, "grad_norm": 0.640625, "learning_rate": 0.0019977245287414328, "loss": 0.3236, "step": 3152 }, { "epoch": 0.005592302693578515, "grad_norm": 0.3046875, "learning_rate": 0.0019977202970955705, "loss": 0.282, "step": 3154 }, { "epoch": 0.005595848858888331, "grad_norm": 3.296875, "learning_rate": 0.001997716061523601, "loss": 0.4333, "step": 3156 }, { "epoch": 0.005599395024198145, "grad_norm": 1.515625, "learning_rate": 0.001997711822025543, "loss": 0.4657, "step": 3158 }, { "epoch": 0.005602941189507961, "grad_norm": 0.50390625, "learning_rate": 0.0019977075786014147, "loss": 0.2869, "step": 3160 }, { "epoch": 0.005606487354817776, "grad_norm": 0.6875, "learning_rate": 0.0019977033312512348, "loss": 0.2784, "step": 3162 }, { "epoch": 0.005610033520127591, "grad_norm": 0.78515625, "learning_rate": 0.0019976990799750217, "loss": 0.3246, "step": 3164 }, { "epoch": 0.005613579685437406, "grad_norm": 0.3828125, "learning_rate": 0.001997694824772795, "loss": 0.2738, "step": 3166 }, { "epoch": 0.0056171258507472215, "grad_norm": 0.6484375, "learning_rate": 0.0019976905656445727, "loss": 0.2708, "step": 3168 }, { "epoch": 0.005620672016057037, "grad_norm": 1.34375, "learning_rate": 0.0019976863025903723, "loss": 0.4467, "step": 3170 }, { "epoch": 0.0056242181813668515, "grad_norm": 0.1953125, "learning_rate": 0.001997682035610214, "loss": 0.2979, "step": 3172 }, { "epoch": 0.005627764346676667, "grad_norm": 0.1767578125, "learning_rate": 0.0019976777647041162, "loss": 0.207, "step": 3174 }, { "epoch": 0.005631310511986482, "grad_norm": 0.58203125, "learning_rate": 0.001997673489872097, "loss": 0.3519, "step": 3176 }, { "epoch": 0.005634856677296297, "grad_norm": 0.474609375, "learning_rate": 0.0019976692111141753, "loss": 0.2799, "step": 3178 }, { "epoch": 0.005638402842606112, "grad_norm": 0.357421875, "learning_rate": 0.0019976649284303705, "loss": 0.2251, "step": 3180 }, { "epoch": 0.005641949007915928, "grad_norm": 0.2197265625, "learning_rate": 0.0019976606418207004, "loss": 0.3615, "step": 3182 }, { "epoch": 0.005645495173225742, "grad_norm": 0.3828125, "learning_rate": 0.0019976563512851837, "loss": 0.342, "step": 3184 }, { "epoch": 0.005649041338535558, "grad_norm": 1.5234375, "learning_rate": 0.00199765205682384, "loss": 0.3305, "step": 3186 }, { "epoch": 0.005652587503845373, "grad_norm": 0.69921875, "learning_rate": 0.001997647758436687, "loss": 0.2968, "step": 3188 }, { "epoch": 0.0056561336691551885, "grad_norm": 0.4375, "learning_rate": 0.0019976434561237446, "loss": 0.3484, "step": 3190 }, { "epoch": 0.005659679834465003, "grad_norm": 0.404296875, "learning_rate": 0.001997639149885031, "loss": 0.2637, "step": 3192 }, { "epoch": 0.0056632259997748185, "grad_norm": 0.35546875, "learning_rate": 0.001997634839720565, "loss": 0.3236, "step": 3194 }, { "epoch": 0.005666772165084634, "grad_norm": 0.41015625, "learning_rate": 0.0019976305256303663, "loss": 0.3569, "step": 3196 }, { "epoch": 0.0056703183303944485, "grad_norm": 4.46875, "learning_rate": 0.0019976262076144523, "loss": 0.2971, "step": 3198 }, { "epoch": 0.005673864495704264, "grad_norm": 1.3515625, "learning_rate": 0.001997621885672843, "loss": 0.3112, "step": 3200 }, { "epoch": 0.005677410661014079, "grad_norm": 1.6953125, "learning_rate": 0.001997617559805557, "loss": 0.5851, "step": 3202 }, { "epoch": 0.005680956826323895, "grad_norm": 1.5625, "learning_rate": 0.001997613230012613, "loss": 0.3979, "step": 3204 }, { "epoch": 0.005684502991633709, "grad_norm": 0.765625, "learning_rate": 0.001997608896294031, "loss": 0.4134, "step": 3206 }, { "epoch": 0.005688049156943525, "grad_norm": 0.5390625, "learning_rate": 0.001997604558649828, "loss": 0.3292, "step": 3208 }, { "epoch": 0.00569159532225334, "grad_norm": 0.41015625, "learning_rate": 0.001997600217080024, "loss": 0.3304, "step": 3210 }, { "epoch": 0.005695141487563155, "grad_norm": 0.26953125, "learning_rate": 0.0019975958715846387, "loss": 0.3288, "step": 3212 }, { "epoch": 0.00569868765287297, "grad_norm": 0.796875, "learning_rate": 0.0019975915221636903, "loss": 0.3124, "step": 3214 }, { "epoch": 0.0057022338181827855, "grad_norm": 0.263671875, "learning_rate": 0.001997587168817198, "loss": 0.2574, "step": 3216 }, { "epoch": 0.0057057799834926, "grad_norm": 0.27734375, "learning_rate": 0.0019975828115451804, "loss": 0.3256, "step": 3218 }, { "epoch": 0.0057093261488024155, "grad_norm": 2.1875, "learning_rate": 0.0019975784503476575, "loss": 0.3901, "step": 3220 }, { "epoch": 0.005712872314112231, "grad_norm": 2.765625, "learning_rate": 0.0019975740852246474, "loss": 0.3555, "step": 3222 }, { "epoch": 0.005716418479422046, "grad_norm": 0.53515625, "learning_rate": 0.00199756971617617, "loss": 0.3528, "step": 3224 }, { "epoch": 0.005719964644731861, "grad_norm": 1.625, "learning_rate": 0.0019975653432022437, "loss": 0.3324, "step": 3226 }, { "epoch": 0.005723510810041676, "grad_norm": 0.396484375, "learning_rate": 0.001997560966302888, "loss": 0.2853, "step": 3228 }, { "epoch": 0.005727056975351492, "grad_norm": 1.6953125, "learning_rate": 0.001997556585478122, "loss": 0.3712, "step": 3230 }, { "epoch": 0.005730603140661306, "grad_norm": 0.3828125, "learning_rate": 0.001997552200727965, "loss": 0.3184, "step": 3232 }, { "epoch": 0.005734149305971122, "grad_norm": 0.390625, "learning_rate": 0.001997547812052436, "loss": 0.3259, "step": 3234 }, { "epoch": 0.005737695471280937, "grad_norm": 0.58203125, "learning_rate": 0.0019975434194515543, "loss": 0.3406, "step": 3236 }, { "epoch": 0.0057412416365907526, "grad_norm": 0.61328125, "learning_rate": 0.001997539022925339, "loss": 0.2699, "step": 3238 }, { "epoch": 0.005744787801900567, "grad_norm": 0.5, "learning_rate": 0.001997534622473809, "loss": 0.2815, "step": 3240 }, { "epoch": 0.0057483339672103825, "grad_norm": 0.52734375, "learning_rate": 0.0019975302180969844, "loss": 0.2411, "step": 3242 }, { "epoch": 0.005751880132520198, "grad_norm": 0.2421875, "learning_rate": 0.001997525809794884, "loss": 0.3577, "step": 3244 }, { "epoch": 0.0057554262978300125, "grad_norm": 0.46484375, "learning_rate": 0.0019975213975675266, "loss": 0.3507, "step": 3246 }, { "epoch": 0.005758972463139828, "grad_norm": 0.361328125, "learning_rate": 0.0019975169814149324, "loss": 0.2854, "step": 3248 }, { "epoch": 0.005762518628449643, "grad_norm": 0.60546875, "learning_rate": 0.00199751256133712, "loss": 0.359, "step": 3250 }, { "epoch": 0.005766064793759458, "grad_norm": 0.455078125, "learning_rate": 0.001997508137334109, "loss": 0.2627, "step": 3252 }, { "epoch": 0.005769610959069273, "grad_norm": 0.3203125, "learning_rate": 0.001997503709405919, "loss": 0.3197, "step": 3254 }, { "epoch": 0.005773157124379089, "grad_norm": 0.384765625, "learning_rate": 0.001997499277552569, "loss": 0.2707, "step": 3256 }, { "epoch": 0.005776703289688904, "grad_norm": 0.8125, "learning_rate": 0.0019974948417740782, "loss": 0.2898, "step": 3258 }, { "epoch": 0.005780249454998719, "grad_norm": 0.33984375, "learning_rate": 0.001997490402070466, "loss": 0.2489, "step": 3260 }, { "epoch": 0.005783795620308534, "grad_norm": 6.1875, "learning_rate": 0.001997485958441753, "loss": 0.3204, "step": 3262 }, { "epoch": 0.0057873417856183496, "grad_norm": 0.400390625, "learning_rate": 0.001997481510887957, "loss": 0.2608, "step": 3264 }, { "epoch": 0.005790887950928164, "grad_norm": 0.296875, "learning_rate": 0.001997477059409099, "loss": 0.3455, "step": 3266 }, { "epoch": 0.0057944341162379795, "grad_norm": 0.55078125, "learning_rate": 0.001997472604005197, "loss": 0.2793, "step": 3268 }, { "epoch": 0.005797980281547795, "grad_norm": 0.25390625, "learning_rate": 0.001997468144676271, "loss": 0.2685, "step": 3270 }, { "epoch": 0.00580152644685761, "grad_norm": 0.67578125, "learning_rate": 0.0019974636814223414, "loss": 0.302, "step": 3272 }, { "epoch": 0.005805072612167425, "grad_norm": 0.330078125, "learning_rate": 0.0019974592142434264, "loss": 0.3065, "step": 3274 }, { "epoch": 0.00580861877747724, "grad_norm": 0.419921875, "learning_rate": 0.001997454743139546, "loss": 0.2545, "step": 3276 }, { "epoch": 0.005812164942787056, "grad_norm": 0.37890625, "learning_rate": 0.0019974502681107203, "loss": 0.2476, "step": 3278 }, { "epoch": 0.00581571110809687, "grad_norm": 0.294921875, "learning_rate": 0.001997445789156968, "loss": 0.3519, "step": 3280 }, { "epoch": 0.005819257273406686, "grad_norm": 0.287109375, "learning_rate": 0.0019974413062783095, "loss": 0.3906, "step": 3282 }, { "epoch": 0.005822803438716501, "grad_norm": 0.41796875, "learning_rate": 0.0019974368194747637, "loss": 0.2944, "step": 3284 }, { "epoch": 0.005826349604026316, "grad_norm": 0.439453125, "learning_rate": 0.001997432328746351, "loss": 0.2693, "step": 3286 }, { "epoch": 0.005829895769336131, "grad_norm": 0.322265625, "learning_rate": 0.00199742783409309, "loss": 0.3522, "step": 3288 }, { "epoch": 0.0058334419346459465, "grad_norm": 0.3125, "learning_rate": 0.0019974233355150015, "loss": 0.3537, "step": 3290 }, { "epoch": 0.005836988099955762, "grad_norm": 0.341796875, "learning_rate": 0.0019974188330121045, "loss": 0.2557, "step": 3292 }, { "epoch": 0.0058405342652655765, "grad_norm": 0.6953125, "learning_rate": 0.0019974143265844187, "loss": 0.2975, "step": 3294 }, { "epoch": 0.005844080430575392, "grad_norm": 0.52734375, "learning_rate": 0.0019974098162319634, "loss": 0.2782, "step": 3296 }, { "epoch": 0.005847626595885207, "grad_norm": 0.6015625, "learning_rate": 0.001997405301954759, "loss": 0.3137, "step": 3298 }, { "epoch": 0.005851172761195022, "grad_norm": 2.28125, "learning_rate": 0.001997400783752826, "loss": 0.5785, "step": 3300 }, { "epoch": 0.005854718926504837, "grad_norm": 0.65234375, "learning_rate": 0.0019973962616261823, "loss": 0.3086, "step": 3302 }, { "epoch": 0.005858265091814653, "grad_norm": 0.447265625, "learning_rate": 0.001997391735574849, "loss": 0.3966, "step": 3304 }, { "epoch": 0.005861811257124468, "grad_norm": 0.314453125, "learning_rate": 0.0019973872055988454, "loss": 0.2754, "step": 3306 }, { "epoch": 0.005865357422434283, "grad_norm": 0.390625, "learning_rate": 0.001997382671698192, "loss": 0.3123, "step": 3308 }, { "epoch": 0.005868903587744098, "grad_norm": 0.375, "learning_rate": 0.0019973781338729073, "loss": 0.6683, "step": 3310 }, { "epoch": 0.005872449753053914, "grad_norm": 1.0859375, "learning_rate": 0.0019973735921230123, "loss": 0.3723, "step": 3312 }, { "epoch": 0.005875995918363728, "grad_norm": 0.546875, "learning_rate": 0.001997369046448526, "loss": 0.2932, "step": 3314 }, { "epoch": 0.0058795420836735435, "grad_norm": 0.6640625, "learning_rate": 0.0019973644968494693, "loss": 0.4134, "step": 3316 }, { "epoch": 0.005883088248983359, "grad_norm": 0.69921875, "learning_rate": 0.001997359943325861, "loss": 0.4393, "step": 3318 }, { "epoch": 0.0058866344142931735, "grad_norm": 0.4375, "learning_rate": 0.001997355385877722, "loss": 0.3295, "step": 3320 }, { "epoch": 0.005890180579602989, "grad_norm": 0.322265625, "learning_rate": 0.0019973508245050716, "loss": 0.3216, "step": 3322 }, { "epoch": 0.005893726744912804, "grad_norm": 0.5, "learning_rate": 0.00199734625920793, "loss": 0.3491, "step": 3324 }, { "epoch": 0.00589727291022262, "grad_norm": 0.263671875, "learning_rate": 0.0019973416899863173, "loss": 0.2758, "step": 3326 }, { "epoch": 0.005900819075532434, "grad_norm": 0.353515625, "learning_rate": 0.0019973371168402533, "loss": 0.275, "step": 3328 }, { "epoch": 0.00590436524084225, "grad_norm": 0.59375, "learning_rate": 0.0019973325397697576, "loss": 0.5971, "step": 3330 }, { "epoch": 0.005907911406152065, "grad_norm": 0.38671875, "learning_rate": 0.001997327958774851, "loss": 0.3286, "step": 3332 }, { "epoch": 0.00591145757146188, "grad_norm": 0.6875, "learning_rate": 0.0019973233738555525, "loss": 0.3469, "step": 3334 }, { "epoch": 0.005915003736771695, "grad_norm": 0.404296875, "learning_rate": 0.0019973187850118833, "loss": 0.3213, "step": 3336 }, { "epoch": 0.0059185499020815106, "grad_norm": 1.71875, "learning_rate": 0.001997314192243863, "loss": 0.4108, "step": 3338 }, { "epoch": 0.005922096067391325, "grad_norm": 0.46875, "learning_rate": 0.0019973095955515114, "loss": 0.2918, "step": 3340 }, { "epoch": 0.0059256422327011405, "grad_norm": 0.40234375, "learning_rate": 0.001997304994934849, "loss": 0.2826, "step": 3342 }, { "epoch": 0.005929188398010956, "grad_norm": 0.283203125, "learning_rate": 0.0019973003903938956, "loss": 0.305, "step": 3344 }, { "epoch": 0.005932734563320771, "grad_norm": 0.330078125, "learning_rate": 0.0019972957819286716, "loss": 0.288, "step": 3346 }, { "epoch": 0.005936280728630586, "grad_norm": 0.28515625, "learning_rate": 0.0019972911695391973, "loss": 0.2985, "step": 3348 }, { "epoch": 0.005939826893940401, "grad_norm": 4.03125, "learning_rate": 0.0019972865532254924, "loss": 0.3797, "step": 3350 }, { "epoch": 0.005943373059250217, "grad_norm": 0.349609375, "learning_rate": 0.0019972819329875774, "loss": 0.2655, "step": 3352 }, { "epoch": 0.005946919224560031, "grad_norm": 0.2734375, "learning_rate": 0.001997277308825472, "loss": 0.2296, "step": 3354 }, { "epoch": 0.005950465389869847, "grad_norm": 0.6796875, "learning_rate": 0.0019972726807391977, "loss": 0.3465, "step": 3356 }, { "epoch": 0.005954011555179662, "grad_norm": 0.396484375, "learning_rate": 0.001997268048728773, "loss": 0.268, "step": 3358 }, { "epoch": 0.005957557720489478, "grad_norm": 0.28515625, "learning_rate": 0.00199726341279422, "loss": 0.2914, "step": 3360 }, { "epoch": 0.005961103885799292, "grad_norm": 0.330078125, "learning_rate": 0.001997258772935557, "loss": 0.2261, "step": 3362 }, { "epoch": 0.0059646500511091076, "grad_norm": 0.55078125, "learning_rate": 0.001997254129152806, "loss": 0.3199, "step": 3364 }, { "epoch": 0.005968196216418923, "grad_norm": 0.53515625, "learning_rate": 0.0019972494814459864, "loss": 0.2794, "step": 3366 }, { "epoch": 0.0059717423817287375, "grad_norm": 0.416015625, "learning_rate": 0.001997244829815119, "loss": 0.3171, "step": 3368 }, { "epoch": 0.005975288547038553, "grad_norm": 0.34765625, "learning_rate": 0.0019972401742602234, "loss": 0.3427, "step": 3370 }, { "epoch": 0.005978834712348368, "grad_norm": 0.6171875, "learning_rate": 0.001997235514781321, "loss": 0.2825, "step": 3372 }, { "epoch": 0.005982380877658183, "grad_norm": 0.69921875, "learning_rate": 0.0019972308513784313, "loss": 0.2863, "step": 3374 }, { "epoch": 0.005985927042967998, "grad_norm": 0.265625, "learning_rate": 0.001997226184051575, "loss": 0.2753, "step": 3376 }, { "epoch": 0.005989473208277814, "grad_norm": 0.53515625, "learning_rate": 0.0019972215128007727, "loss": 0.2723, "step": 3378 }, { "epoch": 0.005993019373587629, "grad_norm": 2.828125, "learning_rate": 0.0019972168376260445, "loss": 0.4631, "step": 3380 }, { "epoch": 0.005996565538897444, "grad_norm": 2.296875, "learning_rate": 0.0019972121585274116, "loss": 0.4052, "step": 3382 }, { "epoch": 0.006000111704207259, "grad_norm": 0.515625, "learning_rate": 0.0019972074755048932, "loss": 0.3341, "step": 3384 }, { "epoch": 0.006003657869517075, "grad_norm": 0.275390625, "learning_rate": 0.0019972027885585106, "loss": 0.2874, "step": 3386 }, { "epoch": 0.006007204034826889, "grad_norm": 0.39453125, "learning_rate": 0.0019971980976882845, "loss": 0.2982, "step": 3388 }, { "epoch": 0.0060107502001367046, "grad_norm": 0.33984375, "learning_rate": 0.001997193402894235, "loss": 0.2858, "step": 3390 }, { "epoch": 0.00601429636544652, "grad_norm": 0.4140625, "learning_rate": 0.001997188704176382, "loss": 0.2857, "step": 3392 }, { "epoch": 0.006017842530756335, "grad_norm": 0.353515625, "learning_rate": 0.0019971840015347475, "loss": 0.2693, "step": 3394 }, { "epoch": 0.00602138869606615, "grad_norm": 0.45703125, "learning_rate": 0.001997179294969351, "loss": 0.2423, "step": 3396 }, { "epoch": 0.006024934861375965, "grad_norm": 0.416015625, "learning_rate": 0.001997174584480214, "loss": 0.2608, "step": 3398 }, { "epoch": 0.006028481026685781, "grad_norm": 0.27734375, "learning_rate": 0.001997169870067356, "loss": 0.2955, "step": 3400 }, { "epoch": 0.006032027191995595, "grad_norm": 0.91796875, "learning_rate": 0.001997165151730798, "loss": 0.3376, "step": 3402 }, { "epoch": 0.006035573357305411, "grad_norm": 0.306640625, "learning_rate": 0.0019971604294705607, "loss": 0.3282, "step": 3404 }, { "epoch": 0.006039119522615226, "grad_norm": 0.2470703125, "learning_rate": 0.001997155703286665, "loss": 0.2785, "step": 3406 }, { "epoch": 0.006042665687925041, "grad_norm": 0.396484375, "learning_rate": 0.0019971509731791315, "loss": 0.3063, "step": 3408 }, { "epoch": 0.006046211853234856, "grad_norm": 0.38671875, "learning_rate": 0.0019971462391479805, "loss": 0.2914, "step": 3410 }, { "epoch": 0.006049758018544672, "grad_norm": 2.375, "learning_rate": 0.001997141501193233, "loss": 0.5058, "step": 3412 }, { "epoch": 0.006053304183854487, "grad_norm": 0.4296875, "learning_rate": 0.00199713675931491, "loss": 0.3155, "step": 3414 }, { "epoch": 0.0060568503491643015, "grad_norm": 0.333984375, "learning_rate": 0.001997132013513032, "loss": 0.3373, "step": 3416 }, { "epoch": 0.006060396514474117, "grad_norm": 0.359375, "learning_rate": 0.0019971272637876194, "loss": 0.3214, "step": 3418 }, { "epoch": 0.006063942679783932, "grad_norm": 0.375, "learning_rate": 0.001997122510138693, "loss": 0.4549, "step": 3420 }, { "epoch": 0.006067488845093747, "grad_norm": 0.7890625, "learning_rate": 0.0019971177525662746, "loss": 0.3052, "step": 3422 }, { "epoch": 0.006071035010403562, "grad_norm": 0.451171875, "learning_rate": 0.0019971129910703834, "loss": 0.3794, "step": 3424 }, { "epoch": 0.006074581175713378, "grad_norm": 0.5625, "learning_rate": 0.0019971082256510417, "loss": 0.2553, "step": 3426 }, { "epoch": 0.006078127341023193, "grad_norm": 0.82421875, "learning_rate": 0.00199710345630827, "loss": 0.2827, "step": 3428 }, { "epoch": 0.006081673506333008, "grad_norm": 0.51953125, "learning_rate": 0.0019970986830420883, "loss": 0.2815, "step": 3430 }, { "epoch": 0.006085219671642823, "grad_norm": 0.322265625, "learning_rate": 0.001997093905852518, "loss": 0.2938, "step": 3432 }, { "epoch": 0.006088765836952639, "grad_norm": 1.4375, "learning_rate": 0.0019970891247395803, "loss": 0.2535, "step": 3434 }, { "epoch": 0.006092312002262453, "grad_norm": 0.33984375, "learning_rate": 0.0019970843397032955, "loss": 0.2621, "step": 3436 }, { "epoch": 0.006095858167572269, "grad_norm": 0.61328125, "learning_rate": 0.0019970795507436856, "loss": 0.3499, "step": 3438 }, { "epoch": 0.006099404332882084, "grad_norm": 0.828125, "learning_rate": 0.0019970747578607704, "loss": 0.3129, "step": 3440 }, { "epoch": 0.0061029504981918985, "grad_norm": 0.369140625, "learning_rate": 0.001997069961054571, "loss": 0.3029, "step": 3442 }, { "epoch": 0.006106496663501714, "grad_norm": 0.333984375, "learning_rate": 0.001997065160325109, "loss": 0.2923, "step": 3444 }, { "epoch": 0.006110042828811529, "grad_norm": 0.65625, "learning_rate": 0.0019970603556724053, "loss": 0.3006, "step": 3446 }, { "epoch": 0.006113588994121345, "grad_norm": 0.66015625, "learning_rate": 0.0019970555470964803, "loss": 0.3404, "step": 3448 }, { "epoch": 0.006117135159431159, "grad_norm": 0.48828125, "learning_rate": 0.001997050734597356, "loss": 0.2949, "step": 3450 }, { "epoch": 0.006120681324740975, "grad_norm": 0.640625, "learning_rate": 0.001997045918175052, "loss": 0.3468, "step": 3452 }, { "epoch": 0.00612422749005079, "grad_norm": 1.78125, "learning_rate": 0.001997041097829591, "loss": 0.4974, "step": 3454 }, { "epoch": 0.006127773655360605, "grad_norm": 0.318359375, "learning_rate": 0.001997036273560992, "loss": 0.2368, "step": 3456 }, { "epoch": 0.00613131982067042, "grad_norm": 0.9296875, "learning_rate": 0.001997031445369279, "loss": 0.3351, "step": 3458 }, { "epoch": 0.006134865985980236, "grad_norm": 1.40625, "learning_rate": 0.0019970266132544705, "loss": 0.2688, "step": 3460 }, { "epoch": 0.006138412151290051, "grad_norm": 0.248046875, "learning_rate": 0.001997021777216589, "loss": 0.2481, "step": 3462 }, { "epoch": 0.0061419583165998656, "grad_norm": 1.0703125, "learning_rate": 0.0019970169372556554, "loss": 0.3255, "step": 3464 }, { "epoch": 0.006145504481909681, "grad_norm": 0.46875, "learning_rate": 0.001997012093371691, "loss": 0.3865, "step": 3466 }, { "epoch": 0.006149050647219496, "grad_norm": 0.380859375, "learning_rate": 0.001997007245564716, "loss": 0.2771, "step": 3468 }, { "epoch": 0.006152596812529311, "grad_norm": 9.25, "learning_rate": 0.001997002393834753, "loss": 0.3283, "step": 3470 }, { "epoch": 0.006156142977839126, "grad_norm": 0.51953125, "learning_rate": 0.001996997538181822, "loss": 0.3164, "step": 3472 }, { "epoch": 0.006159689143148942, "grad_norm": 0.44140625, "learning_rate": 0.0019969926786059453, "loss": 0.3194, "step": 3474 }, { "epoch": 0.006163235308458756, "grad_norm": 1.25, "learning_rate": 0.0019969878151071432, "loss": 0.2693, "step": 3476 }, { "epoch": 0.006166781473768572, "grad_norm": 0.35546875, "learning_rate": 0.001996982947685438, "loss": 0.2238, "step": 3478 }, { "epoch": 0.006170327639078387, "grad_norm": 0.8125, "learning_rate": 0.00199697807634085, "loss": 0.3017, "step": 3480 }, { "epoch": 0.006173873804388203, "grad_norm": 1.8203125, "learning_rate": 0.001996973201073401, "loss": 0.3029, "step": 3482 }, { "epoch": 0.006177419969698017, "grad_norm": 0.26953125, "learning_rate": 0.001996968321883112, "loss": 0.3359, "step": 3484 }, { "epoch": 0.006180966135007833, "grad_norm": 0.6328125, "learning_rate": 0.001996963438770005, "loss": 0.2878, "step": 3486 }, { "epoch": 0.006184512300317648, "grad_norm": 0.875, "learning_rate": 0.0019969585517341007, "loss": 0.321, "step": 3488 }, { "epoch": 0.0061880584656274626, "grad_norm": 0.40234375, "learning_rate": 0.0019969536607754215, "loss": 0.2913, "step": 3490 }, { "epoch": 0.006191604630937278, "grad_norm": 0.34765625, "learning_rate": 0.0019969487658939872, "loss": 0.2863, "step": 3492 }, { "epoch": 0.006195150796247093, "grad_norm": 0.77734375, "learning_rate": 0.00199694386708982, "loss": 0.5539, "step": 3494 }, { "epoch": 0.006198696961556909, "grad_norm": 0.56640625, "learning_rate": 0.0019969389643629413, "loss": 0.2408, "step": 3496 }, { "epoch": 0.006202243126866723, "grad_norm": 0.515625, "learning_rate": 0.001996934057713373, "loss": 0.2953, "step": 3498 }, { "epoch": 0.006205789292176539, "grad_norm": 0.66015625, "learning_rate": 0.0019969291471411354, "loss": 0.3432, "step": 3500 }, { "epoch": 0.006209335457486354, "grad_norm": 0.357421875, "learning_rate": 0.0019969242326462514, "loss": 0.342, "step": 3502 }, { "epoch": 0.006212881622796169, "grad_norm": 0.326171875, "learning_rate": 0.0019969193142287418, "loss": 0.2239, "step": 3504 }, { "epoch": 0.006216427788105984, "grad_norm": 0.25390625, "learning_rate": 0.0019969143918886277, "loss": 0.2682, "step": 3506 }, { "epoch": 0.0062199739534158, "grad_norm": 0.53125, "learning_rate": 0.0019969094656259313, "loss": 0.3301, "step": 3508 }, { "epoch": 0.006223520118725614, "grad_norm": 0.51953125, "learning_rate": 0.0019969045354406734, "loss": 0.2506, "step": 3510 }, { "epoch": 0.00622706628403543, "grad_norm": 0.49609375, "learning_rate": 0.001996899601332877, "loss": 0.344, "step": 3512 }, { "epoch": 0.006230612449345245, "grad_norm": 0.55859375, "learning_rate": 0.001996894663302562, "loss": 0.3055, "step": 3514 }, { "epoch": 0.00623415861465506, "grad_norm": 0.70703125, "learning_rate": 0.0019968897213497507, "loss": 0.3131, "step": 3516 }, { "epoch": 0.006237704779964875, "grad_norm": 2.0625, "learning_rate": 0.001996884775474465, "loss": 0.298, "step": 3518 }, { "epoch": 0.00624125094527469, "grad_norm": 0.60546875, "learning_rate": 0.0019968798256767262, "loss": 0.2768, "step": 3520 }, { "epoch": 0.006244797110584506, "grad_norm": 76.0, "learning_rate": 0.001996874871956556, "loss": 0.4224, "step": 3522 }, { "epoch": 0.00624834327589432, "grad_norm": 0.51953125, "learning_rate": 0.001996869914313976, "loss": 0.2167, "step": 3524 }, { "epoch": 0.006251889441204136, "grad_norm": 0.3125, "learning_rate": 0.0019968649527490083, "loss": 0.2859, "step": 3526 }, { "epoch": 0.006255435606513951, "grad_norm": 0.4765625, "learning_rate": 0.001996859987261674, "loss": 0.2549, "step": 3528 }, { "epoch": 0.006258981771823767, "grad_norm": 1.546875, "learning_rate": 0.001996855017851995, "loss": 0.3599, "step": 3530 }, { "epoch": 0.006262527937133581, "grad_norm": 0.29296875, "learning_rate": 0.0019968500445199933, "loss": 0.2769, "step": 3532 }, { "epoch": 0.006266074102443397, "grad_norm": 0.234375, "learning_rate": 0.0019968450672656905, "loss": 0.3375, "step": 3534 }, { "epoch": 0.006269620267753212, "grad_norm": 0.6484375, "learning_rate": 0.0019968400860891082, "loss": 0.307, "step": 3536 }, { "epoch": 0.006273166433063027, "grad_norm": 0.5078125, "learning_rate": 0.001996835100990268, "loss": 0.4704, "step": 3538 }, { "epoch": 0.006276712598372842, "grad_norm": 0.78125, "learning_rate": 0.0019968301119691924, "loss": 0.2594, "step": 3540 }, { "epoch": 0.006280258763682657, "grad_norm": 0.51171875, "learning_rate": 0.0019968251190259027, "loss": 0.2916, "step": 3542 }, { "epoch": 0.006283804928992472, "grad_norm": 1.671875, "learning_rate": 0.001996820122160421, "loss": 0.664, "step": 3544 }, { "epoch": 0.006287351094302287, "grad_norm": 2.078125, "learning_rate": 0.001996815121372769, "loss": 0.304, "step": 3546 }, { "epoch": 0.006290897259612103, "grad_norm": 0.57421875, "learning_rate": 0.0019968101166629683, "loss": 0.2742, "step": 3548 }, { "epoch": 0.006294443424921918, "grad_norm": 15.6875, "learning_rate": 0.001996805108031041, "loss": 0.3182, "step": 3550 }, { "epoch": 0.006297989590231733, "grad_norm": 1.6875, "learning_rate": 0.00199680009547701, "loss": 0.4785, "step": 3552 }, { "epoch": 0.006301535755541548, "grad_norm": 2.375, "learning_rate": 0.0019967950790008952, "loss": 0.4236, "step": 3554 }, { "epoch": 0.006305081920851364, "grad_norm": 0.5390625, "learning_rate": 0.0019967900586027204, "loss": 0.2976, "step": 3556 }, { "epoch": 0.006308628086161178, "grad_norm": 2.90625, "learning_rate": 0.0019967850342825066, "loss": 0.3757, "step": 3558 }, { "epoch": 0.006312174251470994, "grad_norm": 0.328125, "learning_rate": 0.0019967800060402756, "loss": 0.1831, "step": 3560 }, { "epoch": 0.006315720416780809, "grad_norm": 0.56640625, "learning_rate": 0.0019967749738760503, "loss": 0.2324, "step": 3562 }, { "epoch": 0.006319266582090624, "grad_norm": 0.3671875, "learning_rate": 0.0019967699377898517, "loss": 0.3373, "step": 3564 }, { "epoch": 0.006322812747400439, "grad_norm": 1.984375, "learning_rate": 0.0019967648977817025, "loss": 0.2453, "step": 3566 }, { "epoch": 0.006326358912710254, "grad_norm": 0.416015625, "learning_rate": 0.0019967598538516247, "loss": 0.3425, "step": 3568 }, { "epoch": 0.00632990507802007, "grad_norm": 0.40234375, "learning_rate": 0.00199675480599964, "loss": 0.2809, "step": 3570 }, { "epoch": 0.006333451243329884, "grad_norm": 1.3203125, "learning_rate": 0.0019967497542257707, "loss": 0.3548, "step": 3572 }, { "epoch": 0.0063369974086397, "grad_norm": 0.478515625, "learning_rate": 0.001996744698530039, "loss": 0.3291, "step": 3574 }, { "epoch": 0.006340543573949515, "grad_norm": 0.50390625, "learning_rate": 0.0019967396389124667, "loss": 0.2642, "step": 3576 }, { "epoch": 0.00634408973925933, "grad_norm": 0.8828125, "learning_rate": 0.001996734575373076, "loss": 0.2874, "step": 3578 }, { "epoch": 0.006347635904569145, "grad_norm": 0.43359375, "learning_rate": 0.0019967295079118897, "loss": 0.3389, "step": 3580 }, { "epoch": 0.006351182069878961, "grad_norm": 0.87109375, "learning_rate": 0.0019967244365289294, "loss": 0.5039, "step": 3582 }, { "epoch": 0.006354728235188776, "grad_norm": 1.2421875, "learning_rate": 0.001996719361224217, "loss": 0.3724, "step": 3584 }, { "epoch": 0.006358274400498591, "grad_norm": 4.96875, "learning_rate": 0.0019967142819977742, "loss": 0.7329, "step": 3586 }, { "epoch": 0.006361820565808406, "grad_norm": 0.357421875, "learning_rate": 0.0019967091988496253, "loss": 0.2947, "step": 3588 }, { "epoch": 0.006365366731118221, "grad_norm": 0.373046875, "learning_rate": 0.0019967041117797905, "loss": 0.5445, "step": 3590 }, { "epoch": 0.006368912896428036, "grad_norm": 0.58984375, "learning_rate": 0.001996699020788293, "loss": 0.2339, "step": 3592 }, { "epoch": 0.006372459061737851, "grad_norm": 0.490234375, "learning_rate": 0.0019966939258751547, "loss": 0.3004, "step": 3594 }, { "epoch": 0.006376005227047667, "grad_norm": 2.234375, "learning_rate": 0.0019966888270403983, "loss": 0.3147, "step": 3596 }, { "epoch": 0.006379551392357482, "grad_norm": 1.2578125, "learning_rate": 0.0019966837242840455, "loss": 0.3833, "step": 3598 }, { "epoch": 0.006383097557667297, "grad_norm": 0.73828125, "learning_rate": 0.0019966786176061195, "loss": 0.4519, "step": 3600 }, { "epoch": 0.006386643722977112, "grad_norm": 0.333984375, "learning_rate": 0.0019966735070066416, "loss": 0.2226, "step": 3602 }, { "epoch": 0.006390189888286928, "grad_norm": 0.70703125, "learning_rate": 0.0019966683924856348, "loss": 0.2774, "step": 3604 }, { "epoch": 0.006393736053596742, "grad_norm": 0.76171875, "learning_rate": 0.001996663274043121, "loss": 0.3601, "step": 3606 }, { "epoch": 0.006397282218906558, "grad_norm": 0.498046875, "learning_rate": 0.001996658151679123, "loss": 0.2697, "step": 3608 }, { "epoch": 0.006400828384216373, "grad_norm": 0.349609375, "learning_rate": 0.0019966530253936634, "loss": 0.2523, "step": 3610 }, { "epoch": 0.006404374549526188, "grad_norm": 2.875, "learning_rate": 0.001996647895186764, "loss": 0.3261, "step": 3612 }, { "epoch": 0.006407920714836003, "grad_norm": 0.478515625, "learning_rate": 0.001996642761058448, "loss": 0.2746, "step": 3614 }, { "epoch": 0.006411466880145818, "grad_norm": 0.27734375, "learning_rate": 0.001996637623008737, "loss": 0.2738, "step": 3616 }, { "epoch": 0.006415013045455634, "grad_norm": 1.1484375, "learning_rate": 0.0019966324810376536, "loss": 0.3113, "step": 3618 }, { "epoch": 0.006418559210765448, "grad_norm": 0.90234375, "learning_rate": 0.001996627335145221, "loss": 0.2843, "step": 3620 }, { "epoch": 0.006422105376075264, "grad_norm": 0.96875, "learning_rate": 0.001996622185331461, "loss": 0.2946, "step": 3622 }, { "epoch": 0.006425651541385079, "grad_norm": 1.734375, "learning_rate": 0.0019966170315963965, "loss": 0.3485, "step": 3624 }, { "epoch": 0.006429197706694894, "grad_norm": 2.046875, "learning_rate": 0.0019966118739400502, "loss": 0.4324, "step": 3626 }, { "epoch": 0.006432743872004709, "grad_norm": 0.4453125, "learning_rate": 0.001996606712362444, "loss": 0.2174, "step": 3628 }, { "epoch": 0.006436290037314525, "grad_norm": 1.5390625, "learning_rate": 0.001996601546863601, "loss": 0.3234, "step": 3630 }, { "epoch": 0.00643983620262434, "grad_norm": 0.451171875, "learning_rate": 0.0019965963774435437, "loss": 0.3029, "step": 3632 }, { "epoch": 0.006443382367934155, "grad_norm": 0.82421875, "learning_rate": 0.0019965912041022943, "loss": 0.2654, "step": 3634 }, { "epoch": 0.00644692853324397, "grad_norm": 0.53515625, "learning_rate": 0.001996586026839876, "loss": 0.2686, "step": 3636 }, { "epoch": 0.0064504746985537854, "grad_norm": 1.1796875, "learning_rate": 0.0019965808456563114, "loss": 0.2771, "step": 3638 }, { "epoch": 0.0064540208638636, "grad_norm": 0.73046875, "learning_rate": 0.001996575660551623, "loss": 0.415, "step": 3640 }, { "epoch": 0.006457567029173415, "grad_norm": 0.66015625, "learning_rate": 0.001996570471525833, "loss": 0.4476, "step": 3642 }, { "epoch": 0.006461113194483231, "grad_norm": 2.5, "learning_rate": 0.001996565278578965, "loss": 0.495, "step": 3644 }, { "epoch": 0.006464659359793045, "grad_norm": 0.55859375, "learning_rate": 0.001996560081711041, "loss": 0.2976, "step": 3646 }, { "epoch": 0.006468205525102861, "grad_norm": 1.0, "learning_rate": 0.001996554880922084, "loss": 0.2428, "step": 3648 }, { "epoch": 0.006471751690412676, "grad_norm": 0.90234375, "learning_rate": 0.001996549676212117, "loss": 0.5618, "step": 3650 }, { "epoch": 0.006475297855722492, "grad_norm": 0.396484375, "learning_rate": 0.0019965444675811624, "loss": 0.2521, "step": 3652 }, { "epoch": 0.006478844021032306, "grad_norm": 1.234375, "learning_rate": 0.001996539255029243, "loss": 0.3476, "step": 3654 }, { "epoch": 0.006482390186342122, "grad_norm": 0.625, "learning_rate": 0.0019965340385563815, "loss": 0.2919, "step": 3656 }, { "epoch": 0.006485936351651937, "grad_norm": 0.83203125, "learning_rate": 0.001996528818162601, "loss": 0.37, "step": 3658 }, { "epoch": 0.006489482516961752, "grad_norm": 0.578125, "learning_rate": 0.001996523593847924, "loss": 0.3794, "step": 3660 }, { "epoch": 0.006493028682271567, "grad_norm": 0.453125, "learning_rate": 0.001996518365612374, "loss": 0.2594, "step": 3662 }, { "epoch": 0.0064965748475813824, "grad_norm": 0.46484375, "learning_rate": 0.0019965131334559734, "loss": 0.4004, "step": 3664 }, { "epoch": 0.006500121012891198, "grad_norm": 0.7734375, "learning_rate": 0.001996507897378745, "loss": 0.321, "step": 3666 }, { "epoch": 0.006503667178201012, "grad_norm": 0.69140625, "learning_rate": 0.0019965026573807113, "loss": 0.3256, "step": 3668 }, { "epoch": 0.006507213343510828, "grad_norm": 0.5234375, "learning_rate": 0.001996497413461896, "loss": 0.2443, "step": 3670 }, { "epoch": 0.006510759508820643, "grad_norm": 1.6484375, "learning_rate": 0.0019964921656223226, "loss": 0.5052, "step": 3672 }, { "epoch": 0.006514305674130458, "grad_norm": 0.4140625, "learning_rate": 0.001996486913862012, "loss": 0.3542, "step": 3674 }, { "epoch": 0.006517851839440273, "grad_norm": 0.365234375, "learning_rate": 0.0019964816581809893, "loss": 0.2876, "step": 3676 }, { "epoch": 0.006521398004750089, "grad_norm": 0.404296875, "learning_rate": 0.0019964763985792764, "loss": 0.3216, "step": 3678 }, { "epoch": 0.006524944170059903, "grad_norm": 0.8515625, "learning_rate": 0.0019964711350568963, "loss": 0.3296, "step": 3680 }, { "epoch": 0.006528490335369719, "grad_norm": 0.287109375, "learning_rate": 0.001996465867613872, "loss": 0.2429, "step": 3682 }, { "epoch": 0.006532036500679534, "grad_norm": 0.71484375, "learning_rate": 0.001996460596250227, "loss": 0.2976, "step": 3684 }, { "epoch": 0.0065355826659893495, "grad_norm": 0.35546875, "learning_rate": 0.0019964553209659837, "loss": 0.247, "step": 3686 }, { "epoch": 0.006539128831299164, "grad_norm": 0.31640625, "learning_rate": 0.0019964500417611656, "loss": 0.2985, "step": 3688 }, { "epoch": 0.006542674996608979, "grad_norm": 0.427734375, "learning_rate": 0.001996444758635796, "loss": 0.3057, "step": 3690 }, { "epoch": 0.006546221161918795, "grad_norm": 1.859375, "learning_rate": 0.001996439471589898, "loss": 0.2865, "step": 3692 }, { "epoch": 0.006549767327228609, "grad_norm": 1.1796875, "learning_rate": 0.001996434180623494, "loss": 0.3899, "step": 3694 }, { "epoch": 0.006553313492538425, "grad_norm": 1.375, "learning_rate": 0.001996428885736608, "loss": 0.5637, "step": 3696 }, { "epoch": 0.00655685965784824, "grad_norm": 0.5234375, "learning_rate": 0.0019964235869292623, "loss": 0.331, "step": 3698 }, { "epoch": 0.006560405823158056, "grad_norm": 0.81640625, "learning_rate": 0.0019964182842014807, "loss": 0.2936, "step": 3700 }, { "epoch": 0.00656395198846787, "grad_norm": 0.46875, "learning_rate": 0.0019964129775532865, "loss": 0.2663, "step": 3702 }, { "epoch": 0.006567498153777686, "grad_norm": 0.98828125, "learning_rate": 0.0019964076669847022, "loss": 0.3022, "step": 3704 }, { "epoch": 0.006571044319087501, "grad_norm": 1.46875, "learning_rate": 0.0019964023524957518, "loss": 0.3109, "step": 3706 }, { "epoch": 0.006574590484397316, "grad_norm": 0.234375, "learning_rate": 0.001996397034086458, "loss": 0.3486, "step": 3708 }, { "epoch": 0.006578136649707131, "grad_norm": 0.369140625, "learning_rate": 0.001996391711756844, "loss": 0.3987, "step": 3710 }, { "epoch": 0.0065816828150169465, "grad_norm": 1.265625, "learning_rate": 0.001996386385506934, "loss": 0.3227, "step": 3712 }, { "epoch": 0.006585228980326761, "grad_norm": 0.36328125, "learning_rate": 0.00199638105533675, "loss": 0.3167, "step": 3714 }, { "epoch": 0.006588775145636576, "grad_norm": 0.494140625, "learning_rate": 0.0019963757212463165, "loss": 0.2572, "step": 3716 }, { "epoch": 0.006592321310946392, "grad_norm": 0.953125, "learning_rate": 0.0019963703832356562, "loss": 0.5886, "step": 3718 }, { "epoch": 0.006595867476256207, "grad_norm": 0.349609375, "learning_rate": 0.0019963650413047924, "loss": 0.2266, "step": 3720 }, { "epoch": 0.006599413641566022, "grad_norm": 1.40625, "learning_rate": 0.0019963596954537485, "loss": 0.4081, "step": 3722 }, { "epoch": 0.006602959806875837, "grad_norm": 0.5546875, "learning_rate": 0.001996354345682548, "loss": 0.2733, "step": 3724 }, { "epoch": 0.006606505972185653, "grad_norm": 0.96484375, "learning_rate": 0.0019963489919912142, "loss": 0.2669, "step": 3726 }, { "epoch": 0.006610052137495467, "grad_norm": 0.400390625, "learning_rate": 0.0019963436343797703, "loss": 0.2175, "step": 3728 }, { "epoch": 0.006613598302805283, "grad_norm": 0.251953125, "learning_rate": 0.001996338272848241, "loss": 0.2582, "step": 3730 }, { "epoch": 0.006617144468115098, "grad_norm": 0.359375, "learning_rate": 0.001996332907396648, "loss": 0.2588, "step": 3732 }, { "epoch": 0.0066206906334249135, "grad_norm": 0.412109375, "learning_rate": 0.001996327538025015, "loss": 0.2742, "step": 3734 }, { "epoch": 0.006624236798734728, "grad_norm": 0.55859375, "learning_rate": 0.0019963221647333667, "loss": 0.2768, "step": 3736 }, { "epoch": 0.0066277829640445434, "grad_norm": 0.361328125, "learning_rate": 0.001996316787521726, "loss": 0.2891, "step": 3738 }, { "epoch": 0.006631329129354359, "grad_norm": 0.765625, "learning_rate": 0.001996311406390116, "loss": 0.3709, "step": 3740 }, { "epoch": 0.006634875294664173, "grad_norm": 2.40625, "learning_rate": 0.0019963060213385605, "loss": 0.2232, "step": 3742 }, { "epoch": 0.006638421459973989, "grad_norm": 0.890625, "learning_rate": 0.0019963006323670835, "loss": 0.2206, "step": 3744 }, { "epoch": 0.006641967625283804, "grad_norm": 0.306640625, "learning_rate": 0.001996295239475708, "loss": 0.3767, "step": 3746 }, { "epoch": 0.006645513790593619, "grad_norm": 3.140625, "learning_rate": 0.0019962898426644574, "loss": 0.4518, "step": 3748 }, { "epoch": 0.006649059955903434, "grad_norm": 1.15625, "learning_rate": 0.001996284441933356, "loss": 0.368, "step": 3750 }, { "epoch": 0.00665260612121325, "grad_norm": 0.55859375, "learning_rate": 0.001996279037282427, "loss": 0.3004, "step": 3752 }, { "epoch": 0.006656152286523065, "grad_norm": 0.6640625, "learning_rate": 0.0019962736287116936, "loss": 0.3034, "step": 3754 }, { "epoch": 0.00665969845183288, "grad_norm": 0.6953125, "learning_rate": 0.001996268216221181, "loss": 0.4052, "step": 3756 }, { "epoch": 0.006663244617142695, "grad_norm": 0.59765625, "learning_rate": 0.0019962627998109115, "loss": 0.2305, "step": 3758 }, { "epoch": 0.0066667907824525105, "grad_norm": 1.765625, "learning_rate": 0.0019962573794809085, "loss": 0.3102, "step": 3760 }, { "epoch": 0.006670336947762325, "grad_norm": 0.87890625, "learning_rate": 0.0019962519552311968, "loss": 0.3272, "step": 3762 }, { "epoch": 0.0066738831130721404, "grad_norm": 0.7265625, "learning_rate": 0.0019962465270617997, "loss": 0.3258, "step": 3764 }, { "epoch": 0.006677429278381956, "grad_norm": 0.80078125, "learning_rate": 0.0019962410949727408, "loss": 0.2817, "step": 3766 }, { "epoch": 0.006680975443691771, "grad_norm": 0.3984375, "learning_rate": 0.0019962356589640438, "loss": 0.3349, "step": 3768 }, { "epoch": 0.006684521609001586, "grad_norm": 0.80859375, "learning_rate": 0.001996230219035733, "loss": 0.292, "step": 3770 }, { "epoch": 0.006688067774311401, "grad_norm": 0.77734375, "learning_rate": 0.0019962247751878315, "loss": 0.3389, "step": 3772 }, { "epoch": 0.006691613939621217, "grad_norm": 0.515625, "learning_rate": 0.001996219327420364, "loss": 0.3023, "step": 3774 }, { "epoch": 0.006695160104931031, "grad_norm": 0.80859375, "learning_rate": 0.0019962138757333527, "loss": 0.3233, "step": 3776 }, { "epoch": 0.006698706270240847, "grad_norm": 0.61328125, "learning_rate": 0.0019962084201268233, "loss": 0.3144, "step": 3778 }, { "epoch": 0.006702252435550662, "grad_norm": 0.478515625, "learning_rate": 0.0019962029606007984, "loss": 0.2663, "step": 3780 }, { "epoch": 0.006705798600860477, "grad_norm": 0.59375, "learning_rate": 0.0019961974971553025, "loss": 0.2852, "step": 3782 }, { "epoch": 0.006709344766170292, "grad_norm": 0.435546875, "learning_rate": 0.0019961920297903593, "loss": 0.2436, "step": 3784 }, { "epoch": 0.0067128909314801075, "grad_norm": 0.328125, "learning_rate": 0.001996186558505993, "loss": 0.4178, "step": 3786 }, { "epoch": 0.006716437096789923, "grad_norm": 0.373046875, "learning_rate": 0.0019961810833022267, "loss": 0.3111, "step": 3788 }, { "epoch": 0.006719983262099737, "grad_norm": 0.48046875, "learning_rate": 0.0019961756041790854, "loss": 0.2805, "step": 3790 }, { "epoch": 0.006723529427409553, "grad_norm": 0.3984375, "learning_rate": 0.0019961701211365927, "loss": 0.3611, "step": 3792 }, { "epoch": 0.006727075592719368, "grad_norm": 0.302734375, "learning_rate": 0.001996164634174772, "loss": 0.2097, "step": 3794 }, { "epoch": 0.006730621758029183, "grad_norm": 0.515625, "learning_rate": 0.0019961591432936477, "loss": 0.3033, "step": 3796 }, { "epoch": 0.006734167923338998, "grad_norm": 7.34375, "learning_rate": 0.0019961536484932444, "loss": 0.2526, "step": 3798 }, { "epoch": 0.006737714088648814, "grad_norm": 1.234375, "learning_rate": 0.001996148149773585, "loss": 0.2675, "step": 3800 }, { "epoch": 0.006741260253958629, "grad_norm": 0.462890625, "learning_rate": 0.0019961426471346946, "loss": 0.3236, "step": 3802 }, { "epoch": 0.006744806419268444, "grad_norm": 0.322265625, "learning_rate": 0.0019961371405765966, "loss": 0.2906, "step": 3804 }, { "epoch": 0.006748352584578259, "grad_norm": 0.59375, "learning_rate": 0.001996131630099315, "loss": 0.3176, "step": 3806 }, { "epoch": 0.0067518987498880745, "grad_norm": 0.373046875, "learning_rate": 0.0019961261157028748, "loss": 0.354, "step": 3808 }, { "epoch": 0.006755444915197889, "grad_norm": 0.2490234375, "learning_rate": 0.001996120597387299, "loss": 0.3218, "step": 3810 }, { "epoch": 0.0067589910805077045, "grad_norm": 0.5, "learning_rate": 0.001996115075152612, "loss": 0.2484, "step": 3812 }, { "epoch": 0.00676253724581752, "grad_norm": 0.400390625, "learning_rate": 0.001996109548998839, "loss": 0.3758, "step": 3814 }, { "epoch": 0.006766083411127334, "grad_norm": 0.72265625, "learning_rate": 0.001996104018926003, "loss": 0.1925, "step": 3816 }, { "epoch": 0.00676962957643715, "grad_norm": 1.859375, "learning_rate": 0.0019960984849341284, "loss": 0.3434, "step": 3818 }, { "epoch": 0.006773175741746965, "grad_norm": 0.81640625, "learning_rate": 0.00199609294702324, "loss": 0.2872, "step": 3820 }, { "epoch": 0.006776721907056781, "grad_norm": 0.416015625, "learning_rate": 0.0019960874051933608, "loss": 0.2461, "step": 3822 }, { "epoch": 0.006780268072366595, "grad_norm": 0.40625, "learning_rate": 0.0019960818594445162, "loss": 0.2654, "step": 3824 }, { "epoch": 0.006783814237676411, "grad_norm": 1.7421875, "learning_rate": 0.00199607630977673, "loss": 0.3872, "step": 3826 }, { "epoch": 0.006787360402986226, "grad_norm": 0.357421875, "learning_rate": 0.001996070756190027, "loss": 0.2594, "step": 3828 }, { "epoch": 0.006790906568296041, "grad_norm": 0.54296875, "learning_rate": 0.0019960651986844304, "loss": 0.2353, "step": 3830 }, { "epoch": 0.006794452733605856, "grad_norm": 0.30859375, "learning_rate": 0.001996059637259965, "loss": 0.3036, "step": 3832 }, { "epoch": 0.0067979988989156715, "grad_norm": 0.271484375, "learning_rate": 0.0019960540719166555, "loss": 0.2459, "step": 3834 }, { "epoch": 0.006801545064225487, "grad_norm": 0.59375, "learning_rate": 0.001996048502654526, "loss": 0.3075, "step": 3836 }, { "epoch": 0.0068050912295353014, "grad_norm": 0.34765625, "learning_rate": 0.001996042929473601, "loss": 0.336, "step": 3838 }, { "epoch": 0.006808637394845117, "grad_norm": 0.31640625, "learning_rate": 0.0019960373523739048, "loss": 0.3007, "step": 3840 }, { "epoch": 0.006812183560154932, "grad_norm": 0.3671875, "learning_rate": 0.0019960317713554614, "loss": 0.2348, "step": 3842 }, { "epoch": 0.006815729725464747, "grad_norm": 1.15625, "learning_rate": 0.0019960261864182954, "loss": 0.3357, "step": 3844 }, { "epoch": 0.006819275890774562, "grad_norm": 0.341796875, "learning_rate": 0.0019960205975624317, "loss": 0.3053, "step": 3846 }, { "epoch": 0.006822822056084378, "grad_norm": 0.44921875, "learning_rate": 0.001996015004787894, "loss": 0.265, "step": 3848 }, { "epoch": 0.006826368221394192, "grad_norm": 0.27734375, "learning_rate": 0.0019960094080947077, "loss": 0.3206, "step": 3850 }, { "epoch": 0.006829914386704008, "grad_norm": 0.333984375, "learning_rate": 0.0019960038074828958, "loss": 0.2849, "step": 3852 }, { "epoch": 0.006833460552013823, "grad_norm": 0.5546875, "learning_rate": 0.0019959982029524844, "loss": 0.2584, "step": 3854 }, { "epoch": 0.0068370067173236385, "grad_norm": 0.341796875, "learning_rate": 0.0019959925945034975, "loss": 0.3027, "step": 3856 }, { "epoch": 0.006840552882633453, "grad_norm": 0.337890625, "learning_rate": 0.001995986982135959, "loss": 0.2491, "step": 3858 }, { "epoch": 0.0068440990479432685, "grad_norm": 1.484375, "learning_rate": 0.0019959813658498938, "loss": 0.6935, "step": 3860 }, { "epoch": 0.006847645213253084, "grad_norm": 0.75, "learning_rate": 0.001995975745645327, "loss": 0.3274, "step": 3862 }, { "epoch": 0.0068511913785628984, "grad_norm": 0.578125, "learning_rate": 0.0019959701215222824, "loss": 0.2932, "step": 3864 }, { "epoch": 0.006854737543872714, "grad_norm": 1.0859375, "learning_rate": 0.001995964493480785, "loss": 0.3154, "step": 3866 }, { "epoch": 0.006858283709182529, "grad_norm": 0.5625, "learning_rate": 0.0019959588615208594, "loss": 0.3343, "step": 3868 }, { "epoch": 0.006861829874492345, "grad_norm": 0.515625, "learning_rate": 0.00199595322564253, "loss": 0.3474, "step": 3870 }, { "epoch": 0.006865376039802159, "grad_norm": 0.5078125, "learning_rate": 0.001995947585845822, "loss": 0.3114, "step": 3872 }, { "epoch": 0.006868922205111975, "grad_norm": 2.171875, "learning_rate": 0.001995941942130759, "loss": 0.4198, "step": 3874 }, { "epoch": 0.00687246837042179, "grad_norm": 0.3671875, "learning_rate": 0.0019959362944973673, "loss": 0.2397, "step": 3876 }, { "epoch": 0.006876014535731605, "grad_norm": 0.255859375, "learning_rate": 0.0019959306429456697, "loss": 0.2356, "step": 3878 }, { "epoch": 0.00687956070104142, "grad_norm": 1.015625, "learning_rate": 0.0019959249874756924, "loss": 0.2489, "step": 3880 }, { "epoch": 0.0068831068663512355, "grad_norm": 2.359375, "learning_rate": 0.0019959193280874596, "loss": 0.4578, "step": 3882 }, { "epoch": 0.00688665303166105, "grad_norm": 0.78515625, "learning_rate": 0.0019959136647809965, "loss": 0.2947, "step": 3884 }, { "epoch": 0.0068901991969708655, "grad_norm": 0.515625, "learning_rate": 0.001995907997556327, "loss": 0.3085, "step": 3886 }, { "epoch": 0.006893745362280681, "grad_norm": 2.15625, "learning_rate": 0.001995902326413476, "loss": 0.3363, "step": 3888 }, { "epoch": 0.006897291527590496, "grad_norm": 0.55859375, "learning_rate": 0.001995896651352469, "loss": 0.2899, "step": 3890 }, { "epoch": 0.006900837692900311, "grad_norm": 0.423828125, "learning_rate": 0.0019958909723733305, "loss": 0.267, "step": 3892 }, { "epoch": 0.006904383858210126, "grad_norm": 0.9375, "learning_rate": 0.0019958852894760852, "loss": 0.3345, "step": 3894 }, { "epoch": 0.006907930023519942, "grad_norm": 0.47265625, "learning_rate": 0.001995879602660758, "loss": 0.3044, "step": 3896 }, { "epoch": 0.006911476188829756, "grad_norm": 0.275390625, "learning_rate": 0.001995873911927374, "loss": 0.2471, "step": 3898 }, { "epoch": 0.006915022354139572, "grad_norm": 0.57421875, "learning_rate": 0.0019958682172759577, "loss": 0.2842, "step": 3900 }, { "epoch": 0.006918568519449387, "grad_norm": 0.306640625, "learning_rate": 0.0019958625187065336, "loss": 0.292, "step": 3902 }, { "epoch": 0.0069221146847592025, "grad_norm": 0.416015625, "learning_rate": 0.001995856816219128, "loss": 0.2522, "step": 3904 }, { "epoch": 0.006925660850069017, "grad_norm": 0.2041015625, "learning_rate": 0.0019958511098137647, "loss": 0.2242, "step": 3906 }, { "epoch": 0.0069292070153788325, "grad_norm": 0.5859375, "learning_rate": 0.0019958453994904693, "loss": 0.2738, "step": 3908 }, { "epoch": 0.006932753180688648, "grad_norm": 0.451171875, "learning_rate": 0.0019958396852492667, "loss": 0.2446, "step": 3910 }, { "epoch": 0.0069362993459984625, "grad_norm": 2.59375, "learning_rate": 0.0019958339670901816, "loss": 0.3562, "step": 3912 }, { "epoch": 0.006939845511308278, "grad_norm": 0.83203125, "learning_rate": 0.0019958282450132387, "loss": 0.2546, "step": 3914 }, { "epoch": 0.006943391676618093, "grad_norm": 0.72265625, "learning_rate": 0.0019958225190184636, "loss": 0.2398, "step": 3916 }, { "epoch": 0.006946937841927908, "grad_norm": 0.6796875, "learning_rate": 0.001995816789105881, "loss": 0.3362, "step": 3918 }, { "epoch": 0.006950484007237723, "grad_norm": 0.486328125, "learning_rate": 0.0019958110552755165, "loss": 0.29, "step": 3920 }, { "epoch": 0.006954030172547539, "grad_norm": 0.75, "learning_rate": 0.0019958053175273944, "loss": 0.2743, "step": 3922 }, { "epoch": 0.006957576337857354, "grad_norm": 1.6875, "learning_rate": 0.0019957995758615402, "loss": 0.3565, "step": 3924 }, { "epoch": 0.006961122503167169, "grad_norm": 0.78515625, "learning_rate": 0.0019957938302779792, "loss": 0.2403, "step": 3926 }, { "epoch": 0.006964668668476984, "grad_norm": 1.0, "learning_rate": 0.0019957880807767365, "loss": 0.3427, "step": 3928 }, { "epoch": 0.0069682148337867995, "grad_norm": 0.318359375, "learning_rate": 0.0019957823273578368, "loss": 0.221, "step": 3930 }, { "epoch": 0.006971760999096614, "grad_norm": 0.255859375, "learning_rate": 0.0019957765700213057, "loss": 0.2437, "step": 3932 }, { "epoch": 0.0069753071644064295, "grad_norm": 1.3828125, "learning_rate": 0.001995770808767168, "loss": 0.3686, "step": 3934 }, { "epoch": 0.006978853329716245, "grad_norm": 0.345703125, "learning_rate": 0.001995765043595449, "loss": 0.2674, "step": 3936 }, { "epoch": 0.0069823994950260595, "grad_norm": 2.109375, "learning_rate": 0.0019957592745061745, "loss": 0.3771, "step": 3938 }, { "epoch": 0.006985945660335875, "grad_norm": 0.8515625, "learning_rate": 0.001995753501499369, "loss": 0.2991, "step": 3940 }, { "epoch": 0.00698949182564569, "grad_norm": 0.828125, "learning_rate": 0.001995747724575058, "loss": 0.357, "step": 3942 }, { "epoch": 0.006993037990955506, "grad_norm": 0.265625, "learning_rate": 0.0019957419437332665, "loss": 0.2131, "step": 3944 }, { "epoch": 0.00699658415626532, "grad_norm": 0.251953125, "learning_rate": 0.0019957361589740203, "loss": 0.3052, "step": 3946 }, { "epoch": 0.007000130321575136, "grad_norm": 0.5546875, "learning_rate": 0.0019957303702973442, "loss": 0.2233, "step": 3948 }, { "epoch": 0.007003676486884951, "grad_norm": 0.765625, "learning_rate": 0.001995724577703264, "loss": 0.3408, "step": 3950 }, { "epoch": 0.007007222652194766, "grad_norm": 1.6015625, "learning_rate": 0.001995718781191805, "loss": 0.4903, "step": 3952 }, { "epoch": 0.007010768817504581, "grad_norm": 1.109375, "learning_rate": 0.001995712980762992, "loss": 0.2679, "step": 3954 }, { "epoch": 0.0070143149828143965, "grad_norm": 0.404296875, "learning_rate": 0.001995707176416851, "loss": 0.3888, "step": 3956 }, { "epoch": 0.007017861148124212, "grad_norm": 0.88671875, "learning_rate": 0.001995701368153407, "loss": 0.2263, "step": 3958 }, { "epoch": 0.0070214073134340265, "grad_norm": 3.015625, "learning_rate": 0.0019956955559726854, "loss": 0.3457, "step": 3960 }, { "epoch": 0.007024953478743842, "grad_norm": 0.369140625, "learning_rate": 0.0019956897398747116, "loss": 0.2678, "step": 3962 }, { "epoch": 0.007028499644053657, "grad_norm": 0.427734375, "learning_rate": 0.0019956839198595117, "loss": 0.3978, "step": 3964 }, { "epoch": 0.007032045809363472, "grad_norm": 0.9375, "learning_rate": 0.00199567809592711, "loss": 0.3307, "step": 3966 }, { "epoch": 0.007035591974673287, "grad_norm": 0.474609375, "learning_rate": 0.0019956722680775324, "loss": 0.2712, "step": 3968 }, { "epoch": 0.007039138139983103, "grad_norm": 0.65625, "learning_rate": 0.0019956664363108053, "loss": 0.3092, "step": 3970 }, { "epoch": 0.007042684305292917, "grad_norm": 0.65625, "learning_rate": 0.001995660600626953, "loss": 0.2978, "step": 3972 }, { "epoch": 0.007046230470602733, "grad_norm": 0.306640625, "learning_rate": 0.0019956547610260017, "loss": 0.251, "step": 3974 }, { "epoch": 0.007049776635912548, "grad_norm": 3.265625, "learning_rate": 0.0019956489175079768, "loss": 0.5655, "step": 3976 }, { "epoch": 0.0070533228012223635, "grad_norm": 0.365234375, "learning_rate": 0.0019956430700729037, "loss": 0.2985, "step": 3978 }, { "epoch": 0.007056868966532178, "grad_norm": 0.8671875, "learning_rate": 0.001995637218720808, "loss": 0.4254, "step": 3980 }, { "epoch": 0.0070604151318419935, "grad_norm": 1.1328125, "learning_rate": 0.0019956313634517152, "loss": 0.3667, "step": 3982 }, { "epoch": 0.007063961297151809, "grad_norm": 1.078125, "learning_rate": 0.0019956255042656514, "loss": 0.3232, "step": 3984 }, { "epoch": 0.0070675074624616235, "grad_norm": 0.54296875, "learning_rate": 0.0019956196411626418, "loss": 0.2812, "step": 3986 }, { "epoch": 0.007071053627771439, "grad_norm": 0.208984375, "learning_rate": 0.001995613774142712, "loss": 0.2535, "step": 3988 }, { "epoch": 0.007074599793081254, "grad_norm": 0.474609375, "learning_rate": 0.0019956079032058876, "loss": 0.3384, "step": 3990 }, { "epoch": 0.00707814595839107, "grad_norm": 0.6640625, "learning_rate": 0.001995602028352195, "loss": 0.2275, "step": 3992 }, { "epoch": 0.007081692123700884, "grad_norm": 0.3828125, "learning_rate": 0.001995596149581659, "loss": 0.2794, "step": 3994 }, { "epoch": 0.0070852382890107, "grad_norm": 0.251953125, "learning_rate": 0.001995590266894306, "loss": 0.2846, "step": 3996 }, { "epoch": 0.007088784454320515, "grad_norm": 0.423828125, "learning_rate": 0.001995584380290161, "loss": 0.328, "step": 3998 }, { "epoch": 0.00709233061963033, "grad_norm": 0.3671875, "learning_rate": 0.0019955784897692504, "loss": 0.2343, "step": 4000 }, { "epoch": 0.007095876784940145, "grad_norm": 0.287109375, "learning_rate": 0.0019955725953315997, "loss": 0.2368, "step": 4002 }, { "epoch": 0.0070994229502499605, "grad_norm": 0.828125, "learning_rate": 0.001995566696977234, "loss": 0.5125, "step": 4004 }, { "epoch": 0.007102969115559775, "grad_norm": 0.353515625, "learning_rate": 0.0019955607947061806, "loss": 0.3207, "step": 4006 }, { "epoch": 0.0071065152808695905, "grad_norm": 0.25390625, "learning_rate": 0.001995554888518464, "loss": 0.2598, "step": 4008 }, { "epoch": 0.007110061446179406, "grad_norm": 0.97265625, "learning_rate": 0.0019955489784141108, "loss": 0.2748, "step": 4010 }, { "epoch": 0.007113607611489221, "grad_norm": 0.345703125, "learning_rate": 0.0019955430643931464, "loss": 0.251, "step": 4012 }, { "epoch": 0.007117153776799036, "grad_norm": 0.376953125, "learning_rate": 0.001995537146455597, "loss": 0.2903, "step": 4014 }, { "epoch": 0.007120699942108851, "grad_norm": 0.65625, "learning_rate": 0.001995531224601488, "loss": 0.3316, "step": 4016 }, { "epoch": 0.007124246107418667, "grad_norm": 0.328125, "learning_rate": 0.001995525298830846, "loss": 0.269, "step": 4018 }, { "epoch": 0.007127792272728481, "grad_norm": 0.478515625, "learning_rate": 0.0019955193691436964, "loss": 0.3467, "step": 4020 }, { "epoch": 0.007131338438038297, "grad_norm": 0.392578125, "learning_rate": 0.0019955134355400654, "loss": 0.2788, "step": 4022 }, { "epoch": 0.007134884603348112, "grad_norm": 0.427734375, "learning_rate": 0.0019955074980199782, "loss": 0.2938, "step": 4024 }, { "epoch": 0.0071384307686579275, "grad_norm": 0.47265625, "learning_rate": 0.001995501556583462, "loss": 0.3235, "step": 4026 }, { "epoch": 0.007141976933967742, "grad_norm": 0.328125, "learning_rate": 0.0019954956112305418, "loss": 0.2632, "step": 4028 }, { "epoch": 0.0071455230992775575, "grad_norm": 1.4765625, "learning_rate": 0.0019954896619612445, "loss": 0.4317, "step": 4030 }, { "epoch": 0.007149069264587373, "grad_norm": 0.310546875, "learning_rate": 0.0019954837087755952, "loss": 0.3593, "step": 4032 }, { "epoch": 0.0071526154298971875, "grad_norm": 0.6484375, "learning_rate": 0.0019954777516736203, "loss": 0.4007, "step": 4034 }, { "epoch": 0.007156161595207003, "grad_norm": 0.271484375, "learning_rate": 0.001995471790655346, "loss": 0.2364, "step": 4036 }, { "epoch": 0.007159707760516818, "grad_norm": 1.2109375, "learning_rate": 0.0019954658257207982, "loss": 0.3287, "step": 4038 }, { "epoch": 0.007163253925826633, "grad_norm": 0.3125, "learning_rate": 0.0019954598568700027, "loss": 0.2472, "step": 4040 }, { "epoch": 0.007166800091136448, "grad_norm": 0.5078125, "learning_rate": 0.0019954538841029865, "loss": 0.2895, "step": 4042 }, { "epoch": 0.007170346256446264, "grad_norm": 0.318359375, "learning_rate": 0.0019954479074197743, "loss": 0.305, "step": 4044 }, { "epoch": 0.007173892421756079, "grad_norm": 0.3203125, "learning_rate": 0.001995441926820394, "loss": 0.2857, "step": 4046 }, { "epoch": 0.007177438587065894, "grad_norm": 0.65234375, "learning_rate": 0.0019954359423048702, "loss": 0.3389, "step": 4048 }, { "epoch": 0.007180984752375709, "grad_norm": 0.578125, "learning_rate": 0.00199542995387323, "loss": 0.3093, "step": 4050 }, { "epoch": 0.0071845309176855245, "grad_norm": 1.8046875, "learning_rate": 0.0019954239615254995, "loss": 0.3945, "step": 4052 }, { "epoch": 0.007188077082995339, "grad_norm": 0.419921875, "learning_rate": 0.0019954179652617045, "loss": 0.2524, "step": 4054 }, { "epoch": 0.0071916232483051545, "grad_norm": 0.291015625, "learning_rate": 0.0019954119650818715, "loss": 0.3658, "step": 4056 }, { "epoch": 0.00719516941361497, "grad_norm": 0.302734375, "learning_rate": 0.001995405960986027, "loss": 0.2964, "step": 4058 }, { "epoch": 0.007198715578924785, "grad_norm": 0.21484375, "learning_rate": 0.001995399952974196, "loss": 0.2584, "step": 4060 }, { "epoch": 0.0072022617442346, "grad_norm": 1.2421875, "learning_rate": 0.0019953939410464064, "loss": 0.4, "step": 4062 }, { "epoch": 0.007205807909544415, "grad_norm": 0.310546875, "learning_rate": 0.001995387925202684, "loss": 0.2817, "step": 4064 }, { "epoch": 0.007209354074854231, "grad_norm": 0.55859375, "learning_rate": 0.0019953819054430544, "loss": 0.3777, "step": 4066 }, { "epoch": 0.007212900240164045, "grad_norm": 0.3984375, "learning_rate": 0.0019953758817675446, "loss": 0.2462, "step": 4068 }, { "epoch": 0.007216446405473861, "grad_norm": 0.375, "learning_rate": 0.001995369854176181, "loss": 0.4524, "step": 4070 }, { "epoch": 0.007219992570783676, "grad_norm": 0.203125, "learning_rate": 0.00199536382266899, "loss": 0.2715, "step": 4072 }, { "epoch": 0.007223538736093491, "grad_norm": 0.453125, "learning_rate": 0.001995357787245997, "loss": 0.2618, "step": 4074 }, { "epoch": 0.007227084901403306, "grad_norm": 0.494140625, "learning_rate": 0.0019953517479072294, "loss": 0.4218, "step": 4076 }, { "epoch": 0.0072306310667131215, "grad_norm": 0.49609375, "learning_rate": 0.0019953457046527133, "loss": 0.2579, "step": 4078 }, { "epoch": 0.007234177232022937, "grad_norm": 0.294921875, "learning_rate": 0.001995339657482475, "loss": 0.2531, "step": 4080 }, { "epoch": 0.0072377233973327515, "grad_norm": 0.421875, "learning_rate": 0.0019953336063965417, "loss": 0.2742, "step": 4082 }, { "epoch": 0.007241269562642567, "grad_norm": 0.53125, "learning_rate": 0.001995327551394939, "loss": 0.2189, "step": 4084 }, { "epoch": 0.007244815727952382, "grad_norm": 0.984375, "learning_rate": 0.0019953214924776936, "loss": 0.3908, "step": 4086 }, { "epoch": 0.007248361893262197, "grad_norm": 0.263671875, "learning_rate": 0.001995315429644832, "loss": 0.2772, "step": 4088 }, { "epoch": 0.007251908058572012, "grad_norm": 1.203125, "learning_rate": 0.001995309362896381, "loss": 0.3453, "step": 4090 }, { "epoch": 0.007255454223881828, "grad_norm": 0.859375, "learning_rate": 0.0019953032922323667, "loss": 0.3992, "step": 4092 }, { "epoch": 0.007259000389191643, "grad_norm": 0.51953125, "learning_rate": 0.001995297217652816, "loss": 0.3004, "step": 4094 }, { "epoch": 0.007262546554501458, "grad_norm": 1.1015625, "learning_rate": 0.0019952911391577554, "loss": 0.5569, "step": 4096 }, { "epoch": 0.007266092719811273, "grad_norm": 0.388671875, "learning_rate": 0.001995285056747211, "loss": 0.3472, "step": 4098 }, { "epoch": 0.0072696388851210885, "grad_norm": 0.77734375, "learning_rate": 0.00199527897042121, "loss": 0.4015, "step": 4100 }, { "epoch": 0.007273185050430903, "grad_norm": 0.365234375, "learning_rate": 0.0019952728801797786, "loss": 0.264, "step": 4102 }, { "epoch": 0.0072767312157407185, "grad_norm": 0.5546875, "learning_rate": 0.001995266786022944, "loss": 0.38, "step": 4104 }, { "epoch": 0.007280277381050534, "grad_norm": 0.259765625, "learning_rate": 0.0019952606879507324, "loss": 0.2326, "step": 4106 }, { "epoch": 0.0072838235463603485, "grad_norm": 0.53515625, "learning_rate": 0.001995254585963171, "loss": 0.2613, "step": 4108 }, { "epoch": 0.007287369711670164, "grad_norm": 0.341796875, "learning_rate": 0.0019952484800602856, "loss": 0.2423, "step": 4110 }, { "epoch": 0.007290915876979979, "grad_norm": 1.046875, "learning_rate": 0.0019952423702421034, "loss": 0.2558, "step": 4112 }, { "epoch": 0.007294462042289795, "grad_norm": 0.30859375, "learning_rate": 0.001995236256508651, "loss": 0.2847, "step": 4114 }, { "epoch": 0.007298008207599609, "grad_norm": 0.470703125, "learning_rate": 0.0019952301388599554, "loss": 0.3737, "step": 4116 }, { "epoch": 0.007301554372909425, "grad_norm": 0.30859375, "learning_rate": 0.0019952240172960434, "loss": 0.2665, "step": 4118 }, { "epoch": 0.00730510053821924, "grad_norm": 0.67578125, "learning_rate": 0.001995217891816941, "loss": 0.2976, "step": 4120 }, { "epoch": 0.007308646703529055, "grad_norm": 0.796875, "learning_rate": 0.001995211762422676, "loss": 0.3633, "step": 4122 }, { "epoch": 0.00731219286883887, "grad_norm": 0.6015625, "learning_rate": 0.001995205629113275, "loss": 0.2471, "step": 4124 }, { "epoch": 0.0073157390341486855, "grad_norm": 0.82421875, "learning_rate": 0.001995199491888764, "loss": 0.3493, "step": 4126 }, { "epoch": 0.007319285199458501, "grad_norm": 0.55078125, "learning_rate": 0.0019951933507491703, "loss": 0.2472, "step": 4128 }, { "epoch": 0.0073228313647683155, "grad_norm": 0.34765625, "learning_rate": 0.001995187205694521, "loss": 0.2807, "step": 4130 }, { "epoch": 0.007326377530078131, "grad_norm": 0.703125, "learning_rate": 0.001995181056724843, "loss": 0.2845, "step": 4132 }, { "epoch": 0.007329923695387946, "grad_norm": 0.57421875, "learning_rate": 0.0019951749038401635, "loss": 0.4949, "step": 4134 }, { "epoch": 0.007333469860697761, "grad_norm": 2.03125, "learning_rate": 0.0019951687470405083, "loss": 0.2666, "step": 4136 }, { "epoch": 0.007337016026007576, "grad_norm": 1.2890625, "learning_rate": 0.0019951625863259053, "loss": 0.3614, "step": 4138 }, { "epoch": 0.007340562191317392, "grad_norm": 0.427734375, "learning_rate": 0.001995156421696381, "loss": 0.3208, "step": 4140 }, { "epoch": 0.007344108356627206, "grad_norm": 0.255859375, "learning_rate": 0.0019951502531519627, "loss": 0.2495, "step": 4142 }, { "epoch": 0.007347654521937022, "grad_norm": 0.58984375, "learning_rate": 0.001995144080692677, "loss": 0.3306, "step": 4144 }, { "epoch": 0.007351200687246837, "grad_norm": 0.4921875, "learning_rate": 0.0019951379043185507, "loss": 0.3835, "step": 4146 }, { "epoch": 0.0073547468525566526, "grad_norm": 0.439453125, "learning_rate": 0.0019951317240296117, "loss": 0.4441, "step": 4148 }, { "epoch": 0.007358293017866467, "grad_norm": 0.6328125, "learning_rate": 0.001995125539825886, "loss": 0.2542, "step": 4150 }, { "epoch": 0.0073618391831762825, "grad_norm": 0.369140625, "learning_rate": 0.0019951193517074015, "loss": 0.3727, "step": 4152 }, { "epoch": 0.007365385348486098, "grad_norm": 0.228515625, "learning_rate": 0.0019951131596741846, "loss": 0.2044, "step": 4154 }, { "epoch": 0.0073689315137959125, "grad_norm": 0.6171875, "learning_rate": 0.0019951069637262633, "loss": 0.2646, "step": 4156 }, { "epoch": 0.007372477679105728, "grad_norm": 0.4296875, "learning_rate": 0.0019951007638636634, "loss": 0.3063, "step": 4158 }, { "epoch": 0.007376023844415543, "grad_norm": 0.80078125, "learning_rate": 0.001995094560086413, "loss": 0.5668, "step": 4160 }, { "epoch": 0.007379570009725359, "grad_norm": 0.28515625, "learning_rate": 0.001995088352394539, "loss": 0.331, "step": 4162 }, { "epoch": 0.007383116175035173, "grad_norm": 0.578125, "learning_rate": 0.0019950821407880683, "loss": 0.2038, "step": 4164 }, { "epoch": 0.007386662340344989, "grad_norm": 0.859375, "learning_rate": 0.001995075925267028, "loss": 0.275, "step": 4166 }, { "epoch": 0.007390208505654804, "grad_norm": 0.39453125, "learning_rate": 0.001995069705831446, "loss": 0.291, "step": 4168 }, { "epoch": 0.007393754670964619, "grad_norm": 0.23046875, "learning_rate": 0.0019950634824813488, "loss": 0.2342, "step": 4170 }, { "epoch": 0.007397300836274434, "grad_norm": 0.3984375, "learning_rate": 0.0019950572552167637, "loss": 0.2628, "step": 4172 }, { "epoch": 0.0074008470015842496, "grad_norm": 0.337890625, "learning_rate": 0.0019950510240377187, "loss": 0.2843, "step": 4174 }, { "epoch": 0.007404393166894064, "grad_norm": 0.54296875, "learning_rate": 0.00199504478894424, "loss": 0.3176, "step": 4176 }, { "epoch": 0.0074079393322038795, "grad_norm": 0.333984375, "learning_rate": 0.0019950385499363553, "loss": 0.2461, "step": 4178 }, { "epoch": 0.007411485497513695, "grad_norm": 0.703125, "learning_rate": 0.0019950323070140915, "loss": 0.4238, "step": 4180 }, { "epoch": 0.00741503166282351, "grad_norm": 0.265625, "learning_rate": 0.0019950260601774767, "loss": 0.1986, "step": 4182 }, { "epoch": 0.007418577828133325, "grad_norm": 0.357421875, "learning_rate": 0.0019950198094265377, "loss": 0.2889, "step": 4184 }, { "epoch": 0.00742212399344314, "grad_norm": 3.609375, "learning_rate": 0.0019950135547613023, "loss": 0.3667, "step": 4186 }, { "epoch": 0.007425670158752956, "grad_norm": 0.4140625, "learning_rate": 0.001995007296181797, "loss": 0.2688, "step": 4188 }, { "epoch": 0.00742921632406277, "grad_norm": 0.2265625, "learning_rate": 0.00199500103368805, "loss": 0.2974, "step": 4190 }, { "epoch": 0.007432762489372586, "grad_norm": 0.408203125, "learning_rate": 0.0019949947672800884, "loss": 0.3078, "step": 4192 }, { "epoch": 0.007436308654682401, "grad_norm": 0.7421875, "learning_rate": 0.0019949884969579393, "loss": 0.2632, "step": 4194 }, { "epoch": 0.007439854819992217, "grad_norm": 0.83984375, "learning_rate": 0.0019949822227216304, "loss": 0.3044, "step": 4196 }, { "epoch": 0.007443400985302031, "grad_norm": 0.64453125, "learning_rate": 0.0019949759445711895, "loss": 0.2697, "step": 4198 }, { "epoch": 0.0074469471506118465, "grad_norm": 0.341796875, "learning_rate": 0.001994969662506643, "loss": 0.2577, "step": 4200 }, { "epoch": 0.007450493315921662, "grad_norm": 0.474609375, "learning_rate": 0.00199496337652802, "loss": 0.3347, "step": 4202 }, { "epoch": 0.0074540394812314765, "grad_norm": 0.35546875, "learning_rate": 0.0019949570866353464, "loss": 0.3018, "step": 4204 }, { "epoch": 0.007457585646541292, "grad_norm": 0.5546875, "learning_rate": 0.0019949507928286505, "loss": 0.2547, "step": 4206 }, { "epoch": 0.007461131811851107, "grad_norm": 0.91796875, "learning_rate": 0.001994944495107959, "loss": 0.4746, "step": 4208 }, { "epoch": 0.007464677977160922, "grad_norm": 0.546875, "learning_rate": 0.001994938193473301, "loss": 0.2632, "step": 4210 }, { "epoch": 0.007468224142470737, "grad_norm": 0.2294921875, "learning_rate": 0.001994931887924703, "loss": 0.2509, "step": 4212 }, { "epoch": 0.007471770307780553, "grad_norm": 0.30859375, "learning_rate": 0.001994925578462193, "loss": 0.2849, "step": 4214 }, { "epoch": 0.007475316473090368, "grad_norm": 0.263671875, "learning_rate": 0.001994919265085798, "loss": 0.2497, "step": 4216 }, { "epoch": 0.007478862638400183, "grad_norm": 0.345703125, "learning_rate": 0.0019949129477955458, "loss": 0.2916, "step": 4218 }, { "epoch": 0.007482408803709998, "grad_norm": 0.26953125, "learning_rate": 0.0019949066265914648, "loss": 0.2228, "step": 4220 }, { "epoch": 0.007485954969019814, "grad_norm": 0.28125, "learning_rate": 0.0019949003014735813, "loss": 0.2561, "step": 4222 }, { "epoch": 0.007489501134329628, "grad_norm": 0.490234375, "learning_rate": 0.001994893972441924, "loss": 0.3268, "step": 4224 }, { "epoch": 0.0074930472996394435, "grad_norm": 1.4609375, "learning_rate": 0.0019948876394965207, "loss": 0.5349, "step": 4226 }, { "epoch": 0.007496593464949259, "grad_norm": 0.38671875, "learning_rate": 0.0019948813026373978, "loss": 0.4591, "step": 4228 }, { "epoch": 0.007500139630259074, "grad_norm": 0.58984375, "learning_rate": 0.0019948749618645843, "loss": 0.2819, "step": 4230 }, { "epoch": 0.007503685795568889, "grad_norm": 0.396484375, "learning_rate": 0.001994868617178108, "loss": 0.2577, "step": 4232 }, { "epoch": 0.007507231960878704, "grad_norm": 1.75, "learning_rate": 0.001994862268577996, "loss": 0.2379, "step": 4234 }, { "epoch": 0.00751077812618852, "grad_norm": 0.4140625, "learning_rate": 0.001994855916064276, "loss": 0.325, "step": 4236 }, { "epoch": 0.007514324291498334, "grad_norm": 0.267578125, "learning_rate": 0.0019948495596369756, "loss": 0.2582, "step": 4238 }, { "epoch": 0.00751787045680815, "grad_norm": 0.259765625, "learning_rate": 0.001994843199296124, "loss": 0.3427, "step": 4240 }, { "epoch": 0.007521416622117965, "grad_norm": 0.6171875, "learning_rate": 0.0019948368350417474, "loss": 0.2594, "step": 4242 }, { "epoch": 0.00752496278742778, "grad_norm": 0.482421875, "learning_rate": 0.001994830466873874, "loss": 0.2235, "step": 4244 }, { "epoch": 0.007528508952737595, "grad_norm": 0.62109375, "learning_rate": 0.0019948240947925324, "loss": 0.3591, "step": 4246 }, { "epoch": 0.0075320551180474106, "grad_norm": 0.5546875, "learning_rate": 0.00199481771879775, "loss": 0.2935, "step": 4248 }, { "epoch": 0.007535601283357226, "grad_norm": 0.333984375, "learning_rate": 0.0019948113388895544, "loss": 0.249, "step": 4250 }, { "epoch": 0.0075391474486670405, "grad_norm": 0.7109375, "learning_rate": 0.0019948049550679737, "loss": 0.2767, "step": 4252 }, { "epoch": 0.007542693613976856, "grad_norm": 0.361328125, "learning_rate": 0.0019947985673330363, "loss": 0.3592, "step": 4254 }, { "epoch": 0.007546239779286671, "grad_norm": 0.46484375, "learning_rate": 0.0019947921756847697, "loss": 0.374, "step": 4256 }, { "epoch": 0.007549785944596486, "grad_norm": 0.291015625, "learning_rate": 0.0019947857801232015, "loss": 0.3037, "step": 4258 }, { "epoch": 0.007553332109906301, "grad_norm": 3.59375, "learning_rate": 0.0019947793806483604, "loss": 0.6666, "step": 4260 }, { "epoch": 0.007556878275216117, "grad_norm": 0.341796875, "learning_rate": 0.0019947729772602737, "loss": 0.221, "step": 4262 }, { "epoch": 0.007560424440525932, "grad_norm": 0.53515625, "learning_rate": 0.00199476656995897, "loss": 0.2994, "step": 4264 }, { "epoch": 0.007563970605835747, "grad_norm": 0.5859375, "learning_rate": 0.001994760158744477, "loss": 0.2465, "step": 4266 }, { "epoch": 0.007567516771145562, "grad_norm": 1.78125, "learning_rate": 0.001994753743616823, "loss": 0.2681, "step": 4268 }, { "epoch": 0.007571062936455378, "grad_norm": 0.484375, "learning_rate": 0.0019947473245760356, "loss": 0.4576, "step": 4270 }, { "epoch": 0.007574609101765192, "grad_norm": 0.5625, "learning_rate": 0.0019947409016221433, "loss": 0.2747, "step": 4272 }, { "epoch": 0.0075781552670750076, "grad_norm": 0.69921875, "learning_rate": 0.0019947344747551737, "loss": 0.4057, "step": 4274 }, { "epoch": 0.007581701432384823, "grad_norm": 1.6796875, "learning_rate": 0.001994728043975156, "loss": 0.3244, "step": 4276 }, { "epoch": 0.0075852475976946375, "grad_norm": 0.578125, "learning_rate": 0.0019947216092821166, "loss": 0.2432, "step": 4278 }, { "epoch": 0.007588793763004453, "grad_norm": 0.494140625, "learning_rate": 0.001994715170676085, "loss": 0.4124, "step": 4280 }, { "epoch": 0.007592339928314268, "grad_norm": 1.8203125, "learning_rate": 0.0019947087281570893, "loss": 0.3114, "step": 4282 }, { "epoch": 0.007595886093624084, "grad_norm": 0.263671875, "learning_rate": 0.001994702281725157, "loss": 0.266, "step": 4284 }, { "epoch": 0.007599432258933898, "grad_norm": 0.625, "learning_rate": 0.0019946958313803165, "loss": 0.2568, "step": 4286 }, { "epoch": 0.007602978424243714, "grad_norm": 0.6328125, "learning_rate": 0.001994689377122596, "loss": 0.3541, "step": 4288 }, { "epoch": 0.007606524589553529, "grad_norm": 0.3125, "learning_rate": 0.001994682918952024, "loss": 0.2765, "step": 4290 }, { "epoch": 0.007610070754863344, "grad_norm": 0.86328125, "learning_rate": 0.0019946764568686288, "loss": 0.3133, "step": 4292 }, { "epoch": 0.007613616920173159, "grad_norm": 0.416015625, "learning_rate": 0.0019946699908724385, "loss": 0.2714, "step": 4294 }, { "epoch": 0.007617163085482975, "grad_norm": 0.31640625, "learning_rate": 0.0019946635209634814, "loss": 0.2529, "step": 4296 }, { "epoch": 0.00762070925079279, "grad_norm": 0.60546875, "learning_rate": 0.0019946570471417856, "loss": 0.4352, "step": 4298 }, { "epoch": 0.0076242554161026046, "grad_norm": 0.28125, "learning_rate": 0.0019946505694073795, "loss": 0.2255, "step": 4300 }, { "epoch": 0.00762780158141242, "grad_norm": 0.58984375, "learning_rate": 0.0019946440877602915, "loss": 0.3296, "step": 4302 }, { "epoch": 0.007631347746722235, "grad_norm": 0.25, "learning_rate": 0.00199463760220055, "loss": 0.3566, "step": 4304 }, { "epoch": 0.00763489391203205, "grad_norm": 0.400390625, "learning_rate": 0.0019946311127281833, "loss": 0.2687, "step": 4306 }, { "epoch": 0.007638440077341865, "grad_norm": 0.21484375, "learning_rate": 0.0019946246193432195, "loss": 0.2209, "step": 4308 }, { "epoch": 0.007641986242651681, "grad_norm": 0.375, "learning_rate": 0.0019946181220456874, "loss": 0.2465, "step": 4310 }, { "epoch": 0.007645532407961495, "grad_norm": 0.73046875, "learning_rate": 0.0019946116208356154, "loss": 0.2641, "step": 4312 }, { "epoch": 0.007649078573271311, "grad_norm": 0.283203125, "learning_rate": 0.001994605115713032, "loss": 0.2838, "step": 4314 }, { "epoch": 0.007652624738581126, "grad_norm": 0.287109375, "learning_rate": 0.0019945986066779654, "loss": 0.3213, "step": 4316 }, { "epoch": 0.007656170903890942, "grad_norm": 0.27734375, "learning_rate": 0.001994592093730444, "loss": 0.2789, "step": 4318 }, { "epoch": 0.007659717069200756, "grad_norm": 1.0390625, "learning_rate": 0.0019945855768704964, "loss": 0.4544, "step": 4320 }, { "epoch": 0.007663263234510572, "grad_norm": 0.38671875, "learning_rate": 0.001994579056098151, "loss": 0.2123, "step": 4322 }, { "epoch": 0.007666809399820387, "grad_norm": 0.33984375, "learning_rate": 0.0019945725314134367, "loss": 0.253, "step": 4324 }, { "epoch": 0.0076703555651302015, "grad_norm": 0.2021484375, "learning_rate": 0.001994566002816382, "loss": 0.2218, "step": 4326 }, { "epoch": 0.007673901730440017, "grad_norm": 0.64453125, "learning_rate": 0.0019945594703070146, "loss": 0.2866, "step": 4328 }, { "epoch": 0.007677447895749832, "grad_norm": 0.80859375, "learning_rate": 0.001994552933885364, "loss": 0.2576, "step": 4330 }, { "epoch": 0.007680994061059648, "grad_norm": 1.015625, "learning_rate": 0.0019945463935514586, "loss": 0.2613, "step": 4332 }, { "epoch": 0.007684540226369462, "grad_norm": 0.453125, "learning_rate": 0.0019945398493053266, "loss": 0.4501, "step": 4334 }, { "epoch": 0.007688086391679278, "grad_norm": 0.98046875, "learning_rate": 0.0019945333011469973, "loss": 0.2772, "step": 4336 }, { "epoch": 0.007691632556989093, "grad_norm": 0.333984375, "learning_rate": 0.0019945267490764987, "loss": 0.247, "step": 4338 }, { "epoch": 0.007695178722298908, "grad_norm": 0.53125, "learning_rate": 0.00199452019309386, "loss": 0.2608, "step": 4340 }, { "epoch": 0.007698724887608723, "grad_norm": 4.09375, "learning_rate": 0.0019945136331991093, "loss": 0.364, "step": 4342 }, { "epoch": 0.007702271052918539, "grad_norm": 0.220703125, "learning_rate": 0.0019945070693922757, "loss": 0.2504, "step": 4344 }, { "epoch": 0.007705817218228353, "grad_norm": 0.3515625, "learning_rate": 0.0019945005016733875, "loss": 0.2802, "step": 4346 }, { "epoch": 0.0077093633835381686, "grad_norm": 0.37890625, "learning_rate": 0.001994493930042474, "loss": 0.2524, "step": 4348 }, { "epoch": 0.007712909548847984, "grad_norm": 0.7265625, "learning_rate": 0.001994487354499563, "loss": 0.2936, "step": 4350 }, { "epoch": 0.007716455714157799, "grad_norm": 1.2890625, "learning_rate": 0.0019944807750446845, "loss": 0.4244, "step": 4352 }, { "epoch": 0.007720001879467614, "grad_norm": 0.68359375, "learning_rate": 0.0019944741916778667, "loss": 0.3, "step": 4354 }, { "epoch": 0.007723548044777429, "grad_norm": 0.279296875, "learning_rate": 0.001994467604399138, "loss": 0.2836, "step": 4356 }, { "epoch": 0.007727094210087245, "grad_norm": 0.57421875, "learning_rate": 0.001994461013208528, "loss": 0.3151, "step": 4358 }, { "epoch": 0.007730640375397059, "grad_norm": 0.37109375, "learning_rate": 0.001994454418106065, "loss": 0.3651, "step": 4360 }, { "epoch": 0.007734186540706875, "grad_norm": 0.87109375, "learning_rate": 0.001994447819091778, "loss": 0.4105, "step": 4362 }, { "epoch": 0.00773773270601669, "grad_norm": 1.25, "learning_rate": 0.001994441216165695, "loss": 0.2935, "step": 4364 }, { "epoch": 0.007741278871326506, "grad_norm": 0.34765625, "learning_rate": 0.0019944346093278466, "loss": 0.2232, "step": 4366 }, { "epoch": 0.00774482503663632, "grad_norm": 0.69921875, "learning_rate": 0.0019944279985782605, "loss": 0.3407, "step": 4368 }, { "epoch": 0.007748371201946136, "grad_norm": 2.015625, "learning_rate": 0.0019944213839169656, "loss": 0.2508, "step": 4370 }, { "epoch": 0.007751917367255951, "grad_norm": 0.625, "learning_rate": 0.0019944147653439917, "loss": 0.3536, "step": 4372 }, { "epoch": 0.0077554635325657656, "grad_norm": 0.345703125, "learning_rate": 0.0019944081428593667, "loss": 0.4257, "step": 4374 }, { "epoch": 0.007759009697875581, "grad_norm": 0.294921875, "learning_rate": 0.00199440151646312, "loss": 0.2985, "step": 4376 }, { "epoch": 0.007762555863185396, "grad_norm": 0.337890625, "learning_rate": 0.0019943948861552807, "loss": 0.2368, "step": 4378 }, { "epoch": 0.007766102028495211, "grad_norm": 0.251953125, "learning_rate": 0.0019943882519358777, "loss": 0.2776, "step": 4380 }, { "epoch": 0.007769648193805026, "grad_norm": 0.3203125, "learning_rate": 0.00199438161380494, "loss": 0.2795, "step": 4382 }, { "epoch": 0.007773194359114842, "grad_norm": 0.330078125, "learning_rate": 0.0019943749717624966, "loss": 0.2514, "step": 4384 }, { "epoch": 0.007776740524424657, "grad_norm": 0.31640625, "learning_rate": 0.0019943683258085766, "loss": 0.2623, "step": 4386 }, { "epoch": 0.007780286689734472, "grad_norm": 0.5078125, "learning_rate": 0.001994361675943209, "loss": 0.5157, "step": 4388 }, { "epoch": 0.007783832855044287, "grad_norm": 0.5390625, "learning_rate": 0.001994355022166423, "loss": 0.374, "step": 4390 }, { "epoch": 0.007787379020354103, "grad_norm": 0.609375, "learning_rate": 0.001994348364478248, "loss": 0.3461, "step": 4392 }, { "epoch": 0.007790925185663917, "grad_norm": 0.318359375, "learning_rate": 0.001994341702878712, "loss": 0.2688, "step": 4394 }, { "epoch": 0.007794471350973733, "grad_norm": 2.609375, "learning_rate": 0.0019943350373678452, "loss": 0.4908, "step": 4396 }, { "epoch": 0.007798017516283548, "grad_norm": 0.1962890625, "learning_rate": 0.0019943283679456766, "loss": 0.2465, "step": 4398 }, { "epoch": 0.007801563681593363, "grad_norm": 0.58984375, "learning_rate": 0.001994321694612235, "loss": 0.2208, "step": 4400 }, { "epoch": 0.007805109846903178, "grad_norm": 2.109375, "learning_rate": 0.00199431501736755, "loss": 0.3789, "step": 4402 }, { "epoch": 0.007808656012212993, "grad_norm": 0.96484375, "learning_rate": 0.0019943083362116507, "loss": 0.2863, "step": 4404 }, { "epoch": 0.007812202177522809, "grad_norm": 0.31640625, "learning_rate": 0.001994301651144566, "loss": 0.2622, "step": 4406 }, { "epoch": 0.007815748342832624, "grad_norm": 0.255859375, "learning_rate": 0.0019942949621663255, "loss": 0.3736, "step": 4408 }, { "epoch": 0.007819294508142439, "grad_norm": 0.2890625, "learning_rate": 0.0019942882692769582, "loss": 0.2169, "step": 4410 }, { "epoch": 0.007822840673452253, "grad_norm": 0.8984375, "learning_rate": 0.0019942815724764934, "loss": 0.3357, "step": 4412 }, { "epoch": 0.00782638683876207, "grad_norm": 0.80078125, "learning_rate": 0.0019942748717649604, "loss": 0.2929, "step": 4414 }, { "epoch": 0.007829933004071884, "grad_norm": 0.271484375, "learning_rate": 0.001994268167142389, "loss": 0.2081, "step": 4416 }, { "epoch": 0.007833479169381699, "grad_norm": 0.306640625, "learning_rate": 0.001994261458608808, "loss": 0.3851, "step": 4418 }, { "epoch": 0.007837025334691515, "grad_norm": 0.267578125, "learning_rate": 0.0019942547461642463, "loss": 0.2196, "step": 4420 }, { "epoch": 0.00784057150000133, "grad_norm": 1.34375, "learning_rate": 0.001994248029808734, "loss": 0.3409, "step": 4422 }, { "epoch": 0.007844117665311144, "grad_norm": 0.9609375, "learning_rate": 0.0019942413095423005, "loss": 0.2777, "step": 4424 }, { "epoch": 0.00784766383062096, "grad_norm": 0.5234375, "learning_rate": 0.001994234585364975, "loss": 0.2788, "step": 4426 }, { "epoch": 0.007851209995930775, "grad_norm": 0.7890625, "learning_rate": 0.0019942278572767863, "loss": 0.2346, "step": 4428 }, { "epoch": 0.007854756161240591, "grad_norm": 0.796875, "learning_rate": 0.0019942211252777647, "loss": 0.5888, "step": 4430 }, { "epoch": 0.007858302326550406, "grad_norm": 0.37890625, "learning_rate": 0.0019942143893679396, "loss": 0.2735, "step": 4432 }, { "epoch": 0.00786184849186022, "grad_norm": 0.392578125, "learning_rate": 0.00199420764954734, "loss": 0.3055, "step": 4434 }, { "epoch": 0.007865394657170037, "grad_norm": 0.443359375, "learning_rate": 0.0019942009058159954, "loss": 0.272, "step": 4436 }, { "epoch": 0.007868940822479851, "grad_norm": 0.1787109375, "learning_rate": 0.0019941941581739357, "loss": 0.2479, "step": 4438 }, { "epoch": 0.007872486987789666, "grad_norm": 0.388671875, "learning_rate": 0.00199418740662119, "loss": 0.3031, "step": 4440 }, { "epoch": 0.007876033153099482, "grad_norm": 0.337890625, "learning_rate": 0.001994180651157788, "loss": 0.2757, "step": 4442 }, { "epoch": 0.007879579318409297, "grad_norm": 0.73046875, "learning_rate": 0.001994173891783759, "loss": 0.4637, "step": 4444 }, { "epoch": 0.007883125483719111, "grad_norm": 0.353515625, "learning_rate": 0.001994167128499133, "loss": 0.3172, "step": 4446 }, { "epoch": 0.007886671649028927, "grad_norm": 1.2890625, "learning_rate": 0.0019941603613039395, "loss": 0.2998, "step": 4448 }, { "epoch": 0.007890217814338742, "grad_norm": 0.2734375, "learning_rate": 0.001994153590198208, "loss": 0.2892, "step": 4450 }, { "epoch": 0.007893763979648557, "grad_norm": 0.6328125, "learning_rate": 0.001994146815181968, "loss": 0.3114, "step": 4452 }, { "epoch": 0.007897310144958373, "grad_norm": 0.3671875, "learning_rate": 0.0019941400362552494, "loss": 0.3865, "step": 4454 }, { "epoch": 0.007900856310268187, "grad_norm": 0.341796875, "learning_rate": 0.0019941332534180816, "loss": 0.2601, "step": 4456 }, { "epoch": 0.007904402475578002, "grad_norm": 0.302734375, "learning_rate": 0.0019941264666704936, "loss": 0.2924, "step": 4458 }, { "epoch": 0.007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0019941196760125167, "loss": 0.2698, "step": 4460 }, { "epoch": 0.007911494806197633, "grad_norm": 0.388671875, "learning_rate": 0.001994112881444179, "loss": 0.3651, "step": 4462 }, { "epoch": 0.007915040971507449, "grad_norm": 0.36328125, "learning_rate": 0.0019941060829655115, "loss": 0.2912, "step": 4464 }, { "epoch": 0.007918587136817264, "grad_norm": 0.58203125, "learning_rate": 0.0019940992805765434, "loss": 0.2526, "step": 4466 }, { "epoch": 0.007922133302127078, "grad_norm": 0.2158203125, "learning_rate": 0.001994092474277304, "loss": 0.2764, "step": 4468 }, { "epoch": 0.007925679467436894, "grad_norm": 1.125, "learning_rate": 0.0019940856640678233, "loss": 0.2922, "step": 4470 }, { "epoch": 0.007929225632746709, "grad_norm": 0.263671875, "learning_rate": 0.001994078849948132, "loss": 0.2557, "step": 4472 }, { "epoch": 0.007932771798056524, "grad_norm": 0.61328125, "learning_rate": 0.0019940720319182583, "loss": 0.2532, "step": 4474 }, { "epoch": 0.00793631796336634, "grad_norm": 1.109375, "learning_rate": 0.0019940652099782333, "loss": 0.3192, "step": 4476 }, { "epoch": 0.007939864128676154, "grad_norm": 0.32421875, "learning_rate": 0.0019940583841280865, "loss": 0.2245, "step": 4478 }, { "epoch": 0.007943410293985969, "grad_norm": 0.494140625, "learning_rate": 0.001994051554367848, "loss": 0.3043, "step": 4480 }, { "epoch": 0.007946956459295785, "grad_norm": 1.046875, "learning_rate": 0.0019940447206975467, "loss": 0.2861, "step": 4482 }, { "epoch": 0.0079505026246056, "grad_norm": 0.416015625, "learning_rate": 0.001994037883117213, "loss": 0.2432, "step": 4484 }, { "epoch": 0.007954048789915414, "grad_norm": 0.337890625, "learning_rate": 0.001994031041626877, "loss": 0.2839, "step": 4486 }, { "epoch": 0.00795759495522523, "grad_norm": 0.73828125, "learning_rate": 0.001994024196226569, "loss": 0.3339, "step": 4488 }, { "epoch": 0.007961141120535045, "grad_norm": 0.435546875, "learning_rate": 0.0019940173469163184, "loss": 0.2935, "step": 4490 }, { "epoch": 0.00796468728584486, "grad_norm": 0.37109375, "learning_rate": 0.001994010493696155, "loss": 0.2689, "step": 4492 }, { "epoch": 0.007968233451154676, "grad_norm": 1.1796875, "learning_rate": 0.001994003636566109, "loss": 0.614, "step": 4494 }, { "epoch": 0.00797177961646449, "grad_norm": 0.53125, "learning_rate": 0.0019939967755262106, "loss": 0.4595, "step": 4496 }, { "epoch": 0.007975325781774307, "grad_norm": 0.44921875, "learning_rate": 0.001993989910576489, "loss": 0.3048, "step": 4498 }, { "epoch": 0.007978871947084121, "grad_norm": 0.484375, "learning_rate": 0.0019939830417169757, "loss": 0.3015, "step": 4500 }, { "epoch": 0.007982418112393936, "grad_norm": 0.9921875, "learning_rate": 0.0019939761689476993, "loss": 0.3926, "step": 4502 }, { "epoch": 0.007985964277703752, "grad_norm": 0.30859375, "learning_rate": 0.0019939692922686905, "loss": 0.2454, "step": 4504 }, { "epoch": 0.007989510443013567, "grad_norm": 2.28125, "learning_rate": 0.0019939624116799793, "loss": 0.2455, "step": 4506 }, { "epoch": 0.007993056608323381, "grad_norm": 0.306640625, "learning_rate": 0.001993955527181596, "loss": 0.2571, "step": 4508 }, { "epoch": 0.007996602773633198, "grad_norm": 0.8125, "learning_rate": 0.0019939486387735702, "loss": 0.4529, "step": 4510 }, { "epoch": 0.008000148938943012, "grad_norm": 0.265625, "learning_rate": 0.0019939417464559326, "loss": 0.2354, "step": 4512 }, { "epoch": 0.008003695104252827, "grad_norm": 0.3984375, "learning_rate": 0.001993934850228713, "loss": 0.2899, "step": 4514 }, { "epoch": 0.008007241269562643, "grad_norm": 0.314453125, "learning_rate": 0.0019939279500919417, "loss": 0.2149, "step": 4516 }, { "epoch": 0.008010787434872458, "grad_norm": 0.2734375, "learning_rate": 0.0019939210460456487, "loss": 0.2415, "step": 4518 }, { "epoch": 0.008014333600182272, "grad_norm": 0.29296875, "learning_rate": 0.001993914138089864, "loss": 0.3355, "step": 4520 }, { "epoch": 0.008017879765492088, "grad_norm": 1.1328125, "learning_rate": 0.001993907226224618, "loss": 0.3126, "step": 4522 }, { "epoch": 0.008021425930801903, "grad_norm": 0.69921875, "learning_rate": 0.0019939003104499416, "loss": 0.2445, "step": 4524 }, { "epoch": 0.008024972096111718, "grad_norm": 0.4375, "learning_rate": 0.0019938933907658646, "loss": 0.2574, "step": 4526 }, { "epoch": 0.008028518261421534, "grad_norm": 1.421875, "learning_rate": 0.0019938864671724165, "loss": 0.324, "step": 4528 }, { "epoch": 0.008032064426731348, "grad_norm": 0.38671875, "learning_rate": 0.001993879539669629, "loss": 0.2308, "step": 4530 }, { "epoch": 0.008035610592041165, "grad_norm": 2.21875, "learning_rate": 0.001993872608257531, "loss": 0.2725, "step": 4532 }, { "epoch": 0.00803915675735098, "grad_norm": 0.96484375, "learning_rate": 0.0019938656729361535, "loss": 0.325, "step": 4534 }, { "epoch": 0.008042702922660794, "grad_norm": 0.18359375, "learning_rate": 0.001993858733705527, "loss": 0.3296, "step": 4536 }, { "epoch": 0.00804624908797061, "grad_norm": 0.349609375, "learning_rate": 0.0019938517905656815, "loss": 0.3021, "step": 4538 }, { "epoch": 0.008049795253280425, "grad_norm": 0.35546875, "learning_rate": 0.0019938448435166474, "loss": 0.3478, "step": 4540 }, { "epoch": 0.00805334141859024, "grad_norm": 0.212890625, "learning_rate": 0.001993837892558455, "loss": 0.3006, "step": 4542 }, { "epoch": 0.008056887583900055, "grad_norm": 0.5703125, "learning_rate": 0.001993830937691135, "loss": 0.2103, "step": 4544 }, { "epoch": 0.00806043374920987, "grad_norm": 0.671875, "learning_rate": 0.001993823978914718, "loss": 0.2476, "step": 4546 }, { "epoch": 0.008063979914519685, "grad_norm": 0.4296875, "learning_rate": 0.001993817016229234, "loss": 0.2564, "step": 4548 }, { "epoch": 0.0080675260798295, "grad_norm": 0.396484375, "learning_rate": 0.001993810049634713, "loss": 0.5138, "step": 4550 }, { "epoch": 0.008071072245139315, "grad_norm": 0.359375, "learning_rate": 0.0019938030791311866, "loss": 0.2639, "step": 4552 }, { "epoch": 0.00807461841044913, "grad_norm": 0.490234375, "learning_rate": 0.0019937961047186846, "loss": 0.3402, "step": 4554 }, { "epoch": 0.008078164575758946, "grad_norm": 0.51171875, "learning_rate": 0.0019937891263972374, "loss": 0.3265, "step": 4556 }, { "epoch": 0.00808171074106876, "grad_norm": 1.140625, "learning_rate": 0.0019937821441668763, "loss": 0.3948, "step": 4558 }, { "epoch": 0.008085256906378575, "grad_norm": 0.34765625, "learning_rate": 0.001993775158027631, "loss": 0.2407, "step": 4560 }, { "epoch": 0.008088803071688392, "grad_norm": 0.55859375, "learning_rate": 0.0019937681679795317, "loss": 0.3222, "step": 4562 }, { "epoch": 0.008092349236998206, "grad_norm": 0.2451171875, "learning_rate": 0.0019937611740226103, "loss": 0.2497, "step": 4564 }, { "epoch": 0.008095895402308022, "grad_norm": 0.46875, "learning_rate": 0.0019937541761568963, "loss": 0.2194, "step": 4566 }, { "epoch": 0.008099441567617837, "grad_norm": 0.62109375, "learning_rate": 0.001993747174382421, "loss": 0.3039, "step": 4568 }, { "epoch": 0.008102987732927652, "grad_norm": 0.1806640625, "learning_rate": 0.0019937401686992147, "loss": 0.2621, "step": 4570 }, { "epoch": 0.008106533898237468, "grad_norm": 0.470703125, "learning_rate": 0.0019937331591073078, "loss": 0.2045, "step": 4572 }, { "epoch": 0.008110080063547282, "grad_norm": 0.30078125, "learning_rate": 0.0019937261456067315, "loss": 0.294, "step": 4574 }, { "epoch": 0.008113626228857097, "grad_norm": 0.2734375, "learning_rate": 0.001993719128197516, "loss": 0.2457, "step": 4576 }, { "epoch": 0.008117172394166913, "grad_norm": 0.1962890625, "learning_rate": 0.0019937121068796925, "loss": 0.2523, "step": 4578 }, { "epoch": 0.008120718559476728, "grad_norm": 0.34375, "learning_rate": 0.001993705081653291, "loss": 0.2384, "step": 4580 }, { "epoch": 0.008124264724786542, "grad_norm": 0.208984375, "learning_rate": 0.0019936980525183425, "loss": 0.3318, "step": 4582 }, { "epoch": 0.008127810890096359, "grad_norm": 0.73828125, "learning_rate": 0.001993691019474878, "loss": 0.3099, "step": 4584 }, { "epoch": 0.008131357055406173, "grad_norm": 0.2041015625, "learning_rate": 0.001993683982522928, "loss": 0.3007, "step": 4586 }, { "epoch": 0.008134903220715988, "grad_norm": 0.373046875, "learning_rate": 0.0019936769416625238, "loss": 0.2842, "step": 4588 }, { "epoch": 0.008138449386025804, "grad_norm": 0.388671875, "learning_rate": 0.001993669896893695, "loss": 0.2214, "step": 4590 }, { "epoch": 0.008141995551335619, "grad_norm": 0.50390625, "learning_rate": 0.001993662848216474, "loss": 0.2985, "step": 4592 }, { "epoch": 0.008145541716645433, "grad_norm": 2.015625, "learning_rate": 0.0019936557956308906, "loss": 0.404, "step": 4594 }, { "epoch": 0.00814908788195525, "grad_norm": 2.171875, "learning_rate": 0.0019936487391369754, "loss": 0.3872, "step": 4596 }, { "epoch": 0.008152634047265064, "grad_norm": 2.078125, "learning_rate": 0.00199364167873476, "loss": 0.501, "step": 4598 }, { "epoch": 0.00815618021257488, "grad_norm": 0.55859375, "learning_rate": 0.001993634614424275, "loss": 0.2852, "step": 4600 }, { "epoch": 0.008159726377884695, "grad_norm": 0.63671875, "learning_rate": 0.0019936275462055513, "loss": 0.2374, "step": 4602 }, { "epoch": 0.00816327254319451, "grad_norm": 0.458984375, "learning_rate": 0.0019936204740786194, "loss": 0.2571, "step": 4604 }, { "epoch": 0.008166818708504326, "grad_norm": 0.8515625, "learning_rate": 0.0019936133980435113, "loss": 0.2724, "step": 4606 }, { "epoch": 0.00817036487381414, "grad_norm": 0.439453125, "learning_rate": 0.0019936063181002564, "loss": 0.2986, "step": 4608 }, { "epoch": 0.008173911039123955, "grad_norm": 0.99609375, "learning_rate": 0.0019935992342488876, "loss": 0.2031, "step": 4610 }, { "epoch": 0.008177457204433771, "grad_norm": 0.4296875, "learning_rate": 0.001993592146489434, "loss": 0.2599, "step": 4612 }, { "epoch": 0.008181003369743586, "grad_norm": 0.345703125, "learning_rate": 0.001993585054821928, "loss": 0.2936, "step": 4614 }, { "epoch": 0.0081845495350534, "grad_norm": 0.60546875, "learning_rate": 0.0019935779592463996, "loss": 0.2948, "step": 4616 }, { "epoch": 0.008188095700363216, "grad_norm": 0.2333984375, "learning_rate": 0.0019935708597628803, "loss": 0.329, "step": 4618 }, { "epoch": 0.008191641865673031, "grad_norm": 0.5625, "learning_rate": 0.001993563756371401, "loss": 0.259, "step": 4620 }, { "epoch": 0.008195188030982846, "grad_norm": 0.3671875, "learning_rate": 0.0019935566490719933, "loss": 0.2434, "step": 4622 }, { "epoch": 0.008198734196292662, "grad_norm": 0.306640625, "learning_rate": 0.0019935495378646875, "loss": 0.2276, "step": 4624 }, { "epoch": 0.008202280361602476, "grad_norm": 0.490234375, "learning_rate": 0.001993542422749515, "loss": 0.2696, "step": 4626 }, { "epoch": 0.008205826526912291, "grad_norm": 0.35546875, "learning_rate": 0.0019935353037265073, "loss": 0.2543, "step": 4628 }, { "epoch": 0.008209372692222107, "grad_norm": 1.171875, "learning_rate": 0.001993528180795695, "loss": 0.328, "step": 4630 }, { "epoch": 0.008212918857531922, "grad_norm": 1.296875, "learning_rate": 0.0019935210539571094, "loss": 0.2815, "step": 4632 }, { "epoch": 0.008216465022841738, "grad_norm": 0.369140625, "learning_rate": 0.0019935139232107823, "loss": 0.2748, "step": 4634 }, { "epoch": 0.008220011188151553, "grad_norm": 0.2392578125, "learning_rate": 0.0019935067885567437, "loss": 0.2366, "step": 4636 }, { "epoch": 0.008223557353461367, "grad_norm": 0.50390625, "learning_rate": 0.0019934996499950254, "loss": 0.2969, "step": 4638 }, { "epoch": 0.008227103518771183, "grad_norm": 0.447265625, "learning_rate": 0.001993492507525659, "loss": 0.1919, "step": 4640 }, { "epoch": 0.008230649684080998, "grad_norm": 0.494140625, "learning_rate": 0.0019934853611486753, "loss": 0.4802, "step": 4642 }, { "epoch": 0.008234195849390813, "grad_norm": 0.4921875, "learning_rate": 0.001993478210864105, "loss": 0.28, "step": 4644 }, { "epoch": 0.008237742014700629, "grad_norm": 0.62109375, "learning_rate": 0.0019934710566719806, "loss": 0.3622, "step": 4646 }, { "epoch": 0.008241288180010443, "grad_norm": 0.37109375, "learning_rate": 0.001993463898572333, "loss": 0.2593, "step": 4648 }, { "epoch": 0.008244834345320258, "grad_norm": 0.60546875, "learning_rate": 0.0019934567365651927, "loss": 0.2329, "step": 4650 }, { "epoch": 0.008248380510630074, "grad_norm": 0.38671875, "learning_rate": 0.001993449570650592, "loss": 0.2304, "step": 4652 }, { "epoch": 0.008251926675939889, "grad_norm": 1.0703125, "learning_rate": 0.001993442400828562, "loss": 0.3452, "step": 4654 }, { "epoch": 0.008255472841249703, "grad_norm": 0.515625, "learning_rate": 0.0019934352270991333, "loss": 0.2684, "step": 4656 }, { "epoch": 0.00825901900655952, "grad_norm": 0.30078125, "learning_rate": 0.0019934280494623385, "loss": 0.2466, "step": 4658 }, { "epoch": 0.008262565171869334, "grad_norm": 1.2421875, "learning_rate": 0.001993420867918208, "loss": 0.3082, "step": 4660 }, { "epoch": 0.008266111337179149, "grad_norm": 0.26171875, "learning_rate": 0.0019934136824667735, "loss": 0.2719, "step": 4662 }, { "epoch": 0.008269657502488965, "grad_norm": 0.408203125, "learning_rate": 0.001993406493108067, "loss": 0.2737, "step": 4664 }, { "epoch": 0.00827320366779878, "grad_norm": 0.84765625, "learning_rate": 0.001993399299842119, "loss": 0.2495, "step": 4666 }, { "epoch": 0.008276749833108596, "grad_norm": 0.9453125, "learning_rate": 0.0019933921026689615, "loss": 0.4277, "step": 4668 }, { "epoch": 0.00828029599841841, "grad_norm": 0.302734375, "learning_rate": 0.001993384901588626, "loss": 0.2412, "step": 4670 }, { "epoch": 0.008283842163728225, "grad_norm": 0.37890625, "learning_rate": 0.001993377696601144, "loss": 0.2906, "step": 4672 }, { "epoch": 0.008287388329038041, "grad_norm": 0.82421875, "learning_rate": 0.001993370487706547, "loss": 0.2661, "step": 4674 }, { "epoch": 0.008290934494347856, "grad_norm": 0.5625, "learning_rate": 0.001993363274904866, "loss": 0.2185, "step": 4676 }, { "epoch": 0.00829448065965767, "grad_norm": 0.53515625, "learning_rate": 0.001993356058196133, "loss": 0.3298, "step": 4678 }, { "epoch": 0.008298026824967487, "grad_norm": 0.578125, "learning_rate": 0.00199334883758038, "loss": 0.2402, "step": 4680 }, { "epoch": 0.008301572990277301, "grad_norm": 0.953125, "learning_rate": 0.0019933416130576372, "loss": 0.3477, "step": 4682 }, { "epoch": 0.008305119155587116, "grad_norm": 0.8125, "learning_rate": 0.001993334384627938, "loss": 0.3677, "step": 4684 }, { "epoch": 0.008308665320896932, "grad_norm": 0.384765625, "learning_rate": 0.0019933271522913124, "loss": 0.2586, "step": 4686 }, { "epoch": 0.008312211486206747, "grad_norm": 1.8125, "learning_rate": 0.0019933199160477935, "loss": 0.4084, "step": 4688 }, { "epoch": 0.008315757651516561, "grad_norm": 0.4609375, "learning_rate": 0.001993312675897412, "loss": 0.336, "step": 4690 }, { "epoch": 0.008319303816826377, "grad_norm": 2.234375, "learning_rate": 0.0019933054318401994, "loss": 0.3933, "step": 4692 }, { "epoch": 0.008322849982136192, "grad_norm": 0.326171875, "learning_rate": 0.001993298183876188, "loss": 0.2626, "step": 4694 }, { "epoch": 0.008326396147446007, "grad_norm": 1.0078125, "learning_rate": 0.001993290932005409, "loss": 0.3769, "step": 4696 }, { "epoch": 0.008329942312755823, "grad_norm": 0.9140625, "learning_rate": 0.0019932836762278946, "loss": 0.5396, "step": 4698 }, { "epoch": 0.008333488478065637, "grad_norm": 0.490234375, "learning_rate": 0.001993276416543676, "loss": 0.3614, "step": 4700 }, { "epoch": 0.008337034643375454, "grad_norm": 0.58984375, "learning_rate": 0.001993269152952785, "loss": 0.3298, "step": 4702 }, { "epoch": 0.008340580808685268, "grad_norm": 0.5859375, "learning_rate": 0.0019932618854552543, "loss": 0.2188, "step": 4704 }, { "epoch": 0.008344126973995083, "grad_norm": 0.53515625, "learning_rate": 0.0019932546140511145, "loss": 0.3649, "step": 4706 }, { "epoch": 0.008347673139304899, "grad_norm": 0.466796875, "learning_rate": 0.0019932473387403982, "loss": 0.3457, "step": 4708 }, { "epoch": 0.008351219304614714, "grad_norm": 0.318359375, "learning_rate": 0.0019932400595231367, "loss": 0.2567, "step": 4710 }, { "epoch": 0.008354765469924528, "grad_norm": 0.2373046875, "learning_rate": 0.001993232776399362, "loss": 0.2158, "step": 4712 }, { "epoch": 0.008358311635234345, "grad_norm": 0.80859375, "learning_rate": 0.001993225489369106, "loss": 0.2842, "step": 4714 }, { "epoch": 0.008361857800544159, "grad_norm": 0.3984375, "learning_rate": 0.001993218198432401, "loss": 0.2714, "step": 4716 }, { "epoch": 0.008365403965853974, "grad_norm": 0.427734375, "learning_rate": 0.0019932109035892777, "loss": 0.3672, "step": 4718 }, { "epoch": 0.00836895013116379, "grad_norm": 0.353515625, "learning_rate": 0.0019932036048397692, "loss": 0.2279, "step": 4720 }, { "epoch": 0.008372496296473604, "grad_norm": 0.28515625, "learning_rate": 0.001993196302183907, "loss": 0.3203, "step": 4722 }, { "epoch": 0.008376042461783419, "grad_norm": 0.81640625, "learning_rate": 0.001993188995621723, "loss": 0.3099, "step": 4724 }, { "epoch": 0.008379588627093235, "grad_norm": 0.5546875, "learning_rate": 0.001993181685153249, "loss": 0.3665, "step": 4726 }, { "epoch": 0.00838313479240305, "grad_norm": 0.8203125, "learning_rate": 0.001993174370778517, "loss": 0.2998, "step": 4728 }, { "epoch": 0.008386680957712864, "grad_norm": 1.4453125, "learning_rate": 0.0019931670524975594, "loss": 0.2354, "step": 4730 }, { "epoch": 0.00839022712302268, "grad_norm": 1.3046875, "learning_rate": 0.0019931597303104076, "loss": 0.4875, "step": 4732 }, { "epoch": 0.008393773288332495, "grad_norm": 0.412109375, "learning_rate": 0.0019931524042170945, "loss": 0.3313, "step": 4734 }, { "epoch": 0.008397319453642312, "grad_norm": 1.0078125, "learning_rate": 0.001993145074217651, "loss": 0.4375, "step": 4736 }, { "epoch": 0.008400865618952126, "grad_norm": 0.328125, "learning_rate": 0.00199313774031211, "loss": 0.2734, "step": 4738 }, { "epoch": 0.00840441178426194, "grad_norm": 0.640625, "learning_rate": 0.001993130402500503, "loss": 0.3512, "step": 4740 }, { "epoch": 0.008407957949571757, "grad_norm": 0.234375, "learning_rate": 0.001993123060782863, "loss": 0.3138, "step": 4742 }, { "epoch": 0.008411504114881571, "grad_norm": 0.294921875, "learning_rate": 0.0019931157151592215, "loss": 0.2789, "step": 4744 }, { "epoch": 0.008415050280191386, "grad_norm": 0.50390625, "learning_rate": 0.0019931083656296103, "loss": 0.2437, "step": 4746 }, { "epoch": 0.008418596445501202, "grad_norm": 2.90625, "learning_rate": 0.001993101012194062, "loss": 0.4977, "step": 4748 }, { "epoch": 0.008422142610811017, "grad_norm": 0.490234375, "learning_rate": 0.0019930936548526084, "loss": 0.315, "step": 4750 }, { "epoch": 0.008425688776120831, "grad_norm": 0.60546875, "learning_rate": 0.0019930862936052827, "loss": 0.2915, "step": 4752 }, { "epoch": 0.008429234941430648, "grad_norm": 0.984375, "learning_rate": 0.0019930789284521152, "loss": 0.2825, "step": 4754 }, { "epoch": 0.008432781106740462, "grad_norm": 0.326171875, "learning_rate": 0.0019930715593931402, "loss": 0.2295, "step": 4756 }, { "epoch": 0.008436327272050277, "grad_norm": 0.478515625, "learning_rate": 0.0019930641864283885, "loss": 0.3066, "step": 4758 }, { "epoch": 0.008439873437360093, "grad_norm": 4.25, "learning_rate": 0.001993056809557893, "loss": 0.6235, "step": 4760 }, { "epoch": 0.008443419602669908, "grad_norm": 0.2255859375, "learning_rate": 0.0019930494287816853, "loss": 0.1938, "step": 4762 }, { "epoch": 0.008446965767979722, "grad_norm": 0.390625, "learning_rate": 0.001993042044099799, "loss": 0.2948, "step": 4764 }, { "epoch": 0.008450511933289538, "grad_norm": 0.59375, "learning_rate": 0.0019930346555122646, "loss": 0.2259, "step": 4766 }, { "epoch": 0.008454058098599353, "grad_norm": 0.44921875, "learning_rate": 0.0019930272630191157, "loss": 0.2892, "step": 4768 }, { "epoch": 0.00845760426390917, "grad_norm": 0.84375, "learning_rate": 0.0019930198666203843, "loss": 0.3746, "step": 4770 }, { "epoch": 0.008461150429218984, "grad_norm": 0.498046875, "learning_rate": 0.0019930124663161027, "loss": 0.2798, "step": 4772 }, { "epoch": 0.008464696594528798, "grad_norm": 3.15625, "learning_rate": 0.0019930050621063036, "loss": 0.358, "step": 4774 }, { "epoch": 0.008468242759838615, "grad_norm": 0.333984375, "learning_rate": 0.0019929976539910187, "loss": 0.235, "step": 4776 }, { "epoch": 0.00847178892514843, "grad_norm": 0.26953125, "learning_rate": 0.0019929902419702807, "loss": 0.223, "step": 4778 }, { "epoch": 0.008475335090458244, "grad_norm": 0.53515625, "learning_rate": 0.0019929828260441223, "loss": 0.3083, "step": 4780 }, { "epoch": 0.00847888125576806, "grad_norm": 0.38671875, "learning_rate": 0.001992975406212576, "loss": 0.1993, "step": 4782 }, { "epoch": 0.008482427421077875, "grad_norm": 0.462890625, "learning_rate": 0.0019929679824756733, "loss": 0.2866, "step": 4784 }, { "epoch": 0.00848597358638769, "grad_norm": 2.875, "learning_rate": 0.001992960554833448, "loss": 0.3772, "step": 4786 }, { "epoch": 0.008489519751697506, "grad_norm": 0.890625, "learning_rate": 0.001992953123285932, "loss": 0.391, "step": 4788 }, { "epoch": 0.00849306591700732, "grad_norm": 0.2412109375, "learning_rate": 0.001992945687833157, "loss": 0.4167, "step": 4790 }, { "epoch": 0.008496612082317135, "grad_norm": 0.484375, "learning_rate": 0.001992938248475157, "loss": 0.4974, "step": 4792 }, { "epoch": 0.008500158247626951, "grad_norm": 1.2578125, "learning_rate": 0.001992930805211963, "loss": 0.3321, "step": 4794 }, { "epoch": 0.008503704412936765, "grad_norm": 0.3984375, "learning_rate": 0.0019929233580436093, "loss": 0.2286, "step": 4796 }, { "epoch": 0.00850725057824658, "grad_norm": 0.515625, "learning_rate": 0.0019929159069701267, "loss": 0.3003, "step": 4798 }, { "epoch": 0.008510796743556396, "grad_norm": 0.388671875, "learning_rate": 0.001992908451991549, "loss": 0.2634, "step": 4800 }, { "epoch": 0.00851434290886621, "grad_norm": 1.0390625, "learning_rate": 0.001992900993107908, "loss": 0.3445, "step": 4802 }, { "epoch": 0.008517889074176027, "grad_norm": 1.1640625, "learning_rate": 0.0019928935303192372, "loss": 0.2538, "step": 4804 }, { "epoch": 0.008521435239485842, "grad_norm": 1.078125, "learning_rate": 0.0019928860636255685, "loss": 0.2472, "step": 4806 }, { "epoch": 0.008524981404795656, "grad_norm": 0.486328125, "learning_rate": 0.001992878593026935, "loss": 0.2252, "step": 4808 }, { "epoch": 0.008528527570105473, "grad_norm": 0.46484375, "learning_rate": 0.001992871118523369, "loss": 0.2489, "step": 4810 }, { "epoch": 0.008532073735415287, "grad_norm": 0.40625, "learning_rate": 0.001992863640114903, "loss": 0.3192, "step": 4812 }, { "epoch": 0.008535619900725102, "grad_norm": 0.357421875, "learning_rate": 0.0019928561578015707, "loss": 0.2293, "step": 4814 }, { "epoch": 0.008539166066034918, "grad_norm": 0.32421875, "learning_rate": 0.001992848671583404, "loss": 0.2109, "step": 4816 }, { "epoch": 0.008542712231344732, "grad_norm": 0.52734375, "learning_rate": 0.0019928411814604356, "loss": 0.2383, "step": 4818 }, { "epoch": 0.008546258396654547, "grad_norm": 0.416015625, "learning_rate": 0.0019928336874326987, "loss": 0.2808, "step": 4820 }, { "epoch": 0.008549804561964363, "grad_norm": 2.15625, "learning_rate": 0.0019928261895002263, "loss": 0.3658, "step": 4822 }, { "epoch": 0.008553350727274178, "grad_norm": 0.796875, "learning_rate": 0.00199281868766305, "loss": 0.3208, "step": 4824 }, { "epoch": 0.008556896892583992, "grad_norm": 0.7734375, "learning_rate": 0.001992811181921204, "loss": 0.3594, "step": 4826 }, { "epoch": 0.008560443057893809, "grad_norm": 0.5078125, "learning_rate": 0.00199280367227472, "loss": 0.2765, "step": 4828 }, { "epoch": 0.008563989223203623, "grad_norm": 0.62109375, "learning_rate": 0.0019927961587236313, "loss": 0.2422, "step": 4830 }, { "epoch": 0.008567535388513438, "grad_norm": 1.7734375, "learning_rate": 0.001992788641267971, "loss": 0.4287, "step": 4832 }, { "epoch": 0.008571081553823254, "grad_norm": 1.0234375, "learning_rate": 0.001992781119907772, "loss": 0.3503, "step": 4834 }, { "epoch": 0.008574627719133069, "grad_norm": 0.38671875, "learning_rate": 0.0019927735946430668, "loss": 0.22, "step": 4836 }, { "epoch": 0.008578173884442885, "grad_norm": 0.421875, "learning_rate": 0.0019927660654738884, "loss": 0.2637, "step": 4838 }, { "epoch": 0.0085817200497527, "grad_norm": 0.40234375, "learning_rate": 0.00199275853240027, "loss": 0.3468, "step": 4840 }, { "epoch": 0.008585266215062514, "grad_norm": 0.53125, "learning_rate": 0.001992750995422244, "loss": 0.3158, "step": 4842 }, { "epoch": 0.00858881238037233, "grad_norm": 2.578125, "learning_rate": 0.001992743454539844, "loss": 0.2404, "step": 4844 }, { "epoch": 0.008592358545682145, "grad_norm": 0.470703125, "learning_rate": 0.001992735909753103, "loss": 0.3253, "step": 4846 }, { "epoch": 0.00859590471099196, "grad_norm": 1.2578125, "learning_rate": 0.001992728361062053, "loss": 0.2342, "step": 4848 }, { "epoch": 0.008599450876301776, "grad_norm": 0.640625, "learning_rate": 0.001992720808466728, "loss": 0.2632, "step": 4850 }, { "epoch": 0.00860299704161159, "grad_norm": 0.515625, "learning_rate": 0.001992713251967161, "loss": 0.2621, "step": 4852 }, { "epoch": 0.008606543206921405, "grad_norm": 2.96875, "learning_rate": 0.0019927056915633842, "loss": 0.3666, "step": 4854 }, { "epoch": 0.008610089372231221, "grad_norm": 1.5703125, "learning_rate": 0.0019926981272554317, "loss": 0.4931, "step": 4856 }, { "epoch": 0.008613635537541036, "grad_norm": 0.33984375, "learning_rate": 0.001992690559043336, "loss": 0.2825, "step": 4858 }, { "epoch": 0.00861718170285085, "grad_norm": 0.29296875, "learning_rate": 0.0019926829869271307, "loss": 0.2756, "step": 4860 }, { "epoch": 0.008620727868160667, "grad_norm": 2.09375, "learning_rate": 0.0019926754109068482, "loss": 0.273, "step": 4862 }, { "epoch": 0.008624274033470481, "grad_norm": 0.345703125, "learning_rate": 0.001992667830982522, "loss": 0.2479, "step": 4864 }, { "epoch": 0.008627820198780296, "grad_norm": 1.0703125, "learning_rate": 0.001992660247154185, "loss": 0.2527, "step": 4866 }, { "epoch": 0.008631366364090112, "grad_norm": 0.373046875, "learning_rate": 0.001992652659421871, "loss": 0.2085, "step": 4868 }, { "epoch": 0.008634912529399926, "grad_norm": 1.0703125, "learning_rate": 0.0019926450677856125, "loss": 0.4311, "step": 4870 }, { "epoch": 0.008638458694709743, "grad_norm": 0.609375, "learning_rate": 0.001992637472245443, "loss": 0.4236, "step": 4872 }, { "epoch": 0.008642004860019557, "grad_norm": 0.384765625, "learning_rate": 0.001992629872801396, "loss": 0.2683, "step": 4874 }, { "epoch": 0.008645551025329372, "grad_norm": 4.21875, "learning_rate": 0.001992622269453504, "loss": 0.3981, "step": 4876 }, { "epoch": 0.008649097190639188, "grad_norm": 1.0859375, "learning_rate": 0.001992614662201801, "loss": 0.2772, "step": 4878 }, { "epoch": 0.008652643355949003, "grad_norm": 0.2138671875, "learning_rate": 0.00199260705104632, "loss": 0.2528, "step": 4880 }, { "epoch": 0.008656189521258817, "grad_norm": 0.4296875, "learning_rate": 0.001992599435987094, "loss": 0.2588, "step": 4882 }, { "epoch": 0.008659735686568634, "grad_norm": 0.69140625, "learning_rate": 0.0019925918170241573, "loss": 0.2687, "step": 4884 }, { "epoch": 0.008663281851878448, "grad_norm": 0.92578125, "learning_rate": 0.0019925841941575415, "loss": 0.297, "step": 4886 }, { "epoch": 0.008666828017188263, "grad_norm": 0.349609375, "learning_rate": 0.001992576567387281, "loss": 0.2267, "step": 4888 }, { "epoch": 0.008670374182498079, "grad_norm": 0.287109375, "learning_rate": 0.0019925689367134096, "loss": 0.2558, "step": 4890 }, { "epoch": 0.008673920347807893, "grad_norm": 0.330078125, "learning_rate": 0.00199256130213596, "loss": 0.3102, "step": 4892 }, { "epoch": 0.008677466513117708, "grad_norm": 2.734375, "learning_rate": 0.0019925536636549654, "loss": 0.3242, "step": 4894 }, { "epoch": 0.008681012678427524, "grad_norm": 2.796875, "learning_rate": 0.0019925460212704598, "loss": 0.4111, "step": 4896 }, { "epoch": 0.008684558843737339, "grad_norm": 0.53125, "learning_rate": 0.0019925383749824764, "loss": 0.3217, "step": 4898 }, { "epoch": 0.008688105009047153, "grad_norm": 0.9609375, "learning_rate": 0.001992530724791048, "loss": 0.2641, "step": 4900 }, { "epoch": 0.00869165117435697, "grad_norm": 0.625, "learning_rate": 0.0019925230706962093, "loss": 0.3279, "step": 4902 }, { "epoch": 0.008695197339666784, "grad_norm": 0.21875, "learning_rate": 0.0019925154126979932, "loss": 0.2284, "step": 4904 }, { "epoch": 0.0086987435049766, "grad_norm": 2.6875, "learning_rate": 0.0019925077507964325, "loss": 0.5094, "step": 4906 }, { "epoch": 0.008702289670286415, "grad_norm": 0.49609375, "learning_rate": 0.001992500084991562, "loss": 0.2678, "step": 4908 }, { "epoch": 0.00870583583559623, "grad_norm": 0.318359375, "learning_rate": 0.001992492415283414, "loss": 0.2485, "step": 4910 }, { "epoch": 0.008709382000906046, "grad_norm": 1.1484375, "learning_rate": 0.001992484741672023, "loss": 0.2542, "step": 4912 }, { "epoch": 0.00871292816621586, "grad_norm": 0.29296875, "learning_rate": 0.0019924770641574223, "loss": 0.2881, "step": 4914 }, { "epoch": 0.008716474331525675, "grad_norm": 0.45703125, "learning_rate": 0.001992469382739645, "loss": 0.305, "step": 4916 }, { "epoch": 0.008720020496835491, "grad_norm": 0.30078125, "learning_rate": 0.0019924616974187253, "loss": 0.2519, "step": 4918 }, { "epoch": 0.008723566662145306, "grad_norm": 0.4609375, "learning_rate": 0.0019924540081946956, "loss": 0.3043, "step": 4920 }, { "epoch": 0.00872711282745512, "grad_norm": 0.859375, "learning_rate": 0.0019924463150675915, "loss": 0.2178, "step": 4922 }, { "epoch": 0.008730658992764937, "grad_norm": 0.392578125, "learning_rate": 0.001992438618037445, "loss": 0.2876, "step": 4924 }, { "epoch": 0.008734205158074751, "grad_norm": 0.91015625, "learning_rate": 0.001992430917104291, "loss": 0.2921, "step": 4926 }, { "epoch": 0.008737751323384566, "grad_norm": 8.375, "learning_rate": 0.0019924232122681624, "loss": 0.3517, "step": 4928 }, { "epoch": 0.008741297488694382, "grad_norm": 0.59375, "learning_rate": 0.0019924155035290925, "loss": 0.2663, "step": 4930 }, { "epoch": 0.008744843654004197, "grad_norm": 2.828125, "learning_rate": 0.001992407790887116, "loss": 0.2653, "step": 4932 }, { "epoch": 0.008748389819314011, "grad_norm": 0.59765625, "learning_rate": 0.001992400074342266, "loss": 0.3318, "step": 4934 }, { "epoch": 0.008751935984623828, "grad_norm": 0.4609375, "learning_rate": 0.0019923923538945764, "loss": 0.242, "step": 4936 }, { "epoch": 0.008755482149933642, "grad_norm": 0.33203125, "learning_rate": 0.001992384629544081, "loss": 0.2559, "step": 4938 }, { "epoch": 0.008759028315243458, "grad_norm": 0.671875, "learning_rate": 0.0019923769012908142, "loss": 0.2524, "step": 4940 }, { "epoch": 0.008762574480553273, "grad_norm": 0.61328125, "learning_rate": 0.001992369169134809, "loss": 0.297, "step": 4942 }, { "epoch": 0.008766120645863087, "grad_norm": 0.50390625, "learning_rate": 0.0019923614330760986, "loss": 0.3369, "step": 4944 }, { "epoch": 0.008769666811172904, "grad_norm": 1.0078125, "learning_rate": 0.001992353693114719, "loss": 0.2508, "step": 4946 }, { "epoch": 0.008773212976482718, "grad_norm": 1.96875, "learning_rate": 0.0019923459492507014, "loss": 0.4605, "step": 4948 }, { "epoch": 0.008776759141792533, "grad_norm": 0.72265625, "learning_rate": 0.0019923382014840818, "loss": 0.2632, "step": 4950 }, { "epoch": 0.00878030530710235, "grad_norm": 0.306640625, "learning_rate": 0.001992330449814893, "loss": 0.2353, "step": 4952 }, { "epoch": 0.008783851472412164, "grad_norm": 0.302734375, "learning_rate": 0.0019923226942431685, "loss": 0.197, "step": 4954 }, { "epoch": 0.008787397637721978, "grad_norm": 0.3203125, "learning_rate": 0.0019923149347689435, "loss": 0.2633, "step": 4956 }, { "epoch": 0.008790943803031795, "grad_norm": 0.478515625, "learning_rate": 0.001992307171392251, "loss": 0.237, "step": 4958 }, { "epoch": 0.008794489968341609, "grad_norm": 0.9765625, "learning_rate": 0.0019922994041131257, "loss": 0.3999, "step": 4960 }, { "epoch": 0.008798036133651424, "grad_norm": 0.431640625, "learning_rate": 0.0019922916329316006, "loss": 0.2326, "step": 4962 }, { "epoch": 0.00880158229896124, "grad_norm": 0.79296875, "learning_rate": 0.0019922838578477105, "loss": 0.3141, "step": 4964 }, { "epoch": 0.008805128464271055, "grad_norm": 0.5234375, "learning_rate": 0.0019922760788614892, "loss": 0.2723, "step": 4966 }, { "epoch": 0.008808674629580869, "grad_norm": 1.2265625, "learning_rate": 0.0019922682959729703, "loss": 0.2961, "step": 4968 }, { "epoch": 0.008812220794890685, "grad_norm": 0.60546875, "learning_rate": 0.001992260509182188, "loss": 0.3318, "step": 4970 }, { "epoch": 0.0088157669602005, "grad_norm": 0.6328125, "learning_rate": 0.0019922527184891774, "loss": 0.253, "step": 4972 }, { "epoch": 0.008819313125510316, "grad_norm": 0.265625, "learning_rate": 0.0019922449238939707, "loss": 0.2028, "step": 4974 }, { "epoch": 0.00882285929082013, "grad_norm": 2.640625, "learning_rate": 0.0019922371253966037, "loss": 0.3068, "step": 4976 }, { "epoch": 0.008826405456129945, "grad_norm": 0.37890625, "learning_rate": 0.0019922293229971094, "loss": 0.3086, "step": 4978 }, { "epoch": 0.008829951621439762, "grad_norm": 3.3125, "learning_rate": 0.0019922215166955225, "loss": 0.4288, "step": 4980 }, { "epoch": 0.008833497786749576, "grad_norm": 0.28515625, "learning_rate": 0.0019922137064918768, "loss": 0.2756, "step": 4982 }, { "epoch": 0.00883704395205939, "grad_norm": 0.373046875, "learning_rate": 0.001992205892386207, "loss": 0.222, "step": 4984 }, { "epoch": 0.008840590117369207, "grad_norm": 0.39453125, "learning_rate": 0.001992198074378546, "loss": 0.1995, "step": 4986 }, { "epoch": 0.008844136282679022, "grad_norm": 1.453125, "learning_rate": 0.00199219025246893, "loss": 0.4126, "step": 4988 }, { "epoch": 0.008847682447988836, "grad_norm": 0.419921875, "learning_rate": 0.001992182426657391, "loss": 0.2793, "step": 4990 }, { "epoch": 0.008851228613298652, "grad_norm": 0.66796875, "learning_rate": 0.001992174596943965, "loss": 0.214, "step": 4992 }, { "epoch": 0.008854774778608467, "grad_norm": 0.498046875, "learning_rate": 0.0019921667633286855, "loss": 0.2751, "step": 4994 }, { "epoch": 0.008858320943918281, "grad_norm": 0.28515625, "learning_rate": 0.0019921589258115866, "loss": 0.2328, "step": 4996 }, { "epoch": 0.008861867109228098, "grad_norm": 0.2470703125, "learning_rate": 0.001992151084392703, "loss": 0.2115, "step": 4998 }, { "epoch": 0.008865413274537912, "grad_norm": 1.78125, "learning_rate": 0.0019921432390720686, "loss": 0.355, "step": 5000 }, { "epoch": 0.008868959439847727, "grad_norm": 0.2265625, "learning_rate": 0.001992135389849718, "loss": 0.2089, "step": 5002 }, { "epoch": 0.008872505605157543, "grad_norm": 0.55859375, "learning_rate": 0.001992127536725685, "loss": 0.2244, "step": 5004 }, { "epoch": 0.008876051770467358, "grad_norm": 0.40625, "learning_rate": 0.0019921196797000047, "loss": 0.3244, "step": 5006 }, { "epoch": 0.008879597935777174, "grad_norm": 0.46484375, "learning_rate": 0.0019921118187727115, "loss": 0.2634, "step": 5008 }, { "epoch": 0.008883144101086989, "grad_norm": 0.2158203125, "learning_rate": 0.001992103953943839, "loss": 0.2841, "step": 5010 }, { "epoch": 0.008886690266396803, "grad_norm": 0.328125, "learning_rate": 0.001992096085213422, "loss": 0.2345, "step": 5012 }, { "epoch": 0.00889023643170662, "grad_norm": 0.310546875, "learning_rate": 0.001992088212581495, "loss": 0.3092, "step": 5014 }, { "epoch": 0.008893782597016434, "grad_norm": 0.36328125, "learning_rate": 0.0019920803360480924, "loss": 0.2867, "step": 5016 }, { "epoch": 0.008897328762326248, "grad_norm": 0.423828125, "learning_rate": 0.001992072455613248, "loss": 0.2624, "step": 5018 }, { "epoch": 0.008900874927636065, "grad_norm": 0.392578125, "learning_rate": 0.001992064571276998, "loss": 0.278, "step": 5020 }, { "epoch": 0.00890442109294588, "grad_norm": 5.8125, "learning_rate": 0.001992056683039375, "loss": 0.5674, "step": 5022 }, { "epoch": 0.008907967258255694, "grad_norm": 0.69921875, "learning_rate": 0.0019920487909004143, "loss": 0.3603, "step": 5024 }, { "epoch": 0.00891151342356551, "grad_norm": 0.61328125, "learning_rate": 0.0019920408948601504, "loss": 0.2735, "step": 5026 }, { "epoch": 0.008915059588875325, "grad_norm": 0.37890625, "learning_rate": 0.0019920329949186175, "loss": 0.2716, "step": 5028 }, { "epoch": 0.00891860575418514, "grad_norm": 0.953125, "learning_rate": 0.0019920250910758506, "loss": 0.2975, "step": 5030 }, { "epoch": 0.008922151919494956, "grad_norm": 0.48046875, "learning_rate": 0.001992017183331884, "loss": 0.2184, "step": 5032 }, { "epoch": 0.00892569808480477, "grad_norm": 0.546875, "learning_rate": 0.001992009271686753, "loss": 0.2205, "step": 5034 }, { "epoch": 0.008929244250114585, "grad_norm": 0.62109375, "learning_rate": 0.001992001356140491, "loss": 0.2986, "step": 5036 }, { "epoch": 0.008932790415424401, "grad_norm": 0.30078125, "learning_rate": 0.0019919934366931335, "loss": 0.3347, "step": 5038 }, { "epoch": 0.008936336580734216, "grad_norm": 0.30078125, "learning_rate": 0.0019919855133447148, "loss": 0.2387, "step": 5040 }, { "epoch": 0.008939882746044032, "grad_norm": 0.404296875, "learning_rate": 0.0019919775860952693, "loss": 0.3003, "step": 5042 }, { "epoch": 0.008943428911353846, "grad_norm": 0.5234375, "learning_rate": 0.001991969654944832, "loss": 0.2495, "step": 5044 }, { "epoch": 0.008946975076663661, "grad_norm": 0.423828125, "learning_rate": 0.001991961719893438, "loss": 0.2789, "step": 5046 }, { "epoch": 0.008950521241973477, "grad_norm": 0.40234375, "learning_rate": 0.001991953780941121, "loss": 0.2735, "step": 5048 }, { "epoch": 0.008954067407283292, "grad_norm": 0.474609375, "learning_rate": 0.001991945838087917, "loss": 0.3177, "step": 5050 }, { "epoch": 0.008957613572593106, "grad_norm": 0.259765625, "learning_rate": 0.001991937891333859, "loss": 0.2191, "step": 5052 }, { "epoch": 0.008961159737902923, "grad_norm": 0.421875, "learning_rate": 0.001991929940678984, "loss": 0.2816, "step": 5054 }, { "epoch": 0.008964705903212737, "grad_norm": 0.62890625, "learning_rate": 0.0019919219861233243, "loss": 0.4708, "step": 5056 }, { "epoch": 0.008968252068522552, "grad_norm": 0.5546875, "learning_rate": 0.0019919140276669165, "loss": 0.325, "step": 5058 }, { "epoch": 0.008971798233832368, "grad_norm": 0.6953125, "learning_rate": 0.001991906065309795, "loss": 0.2673, "step": 5060 }, { "epoch": 0.008975344399142183, "grad_norm": 0.404296875, "learning_rate": 0.0019918980990519942, "loss": 0.2394, "step": 5062 }, { "epoch": 0.008978890564451997, "grad_norm": 1.2734375, "learning_rate": 0.001991890128893549, "loss": 0.3693, "step": 5064 }, { "epoch": 0.008982436729761813, "grad_norm": 0.44140625, "learning_rate": 0.0019918821548344946, "loss": 0.265, "step": 5066 }, { "epoch": 0.008985982895071628, "grad_norm": 1.140625, "learning_rate": 0.0019918741768748657, "loss": 0.2911, "step": 5068 }, { "epoch": 0.008989529060381442, "grad_norm": 0.703125, "learning_rate": 0.0019918661950146977, "loss": 0.2495, "step": 5070 }, { "epoch": 0.008993075225691259, "grad_norm": 0.57421875, "learning_rate": 0.0019918582092540247, "loss": 0.2729, "step": 5072 }, { "epoch": 0.008996621391001073, "grad_norm": 0.443359375, "learning_rate": 0.0019918502195928815, "loss": 0.2481, "step": 5074 }, { "epoch": 0.00900016755631089, "grad_norm": 0.400390625, "learning_rate": 0.001991842226031304, "loss": 0.2532, "step": 5076 }, { "epoch": 0.009003713721620704, "grad_norm": 0.515625, "learning_rate": 0.0019918342285693267, "loss": 0.4735, "step": 5078 }, { "epoch": 0.009007259886930519, "grad_norm": 0.33984375, "learning_rate": 0.001991826227206984, "loss": 0.1499, "step": 5080 }, { "epoch": 0.009010806052240335, "grad_norm": 1.453125, "learning_rate": 0.001991818221944312, "loss": 0.4161, "step": 5082 }, { "epoch": 0.00901435221755015, "grad_norm": 0.486328125, "learning_rate": 0.0019918102127813447, "loss": 0.3267, "step": 5084 }, { "epoch": 0.009017898382859964, "grad_norm": 0.41015625, "learning_rate": 0.001991802199718118, "loss": 0.3666, "step": 5086 }, { "epoch": 0.00902144454816978, "grad_norm": 0.4296875, "learning_rate": 0.0019917941827546663, "loss": 0.2611, "step": 5088 }, { "epoch": 0.009024990713479595, "grad_norm": 0.44921875, "learning_rate": 0.0019917861618910246, "loss": 0.2883, "step": 5090 }, { "epoch": 0.00902853687878941, "grad_norm": 1.421875, "learning_rate": 0.0019917781371272284, "loss": 0.4393, "step": 5092 }, { "epoch": 0.009032083044099226, "grad_norm": 0.216796875, "learning_rate": 0.001991770108463313, "loss": 0.2813, "step": 5094 }, { "epoch": 0.00903562920940904, "grad_norm": 0.4921875, "learning_rate": 0.0019917620758993127, "loss": 0.2903, "step": 5096 }, { "epoch": 0.009039175374718855, "grad_norm": 0.734375, "learning_rate": 0.0019917540394352633, "loss": 0.34, "step": 5098 }, { "epoch": 0.009042721540028671, "grad_norm": 0.5625, "learning_rate": 0.0019917459990711995, "loss": 0.2248, "step": 5100 }, { "epoch": 0.009046267705338486, "grad_norm": 0.279296875, "learning_rate": 0.001991737954807157, "loss": 0.3489, "step": 5102 }, { "epoch": 0.0090498138706483, "grad_norm": 1.03125, "learning_rate": 0.0019917299066431705, "loss": 0.1884, "step": 5104 }, { "epoch": 0.009053360035958117, "grad_norm": 0.609375, "learning_rate": 0.001991721854579275, "loss": 0.234, "step": 5106 }, { "epoch": 0.009056906201267931, "grad_norm": 0.46484375, "learning_rate": 0.0019917137986155066, "loss": 0.3182, "step": 5108 }, { "epoch": 0.009060452366577747, "grad_norm": 1.515625, "learning_rate": 0.0019917057387519, "loss": 0.4011, "step": 5110 }, { "epoch": 0.009063998531887562, "grad_norm": 0.294921875, "learning_rate": 0.0019916976749884898, "loss": 0.3163, "step": 5112 }, { "epoch": 0.009067544697197377, "grad_norm": 0.478515625, "learning_rate": 0.001991689607325313, "loss": 0.3905, "step": 5114 }, { "epoch": 0.009071090862507193, "grad_norm": 0.765625, "learning_rate": 0.001991681535762403, "loss": 0.2781, "step": 5116 }, { "epoch": 0.009074637027817007, "grad_norm": 0.37109375, "learning_rate": 0.0019916734602997963, "loss": 0.3354, "step": 5118 }, { "epoch": 0.009078183193126822, "grad_norm": 0.357421875, "learning_rate": 0.0019916653809375273, "loss": 0.2352, "step": 5120 }, { "epoch": 0.009081729358436638, "grad_norm": 0.63671875, "learning_rate": 0.0019916572976756324, "loss": 0.2426, "step": 5122 }, { "epoch": 0.009085275523746453, "grad_norm": 1.484375, "learning_rate": 0.0019916492105141463, "loss": 0.4047, "step": 5124 }, { "epoch": 0.009088821689056267, "grad_norm": 0.53515625, "learning_rate": 0.0019916411194531043, "loss": 0.2283, "step": 5126 }, { "epoch": 0.009092367854366084, "grad_norm": 0.953125, "learning_rate": 0.001991633024492542, "loss": 0.3103, "step": 5128 }, { "epoch": 0.009095914019675898, "grad_norm": 0.796875, "learning_rate": 0.001991624925632495, "loss": 0.2451, "step": 5130 }, { "epoch": 0.009099460184985713, "grad_norm": 0.416015625, "learning_rate": 0.0019916168228729983, "loss": 0.3287, "step": 5132 }, { "epoch": 0.009103006350295529, "grad_norm": 2.125, "learning_rate": 0.001991608716214088, "loss": 0.3232, "step": 5134 }, { "epoch": 0.009106552515605344, "grad_norm": 0.69140625, "learning_rate": 0.0019916006056557986, "loss": 0.1628, "step": 5136 }, { "epoch": 0.009110098680915158, "grad_norm": 0.5703125, "learning_rate": 0.001991592491198166, "loss": 0.3016, "step": 5138 }, { "epoch": 0.009113644846224974, "grad_norm": 0.470703125, "learning_rate": 0.001991584372841226, "loss": 0.2512, "step": 5140 }, { "epoch": 0.009117191011534789, "grad_norm": 6.65625, "learning_rate": 0.001991576250585014, "loss": 0.3315, "step": 5142 }, { "epoch": 0.009120737176844605, "grad_norm": 0.578125, "learning_rate": 0.0019915681244295647, "loss": 0.2593, "step": 5144 }, { "epoch": 0.00912428334215442, "grad_norm": 1.2421875, "learning_rate": 0.001991559994374915, "loss": 0.4136, "step": 5146 }, { "epoch": 0.009127829507464234, "grad_norm": 0.30859375, "learning_rate": 0.001991551860421099, "loss": 0.2256, "step": 5148 }, { "epoch": 0.00913137567277405, "grad_norm": 0.5546875, "learning_rate": 0.001991543722568154, "loss": 0.2684, "step": 5150 }, { "epoch": 0.009134921838083865, "grad_norm": 0.6640625, "learning_rate": 0.001991535580816114, "loss": 0.2399, "step": 5152 }, { "epoch": 0.00913846800339368, "grad_norm": 1.171875, "learning_rate": 0.001991527435165015, "loss": 0.2677, "step": 5154 }, { "epoch": 0.009142014168703496, "grad_norm": 2.421875, "learning_rate": 0.0019915192856148935, "loss": 0.4014, "step": 5156 }, { "epoch": 0.00914556033401331, "grad_norm": 0.4765625, "learning_rate": 0.0019915111321657845, "loss": 0.2408, "step": 5158 }, { "epoch": 0.009149106499323125, "grad_norm": 0.451171875, "learning_rate": 0.0019915029748177235, "loss": 0.5697, "step": 5160 }, { "epoch": 0.009152652664632941, "grad_norm": 1.03125, "learning_rate": 0.001991494813570746, "loss": 0.3591, "step": 5162 }, { "epoch": 0.009156198829942756, "grad_norm": 1.7265625, "learning_rate": 0.001991486648424888, "loss": 0.3965, "step": 5164 }, { "epoch": 0.00915974499525257, "grad_norm": 0.41796875, "learning_rate": 0.001991478479380186, "loss": 0.227, "step": 5166 }, { "epoch": 0.009163291160562387, "grad_norm": 0.484375, "learning_rate": 0.001991470306436674, "loss": 0.2379, "step": 5168 }, { "epoch": 0.009166837325872201, "grad_norm": 2.15625, "learning_rate": 0.0019914621295943893, "loss": 0.373, "step": 5170 }, { "epoch": 0.009170383491182016, "grad_norm": 0.4296875, "learning_rate": 0.001991453948853367, "loss": 0.2354, "step": 5172 }, { "epoch": 0.009173929656491832, "grad_norm": 0.70703125, "learning_rate": 0.001991445764213643, "loss": 0.4654, "step": 5174 }, { "epoch": 0.009177475821801647, "grad_norm": 0.2734375, "learning_rate": 0.001991437575675253, "loss": 0.2566, "step": 5176 }, { "epoch": 0.009181021987111463, "grad_norm": 0.359375, "learning_rate": 0.0019914293832382327, "loss": 0.2169, "step": 5178 }, { "epoch": 0.009184568152421278, "grad_norm": 0.98046875, "learning_rate": 0.001991421186902618, "loss": 0.2468, "step": 5180 }, { "epoch": 0.009188114317731092, "grad_norm": 0.2734375, "learning_rate": 0.001991412986668445, "loss": 0.3031, "step": 5182 }, { "epoch": 0.009191660483040908, "grad_norm": 0.58203125, "learning_rate": 0.0019914047825357493, "loss": 0.3574, "step": 5184 }, { "epoch": 0.009195206648350723, "grad_norm": 0.69140625, "learning_rate": 0.001991396574504567, "loss": 0.2387, "step": 5186 }, { "epoch": 0.009198752813660538, "grad_norm": 0.484375, "learning_rate": 0.0019913883625749342, "loss": 0.2928, "step": 5188 }, { "epoch": 0.009202298978970354, "grad_norm": 1.578125, "learning_rate": 0.0019913801467468855, "loss": 0.4186, "step": 5190 }, { "epoch": 0.009205845144280168, "grad_norm": 0.244140625, "learning_rate": 0.0019913719270204587, "loss": 0.2622, "step": 5192 }, { "epoch": 0.009209391309589983, "grad_norm": 0.328125, "learning_rate": 0.001991363703395689, "loss": 0.2574, "step": 5194 }, { "epoch": 0.0092129374748998, "grad_norm": 0.5078125, "learning_rate": 0.0019913554758726115, "loss": 0.289, "step": 5196 }, { "epoch": 0.009216483640209614, "grad_norm": 1.203125, "learning_rate": 0.001991347244451263, "loss": 0.2643, "step": 5198 }, { "epoch": 0.009220029805519428, "grad_norm": 0.28125, "learning_rate": 0.0019913390091316797, "loss": 0.2406, "step": 5200 }, { "epoch": 0.009223575970829245, "grad_norm": 0.8984375, "learning_rate": 0.0019913307699138973, "loss": 0.4134, "step": 5202 }, { "epoch": 0.00922712213613906, "grad_norm": 0.34375, "learning_rate": 0.001991322526797952, "loss": 0.2647, "step": 5204 }, { "epoch": 0.009230668301448874, "grad_norm": 0.41796875, "learning_rate": 0.0019913142797838793, "loss": 0.2725, "step": 5206 }, { "epoch": 0.00923421446675869, "grad_norm": 0.6953125, "learning_rate": 0.0019913060288717158, "loss": 0.2381, "step": 5208 }, { "epoch": 0.009237760632068505, "grad_norm": 0.7265625, "learning_rate": 0.0019912977740614976, "loss": 0.2676, "step": 5210 }, { "epoch": 0.00924130679737832, "grad_norm": 0.5078125, "learning_rate": 0.0019912895153532608, "loss": 0.3104, "step": 5212 }, { "epoch": 0.009244852962688135, "grad_norm": 0.37109375, "learning_rate": 0.001991281252747041, "loss": 0.3894, "step": 5214 }, { "epoch": 0.00924839912799795, "grad_norm": 1.0546875, "learning_rate": 0.001991272986242875, "loss": 0.297, "step": 5216 }, { "epoch": 0.009251945293307766, "grad_norm": 1.6484375, "learning_rate": 0.0019912647158407985, "loss": 0.3694, "step": 5218 }, { "epoch": 0.00925549145861758, "grad_norm": 0.318359375, "learning_rate": 0.001991256441540848, "loss": 0.3923, "step": 5220 }, { "epoch": 0.009259037623927395, "grad_norm": 0.2099609375, "learning_rate": 0.0019912481633430593, "loss": 0.2637, "step": 5222 }, { "epoch": 0.009262583789237212, "grad_norm": 2.359375, "learning_rate": 0.001991239881247469, "loss": 0.3471, "step": 5224 }, { "epoch": 0.009266129954547026, "grad_norm": 0.91015625, "learning_rate": 0.0019912315952541134, "loss": 0.3024, "step": 5226 }, { "epoch": 0.00926967611985684, "grad_norm": 0.373046875, "learning_rate": 0.001991223305363028, "loss": 0.2688, "step": 5228 }, { "epoch": 0.009273222285166657, "grad_norm": 0.2265625, "learning_rate": 0.00199121501157425, "loss": 0.298, "step": 5230 }, { "epoch": 0.009276768450476472, "grad_norm": 0.53125, "learning_rate": 0.001991206713887815, "loss": 0.3344, "step": 5232 }, { "epoch": 0.009280314615786286, "grad_norm": 1.1640625, "learning_rate": 0.0019911984123037593, "loss": 0.3017, "step": 5234 }, { "epoch": 0.009283860781096102, "grad_norm": 0.376953125, "learning_rate": 0.00199119010682212, "loss": 0.3407, "step": 5236 }, { "epoch": 0.009287406946405917, "grad_norm": 0.2890625, "learning_rate": 0.0019911817974429323, "loss": 0.2423, "step": 5238 }, { "epoch": 0.009290953111715732, "grad_norm": 0.33984375, "learning_rate": 0.001991173484166233, "loss": 0.2364, "step": 5240 }, { "epoch": 0.009294499277025548, "grad_norm": 1.390625, "learning_rate": 0.001991165166992059, "loss": 0.3402, "step": 5242 }, { "epoch": 0.009298045442335362, "grad_norm": 0.267578125, "learning_rate": 0.0019911568459204457, "loss": 0.2299, "step": 5244 }, { "epoch": 0.009301591607645179, "grad_norm": 0.421875, "learning_rate": 0.0019911485209514303, "loss": 0.3193, "step": 5246 }, { "epoch": 0.009305137772954993, "grad_norm": 0.376953125, "learning_rate": 0.001991140192085049, "loss": 0.2265, "step": 5248 }, { "epoch": 0.009308683938264808, "grad_norm": 0.37109375, "learning_rate": 0.0019911318593213378, "loss": 0.2498, "step": 5250 }, { "epoch": 0.009312230103574624, "grad_norm": 0.6171875, "learning_rate": 0.0019911235226603343, "loss": 0.2617, "step": 5252 }, { "epoch": 0.009315776268884439, "grad_norm": 0.58984375, "learning_rate": 0.0019911151821020733, "loss": 0.3511, "step": 5254 }, { "epoch": 0.009319322434194253, "grad_norm": 0.55859375, "learning_rate": 0.001991106837646592, "loss": 0.3287, "step": 5256 }, { "epoch": 0.00932286859950407, "grad_norm": 0.279296875, "learning_rate": 0.0019910984892939276, "loss": 0.3323, "step": 5258 }, { "epoch": 0.009326414764813884, "grad_norm": 0.2294921875, "learning_rate": 0.0019910901370441157, "loss": 0.2309, "step": 5260 }, { "epoch": 0.009329960930123699, "grad_norm": 0.283203125, "learning_rate": 0.0019910817808971933, "loss": 0.3175, "step": 5262 }, { "epoch": 0.009333507095433515, "grad_norm": 0.8203125, "learning_rate": 0.001991073420853197, "loss": 0.2664, "step": 5264 }, { "epoch": 0.00933705326074333, "grad_norm": 0.3515625, "learning_rate": 0.0019910650569121627, "loss": 0.2869, "step": 5266 }, { "epoch": 0.009340599426053144, "grad_norm": 0.220703125, "learning_rate": 0.0019910566890741273, "loss": 0.2393, "step": 5268 }, { "epoch": 0.00934414559136296, "grad_norm": 0.267578125, "learning_rate": 0.001991048317339128, "loss": 0.2711, "step": 5270 }, { "epoch": 0.009347691756672775, "grad_norm": 0.671875, "learning_rate": 0.001991039941707201, "loss": 0.3396, "step": 5272 }, { "epoch": 0.00935123792198259, "grad_norm": 0.55859375, "learning_rate": 0.0019910315621783827, "loss": 0.2058, "step": 5274 }, { "epoch": 0.009354784087292406, "grad_norm": 1.0703125, "learning_rate": 0.00199102317875271, "loss": 0.396, "step": 5276 }, { "epoch": 0.00935833025260222, "grad_norm": 1.046875, "learning_rate": 0.001991014791430219, "loss": 0.2666, "step": 5278 }, { "epoch": 0.009361876417912036, "grad_norm": 0.57421875, "learning_rate": 0.0019910064002109473, "loss": 0.2908, "step": 5280 }, { "epoch": 0.009365422583221851, "grad_norm": 1.5234375, "learning_rate": 0.001990998005094931, "loss": 0.3964, "step": 5282 }, { "epoch": 0.009368968748531666, "grad_norm": 2.296875, "learning_rate": 0.0019909896060822073, "loss": 0.3696, "step": 5284 }, { "epoch": 0.009372514913841482, "grad_norm": 0.3671875, "learning_rate": 0.001990981203172812, "loss": 0.264, "step": 5286 }, { "epoch": 0.009376061079151296, "grad_norm": 0.3515625, "learning_rate": 0.001990972796366783, "loss": 0.2277, "step": 5288 }, { "epoch": 0.009379607244461111, "grad_norm": 0.59765625, "learning_rate": 0.0019909643856641564, "loss": 0.2843, "step": 5290 }, { "epoch": 0.009383153409770927, "grad_norm": 0.314453125, "learning_rate": 0.0019909559710649693, "loss": 0.2127, "step": 5292 }, { "epoch": 0.009386699575080742, "grad_norm": 0.4140625, "learning_rate": 0.0019909475525692576, "loss": 0.2601, "step": 5294 }, { "epoch": 0.009390245740390556, "grad_norm": 0.298828125, "learning_rate": 0.00199093913017706, "loss": 0.2017, "step": 5296 }, { "epoch": 0.009393791905700373, "grad_norm": 0.34375, "learning_rate": 0.0019909307038884112, "loss": 0.2771, "step": 5298 }, { "epoch": 0.009397338071010187, "grad_norm": 0.3671875, "learning_rate": 0.001990922273703349, "loss": 0.1868, "step": 5300 }, { "epoch": 0.009400884236320002, "grad_norm": 0.341796875, "learning_rate": 0.001990913839621911, "loss": 0.3177, "step": 5302 }, { "epoch": 0.009404430401629818, "grad_norm": 0.28125, "learning_rate": 0.0019909054016441327, "loss": 0.2739, "step": 5304 }, { "epoch": 0.009407976566939633, "grad_norm": 0.921875, "learning_rate": 0.001990896959770052, "loss": 0.2774, "step": 5306 }, { "epoch": 0.009411522732249447, "grad_norm": 2.9375, "learning_rate": 0.0019908885139997053, "loss": 0.3488, "step": 5308 }, { "epoch": 0.009415068897559263, "grad_norm": 6.625, "learning_rate": 0.00199088006433313, "loss": 0.5172, "step": 5310 }, { "epoch": 0.009418615062869078, "grad_norm": 0.419921875, "learning_rate": 0.0019908716107703626, "loss": 0.2794, "step": 5312 }, { "epoch": 0.009422161228178894, "grad_norm": 1.3203125, "learning_rate": 0.00199086315331144, "loss": 0.3641, "step": 5314 }, { "epoch": 0.009425707393488709, "grad_norm": 0.54296875, "learning_rate": 0.0019908546919564, "loss": 0.2582, "step": 5316 }, { "epoch": 0.009429253558798523, "grad_norm": 0.380859375, "learning_rate": 0.001990846226705279, "loss": 0.3349, "step": 5318 }, { "epoch": 0.00943279972410834, "grad_norm": 1.1640625, "learning_rate": 0.001990837757558114, "loss": 0.2865, "step": 5320 }, { "epoch": 0.009436345889418154, "grad_norm": 0.486328125, "learning_rate": 0.0019908292845149415, "loss": 0.2807, "step": 5322 }, { "epoch": 0.009439892054727969, "grad_norm": 0.7421875, "learning_rate": 0.0019908208075757996, "loss": 0.2805, "step": 5324 }, { "epoch": 0.009443438220037785, "grad_norm": 0.60546875, "learning_rate": 0.001990812326740725, "loss": 0.28, "step": 5326 }, { "epoch": 0.0094469843853476, "grad_norm": 4.875, "learning_rate": 0.001990803842009755, "loss": 0.3472, "step": 5328 }, { "epoch": 0.009450530550657414, "grad_norm": 0.31640625, "learning_rate": 0.001990795353382926, "loss": 0.2013, "step": 5330 }, { "epoch": 0.00945407671596723, "grad_norm": 0.37890625, "learning_rate": 0.0019907868608602755, "loss": 0.2834, "step": 5332 }, { "epoch": 0.009457622881277045, "grad_norm": 0.416015625, "learning_rate": 0.001990778364441841, "loss": 0.2338, "step": 5334 }, { "epoch": 0.00946116904658686, "grad_norm": 0.56640625, "learning_rate": 0.001990769864127659, "loss": 0.2797, "step": 5336 }, { "epoch": 0.009464715211896676, "grad_norm": 0.61328125, "learning_rate": 0.001990761359917767, "loss": 0.2832, "step": 5338 }, { "epoch": 0.00946826137720649, "grad_norm": 1.453125, "learning_rate": 0.001990752851812203, "loss": 0.3602, "step": 5340 }, { "epoch": 0.009471807542516305, "grad_norm": 0.59765625, "learning_rate": 0.0019907443398110027, "loss": 0.3086, "step": 5342 }, { "epoch": 0.009475353707826121, "grad_norm": 0.6953125, "learning_rate": 0.0019907358239142046, "loss": 0.2671, "step": 5344 }, { "epoch": 0.009478899873135936, "grad_norm": 0.4140625, "learning_rate": 0.001990727304121845, "loss": 0.2537, "step": 5346 }, { "epoch": 0.009482446038445752, "grad_norm": 1.84375, "learning_rate": 0.001990718780433962, "loss": 0.3107, "step": 5348 }, { "epoch": 0.009485992203755567, "grad_norm": 0.28515625, "learning_rate": 0.0019907102528505917, "loss": 0.2356, "step": 5350 }, { "epoch": 0.009489538369065381, "grad_norm": 0.71484375, "learning_rate": 0.0019907017213717727, "loss": 0.241, "step": 5352 }, { "epoch": 0.009493084534375197, "grad_norm": 0.9609375, "learning_rate": 0.0019906931859975416, "loss": 0.2512, "step": 5354 }, { "epoch": 0.009496630699685012, "grad_norm": 0.337890625, "learning_rate": 0.001990684646727936, "loss": 0.3035, "step": 5356 }, { "epoch": 0.009500176864994827, "grad_norm": 0.2421875, "learning_rate": 0.0019906761035629926, "loss": 0.2676, "step": 5358 }, { "epoch": 0.009503723030304643, "grad_norm": 0.28515625, "learning_rate": 0.00199066755650275, "loss": 0.2732, "step": 5360 }, { "epoch": 0.009507269195614457, "grad_norm": 0.890625, "learning_rate": 0.0019906590055472446, "loss": 0.3169, "step": 5362 }, { "epoch": 0.009510815360924272, "grad_norm": 0.259765625, "learning_rate": 0.001990650450696514, "loss": 0.2364, "step": 5364 }, { "epoch": 0.009514361526234088, "grad_norm": 1.6640625, "learning_rate": 0.001990641891950596, "loss": 0.2666, "step": 5366 }, { "epoch": 0.009517907691543903, "grad_norm": 2.109375, "learning_rate": 0.001990633329309527, "loss": 0.4231, "step": 5368 }, { "epoch": 0.009521453856853717, "grad_norm": 0.314453125, "learning_rate": 0.0019906247627733457, "loss": 0.2215, "step": 5370 }, { "epoch": 0.009525000022163534, "grad_norm": 6.75, "learning_rate": 0.001990616192342089, "loss": 0.4135, "step": 5372 }, { "epoch": 0.009528546187473348, "grad_norm": 0.55078125, "learning_rate": 0.0019906076180157945, "loss": 0.3483, "step": 5374 }, { "epoch": 0.009532092352783163, "grad_norm": 0.51171875, "learning_rate": 0.0019905990397944993, "loss": 0.3112, "step": 5376 }, { "epoch": 0.009535638518092979, "grad_norm": 0.67578125, "learning_rate": 0.001990590457678241, "loss": 0.3207, "step": 5378 }, { "epoch": 0.009539184683402794, "grad_norm": 0.80078125, "learning_rate": 0.001990581871667058, "loss": 0.2671, "step": 5380 }, { "epoch": 0.00954273084871261, "grad_norm": 0.45703125, "learning_rate": 0.0019905732817609868, "loss": 0.338, "step": 5382 }, { "epoch": 0.009546277014022424, "grad_norm": 0.5078125, "learning_rate": 0.0019905646879600654, "loss": 0.316, "step": 5384 }, { "epoch": 0.009549823179332239, "grad_norm": 0.7578125, "learning_rate": 0.0019905560902643317, "loss": 0.2966, "step": 5386 }, { "epoch": 0.009553369344642055, "grad_norm": 0.88671875, "learning_rate": 0.001990547488673823, "loss": 0.2656, "step": 5388 }, { "epoch": 0.00955691550995187, "grad_norm": 0.546875, "learning_rate": 0.001990538883188576, "loss": 0.2888, "step": 5390 }, { "epoch": 0.009560461675261684, "grad_norm": 0.498046875, "learning_rate": 0.00199053027380863, "loss": 0.3572, "step": 5392 }, { "epoch": 0.0095640078405715, "grad_norm": 1.515625, "learning_rate": 0.001990521660534022, "loss": 0.5049, "step": 5394 }, { "epoch": 0.009567554005881315, "grad_norm": 0.4140625, "learning_rate": 0.001990513043364789, "loss": 0.3315, "step": 5396 }, { "epoch": 0.00957110017119113, "grad_norm": 0.392578125, "learning_rate": 0.001990504422300969, "loss": 0.2469, "step": 5398 }, { "epoch": 0.009574646336500946, "grad_norm": 1.015625, "learning_rate": 0.0019904957973426005, "loss": 0.3216, "step": 5400 }, { "epoch": 0.00957819250181076, "grad_norm": 0.4921875, "learning_rate": 0.0019904871684897204, "loss": 0.3441, "step": 5402 }, { "epoch": 0.009581738667120575, "grad_norm": 0.373046875, "learning_rate": 0.001990478535742367, "loss": 0.3262, "step": 5404 }, { "epoch": 0.009585284832430391, "grad_norm": 0.55859375, "learning_rate": 0.0019904698991005778, "loss": 0.4011, "step": 5406 }, { "epoch": 0.009588830997740206, "grad_norm": 0.33984375, "learning_rate": 0.0019904612585643897, "loss": 0.289, "step": 5408 }, { "epoch": 0.00959237716305002, "grad_norm": 0.76171875, "learning_rate": 0.001990452614133842, "loss": 0.3158, "step": 5410 }, { "epoch": 0.009595923328359837, "grad_norm": 0.330078125, "learning_rate": 0.0019904439658089716, "loss": 0.5208, "step": 5412 }, { "epoch": 0.009599469493669651, "grad_norm": 0.50390625, "learning_rate": 0.0019904353135898165, "loss": 0.313, "step": 5414 }, { "epoch": 0.009603015658979468, "grad_norm": 0.515625, "learning_rate": 0.0019904266574764145, "loss": 0.2822, "step": 5416 }, { "epoch": 0.009606561824289282, "grad_norm": 2.859375, "learning_rate": 0.0019904179974688033, "loss": 0.4619, "step": 5418 }, { "epoch": 0.009610107989599097, "grad_norm": 0.48828125, "learning_rate": 0.0019904093335670215, "loss": 0.2557, "step": 5420 }, { "epoch": 0.009613654154908913, "grad_norm": 0.76953125, "learning_rate": 0.0019904006657711065, "loss": 0.3193, "step": 5422 }, { "epoch": 0.009617200320218728, "grad_norm": 0.53125, "learning_rate": 0.001990391994081096, "loss": 0.2368, "step": 5424 }, { "epoch": 0.009620746485528542, "grad_norm": 0.26953125, "learning_rate": 0.0019903833184970283, "loss": 0.2528, "step": 5426 }, { "epoch": 0.009624292650838358, "grad_norm": 0.431640625, "learning_rate": 0.0019903746390189407, "loss": 0.2841, "step": 5428 }, { "epoch": 0.009627838816148173, "grad_norm": 0.28125, "learning_rate": 0.0019903659556468715, "loss": 0.27, "step": 5430 }, { "epoch": 0.009631384981457988, "grad_norm": 1.7109375, "learning_rate": 0.0019903572683808595, "loss": 0.2637, "step": 5432 }, { "epoch": 0.009634931146767804, "grad_norm": 0.578125, "learning_rate": 0.001990348577220942, "loss": 0.2954, "step": 5434 }, { "epoch": 0.009638477312077618, "grad_norm": 0.302734375, "learning_rate": 0.0019903398821671564, "loss": 0.2343, "step": 5436 }, { "epoch": 0.009642023477387433, "grad_norm": 0.6328125, "learning_rate": 0.0019903311832195417, "loss": 0.27, "step": 5438 }, { "epoch": 0.00964556964269725, "grad_norm": 0.578125, "learning_rate": 0.0019903224803781354, "loss": 0.2144, "step": 5440 }, { "epoch": 0.009649115808007064, "grad_norm": 0.51171875, "learning_rate": 0.0019903137736429757, "loss": 0.2683, "step": 5442 }, { "epoch": 0.009652661973316878, "grad_norm": 0.75, "learning_rate": 0.001990305063014101, "loss": 0.2296, "step": 5444 }, { "epoch": 0.009656208138626695, "grad_norm": 0.46875, "learning_rate": 0.001990296348491549, "loss": 0.2922, "step": 5446 }, { "epoch": 0.00965975430393651, "grad_norm": 0.486328125, "learning_rate": 0.001990287630075357, "loss": 0.2725, "step": 5448 }, { "epoch": 0.009663300469246324, "grad_norm": 0.431640625, "learning_rate": 0.0019902789077655652, "loss": 0.2249, "step": 5450 }, { "epoch": 0.00966684663455614, "grad_norm": 0.546875, "learning_rate": 0.0019902701815622103, "loss": 0.3001, "step": 5452 }, { "epoch": 0.009670392799865955, "grad_norm": 0.51953125, "learning_rate": 0.001990261451465331, "loss": 0.2404, "step": 5454 }, { "epoch": 0.009673938965175771, "grad_norm": 1.71875, "learning_rate": 0.001990252717474965, "loss": 0.2583, "step": 5456 }, { "epoch": 0.009677485130485585, "grad_norm": 0.408203125, "learning_rate": 0.001990243979591151, "loss": 0.2287, "step": 5458 }, { "epoch": 0.0096810312957954, "grad_norm": 0.296875, "learning_rate": 0.001990235237813926, "loss": 0.2383, "step": 5460 }, { "epoch": 0.009684577461105216, "grad_norm": 1.71875, "learning_rate": 0.00199022649214333, "loss": 0.4918, "step": 5462 }, { "epoch": 0.00968812362641503, "grad_norm": 0.71875, "learning_rate": 0.0019902177425794, "loss": 0.2526, "step": 5464 }, { "epoch": 0.009691669791724845, "grad_norm": 0.2333984375, "learning_rate": 0.0019902089891221755, "loss": 0.2841, "step": 5466 }, { "epoch": 0.009695215957034662, "grad_norm": 0.36328125, "learning_rate": 0.001990200231771693, "loss": 0.3146, "step": 5468 }, { "epoch": 0.009698762122344476, "grad_norm": 0.69140625, "learning_rate": 0.001990191470527992, "loss": 0.2704, "step": 5470 }, { "epoch": 0.00970230828765429, "grad_norm": 0.9921875, "learning_rate": 0.001990182705391111, "loss": 0.345, "step": 5472 }, { "epoch": 0.009705854452964107, "grad_norm": 1.265625, "learning_rate": 0.0019901739363610877, "loss": 0.6601, "step": 5474 }, { "epoch": 0.009709400618273922, "grad_norm": 1.0390625, "learning_rate": 0.0019901651634379606, "loss": 0.2783, "step": 5476 }, { "epoch": 0.009712946783583736, "grad_norm": 1.0859375, "learning_rate": 0.0019901563866217683, "loss": 0.2675, "step": 5478 }, { "epoch": 0.009716492948893552, "grad_norm": 0.30078125, "learning_rate": 0.001990147605912549, "loss": 0.251, "step": 5480 }, { "epoch": 0.009720039114203367, "grad_norm": 0.58984375, "learning_rate": 0.001990138821310341, "loss": 0.2504, "step": 5482 }, { "epoch": 0.009723585279513182, "grad_norm": 2.15625, "learning_rate": 0.001990130032815183, "loss": 0.4118, "step": 5484 }, { "epoch": 0.009727131444822998, "grad_norm": 0.3984375, "learning_rate": 0.001990121240427113, "loss": 0.2073, "step": 5486 }, { "epoch": 0.009730677610132812, "grad_norm": 0.96484375, "learning_rate": 0.0019901124441461704, "loss": 0.6021, "step": 5488 }, { "epoch": 0.009734223775442629, "grad_norm": 0.7578125, "learning_rate": 0.0019901036439723923, "loss": 0.2819, "step": 5490 }, { "epoch": 0.009737769940752443, "grad_norm": 0.28125, "learning_rate": 0.0019900948399058185, "loss": 0.2678, "step": 5492 }, { "epoch": 0.009741316106062258, "grad_norm": 0.408203125, "learning_rate": 0.0019900860319464865, "loss": 0.2186, "step": 5494 }, { "epoch": 0.009744862271372074, "grad_norm": 0.359375, "learning_rate": 0.001990077220094435, "loss": 0.3387, "step": 5496 }, { "epoch": 0.009748408436681889, "grad_norm": 0.76171875, "learning_rate": 0.0019900684043497037, "loss": 0.2981, "step": 5498 }, { "epoch": 0.009751954601991703, "grad_norm": 0.2734375, "learning_rate": 0.001990059584712329, "loss": 0.2693, "step": 5500 }, { "epoch": 0.00975550076730152, "grad_norm": 0.283203125, "learning_rate": 0.0019900507611823517, "loss": 0.2463, "step": 5502 }, { "epoch": 0.009759046932611334, "grad_norm": 0.416015625, "learning_rate": 0.0019900419337598087, "loss": 0.2784, "step": 5504 }, { "epoch": 0.009762593097921149, "grad_norm": 0.283203125, "learning_rate": 0.0019900331024447393, "loss": 0.2042, "step": 5506 }, { "epoch": 0.009766139263230965, "grad_norm": 0.52734375, "learning_rate": 0.001990024267237183, "loss": 0.2527, "step": 5508 }, { "epoch": 0.00976968542854078, "grad_norm": 0.67578125, "learning_rate": 0.0019900154281371762, "loss": 0.2652, "step": 5510 }, { "epoch": 0.009773231593850594, "grad_norm": 0.29296875, "learning_rate": 0.00199000658514476, "loss": 0.3096, "step": 5512 }, { "epoch": 0.00977677775916041, "grad_norm": 0.859375, "learning_rate": 0.0019899977382599712, "loss": 0.2882, "step": 5514 }, { "epoch": 0.009780323924470225, "grad_norm": 0.248046875, "learning_rate": 0.0019899888874828496, "loss": 0.3353, "step": 5516 }, { "epoch": 0.00978387008978004, "grad_norm": 0.2578125, "learning_rate": 0.0019899800328134335, "loss": 0.2215, "step": 5518 }, { "epoch": 0.009787416255089856, "grad_norm": 0.314453125, "learning_rate": 0.0019899711742517616, "loss": 0.241, "step": 5520 }, { "epoch": 0.00979096242039967, "grad_norm": 0.890625, "learning_rate": 0.001989962311797873, "loss": 0.3184, "step": 5522 }, { "epoch": 0.009794508585709487, "grad_norm": 0.59765625, "learning_rate": 0.001989953445451806, "loss": 0.272, "step": 5524 }, { "epoch": 0.009798054751019301, "grad_norm": 0.69921875, "learning_rate": 0.0019899445752136, "loss": 0.1761, "step": 5526 }, { "epoch": 0.009801600916329116, "grad_norm": 1.71875, "learning_rate": 0.001989935701083293, "loss": 0.4969, "step": 5528 }, { "epoch": 0.009805147081638932, "grad_norm": 1.2890625, "learning_rate": 0.001989926823060924, "loss": 0.434, "step": 5530 }, { "epoch": 0.009808693246948746, "grad_norm": 0.306640625, "learning_rate": 0.0019899179411465326, "loss": 0.3315, "step": 5532 }, { "epoch": 0.009812239412258561, "grad_norm": 0.39453125, "learning_rate": 0.0019899090553401563, "loss": 0.451, "step": 5534 }, { "epoch": 0.009815785577568377, "grad_norm": 0.58984375, "learning_rate": 0.001989900165641835, "loss": 0.4802, "step": 5536 }, { "epoch": 0.009819331742878192, "grad_norm": 0.2578125, "learning_rate": 0.001989891272051608, "loss": 0.2513, "step": 5538 }, { "epoch": 0.009822877908188006, "grad_norm": 0.224609375, "learning_rate": 0.0019898823745695127, "loss": 0.2198, "step": 5540 }, { "epoch": 0.009826424073497823, "grad_norm": 5.65625, "learning_rate": 0.0019898734731955887, "loss": 0.3903, "step": 5542 }, { "epoch": 0.009829970238807637, "grad_norm": 0.435546875, "learning_rate": 0.0019898645679298755, "loss": 0.2811, "step": 5544 }, { "epoch": 0.009833516404117452, "grad_norm": 0.515625, "learning_rate": 0.001989855658772411, "loss": 0.258, "step": 5546 }, { "epoch": 0.009837062569427268, "grad_norm": 0.50390625, "learning_rate": 0.0019898467457232353, "loss": 0.2887, "step": 5548 }, { "epoch": 0.009840608734737083, "grad_norm": 1.1015625, "learning_rate": 0.0019898378287823864, "loss": 0.3219, "step": 5550 }, { "epoch": 0.009844154900046897, "grad_norm": 1.1015625, "learning_rate": 0.001989828907949904, "loss": 0.6607, "step": 5552 }, { "epoch": 0.009847701065356713, "grad_norm": 0.322265625, "learning_rate": 0.001989819983225827, "loss": 0.27, "step": 5554 }, { "epoch": 0.009851247230666528, "grad_norm": 0.34765625, "learning_rate": 0.001989811054610194, "loss": 0.2863, "step": 5556 }, { "epoch": 0.009854793395976344, "grad_norm": 0.82421875, "learning_rate": 0.0019898021221030444, "loss": 0.3375, "step": 5558 }, { "epoch": 0.009858339561286159, "grad_norm": 1.046875, "learning_rate": 0.0019897931857044172, "loss": 0.2429, "step": 5560 }, { "epoch": 0.009861885726595973, "grad_norm": 1.0703125, "learning_rate": 0.0019897842454143513, "loss": 0.3023, "step": 5562 }, { "epoch": 0.00986543189190579, "grad_norm": 0.6953125, "learning_rate": 0.001989775301232886, "loss": 0.284, "step": 5564 }, { "epoch": 0.009868978057215604, "grad_norm": 0.51171875, "learning_rate": 0.0019897663531600607, "loss": 0.3629, "step": 5566 }, { "epoch": 0.009872524222525419, "grad_norm": 0.703125, "learning_rate": 0.0019897574011959137, "loss": 0.2543, "step": 5568 }, { "epoch": 0.009876070387835235, "grad_norm": 0.33984375, "learning_rate": 0.001989748445340485, "loss": 0.2965, "step": 5570 }, { "epoch": 0.00987961655314505, "grad_norm": 0.49609375, "learning_rate": 0.0019897394855938133, "loss": 0.2777, "step": 5572 }, { "epoch": 0.009883162718454864, "grad_norm": 0.5546875, "learning_rate": 0.001989730521955938, "loss": 0.2765, "step": 5574 }, { "epoch": 0.00988670888376468, "grad_norm": 0.462890625, "learning_rate": 0.001989721554426898, "loss": 0.2722, "step": 5576 }, { "epoch": 0.009890255049074495, "grad_norm": 0.365234375, "learning_rate": 0.001989712583006733, "loss": 0.2657, "step": 5578 }, { "epoch": 0.00989380121438431, "grad_norm": 0.6328125, "learning_rate": 0.001989703607695482, "loss": 0.2019, "step": 5580 }, { "epoch": 0.009897347379694126, "grad_norm": 0.43359375, "learning_rate": 0.001989694628493184, "loss": 0.3284, "step": 5582 }, { "epoch": 0.00990089354500394, "grad_norm": 0.419921875, "learning_rate": 0.0019896856453998782, "loss": 0.2827, "step": 5584 }, { "epoch": 0.009904439710313755, "grad_norm": 0.73046875, "learning_rate": 0.0019896766584156047, "loss": 0.3349, "step": 5586 }, { "epoch": 0.009907985875623571, "grad_norm": 0.453125, "learning_rate": 0.001989667667540402, "loss": 0.2096, "step": 5588 }, { "epoch": 0.009911532040933386, "grad_norm": 0.99609375, "learning_rate": 0.00198965867277431, "loss": 0.4079, "step": 5590 }, { "epoch": 0.009915078206243202, "grad_norm": 2.984375, "learning_rate": 0.0019896496741173674, "loss": 0.2277, "step": 5592 }, { "epoch": 0.009918624371553017, "grad_norm": 0.70703125, "learning_rate": 0.001989640671569614, "loss": 0.3198, "step": 5594 }, { "epoch": 0.009922170536862831, "grad_norm": 1.5625, "learning_rate": 0.0019896316651310895, "loss": 0.3016, "step": 5596 }, { "epoch": 0.009925716702172648, "grad_norm": 0.294921875, "learning_rate": 0.0019896226548018325, "loss": 0.2796, "step": 5598 }, { "epoch": 0.009929262867482462, "grad_norm": 0.58203125, "learning_rate": 0.0019896136405818826, "loss": 0.2614, "step": 5600 }, { "epoch": 0.009932809032792277, "grad_norm": 0.248046875, "learning_rate": 0.0019896046224712792, "loss": 0.2003, "step": 5602 }, { "epoch": 0.009936355198102093, "grad_norm": 3.09375, "learning_rate": 0.0019895956004700624, "loss": 0.3767, "step": 5604 }, { "epoch": 0.009939901363411907, "grad_norm": 0.416015625, "learning_rate": 0.001989586574578271, "loss": 0.2424, "step": 5606 }, { "epoch": 0.009943447528721722, "grad_norm": 3.9375, "learning_rate": 0.0019895775447959447, "loss": 0.2097, "step": 5608 }, { "epoch": 0.009946993694031538, "grad_norm": 0.546875, "learning_rate": 0.001989568511123123, "loss": 0.3036, "step": 5610 }, { "epoch": 0.009950539859341353, "grad_norm": 1.8125, "learning_rate": 0.001989559473559845, "loss": 0.2509, "step": 5612 }, { "epoch": 0.009954086024651167, "grad_norm": 0.64453125, "learning_rate": 0.0019895504321061513, "loss": 0.224, "step": 5614 }, { "epoch": 0.009957632189960984, "grad_norm": 2.53125, "learning_rate": 0.0019895413867620803, "loss": 0.371, "step": 5616 }, { "epoch": 0.009961178355270798, "grad_norm": 0.5, "learning_rate": 0.0019895323375276716, "loss": 0.2906, "step": 5618 }, { "epoch": 0.009964724520580613, "grad_norm": 0.30078125, "learning_rate": 0.001989523284402966, "loss": 0.2248, "step": 5620 }, { "epoch": 0.009968270685890429, "grad_norm": 0.40625, "learning_rate": 0.0019895142273880016, "loss": 0.2878, "step": 5622 }, { "epoch": 0.009971816851200244, "grad_norm": 0.482421875, "learning_rate": 0.0019895051664828188, "loss": 0.2276, "step": 5624 }, { "epoch": 0.00997536301651006, "grad_norm": 0.8125, "learning_rate": 0.0019894961016874574, "loss": 0.3383, "step": 5626 }, { "epoch": 0.009978909181819874, "grad_norm": 0.90234375, "learning_rate": 0.0019894870330019565, "loss": 0.2168, "step": 5628 }, { "epoch": 0.009982455347129689, "grad_norm": 0.6484375, "learning_rate": 0.0019894779604263555, "loss": 0.23, "step": 5630 }, { "epoch": 0.009986001512439505, "grad_norm": 0.359375, "learning_rate": 0.0019894688839606953, "loss": 0.2882, "step": 5632 }, { "epoch": 0.00998954767774932, "grad_norm": 0.55078125, "learning_rate": 0.0019894598036050144, "loss": 0.3022, "step": 5634 }, { "epoch": 0.009993093843059134, "grad_norm": 0.388671875, "learning_rate": 0.0019894507193593536, "loss": 0.2219, "step": 5636 }, { "epoch": 0.00999664000836895, "grad_norm": 0.6875, "learning_rate": 0.0019894416312237514, "loss": 0.2551, "step": 5638 }, { "epoch": 0.010000186173678765, "grad_norm": 0.296875, "learning_rate": 0.001989432539198248, "loss": 0.5288, "step": 5640 }, { "epoch": 0.01000373233898858, "grad_norm": 1.6953125, "learning_rate": 0.001989423443282884, "loss": 0.4341, "step": 5642 }, { "epoch": 0.010007278504298396, "grad_norm": 0.875, "learning_rate": 0.001989414343477698, "loss": 0.275, "step": 5644 }, { "epoch": 0.01001082466960821, "grad_norm": 0.365234375, "learning_rate": 0.00198940523978273, "loss": 0.2078, "step": 5646 }, { "epoch": 0.010014370834918025, "grad_norm": 0.458984375, "learning_rate": 0.001989396132198021, "loss": 0.4167, "step": 5648 }, { "epoch": 0.010017917000227841, "grad_norm": 0.2265625, "learning_rate": 0.0019893870207236095, "loss": 0.2656, "step": 5650 }, { "epoch": 0.010021463165537656, "grad_norm": 2.546875, "learning_rate": 0.0019893779053595357, "loss": 0.3801, "step": 5652 }, { "epoch": 0.01002500933084747, "grad_norm": 0.408203125, "learning_rate": 0.0019893687861058398, "loss": 0.2986, "step": 5654 }, { "epoch": 0.010028555496157287, "grad_norm": 0.419921875, "learning_rate": 0.001989359662962561, "loss": 0.2402, "step": 5656 }, { "epoch": 0.010032101661467101, "grad_norm": 0.4140625, "learning_rate": 0.00198935053592974, "loss": 0.2696, "step": 5658 }, { "epoch": 0.010035647826776918, "grad_norm": 1.1484375, "learning_rate": 0.001989341405007416, "loss": 0.1741, "step": 5660 }, { "epoch": 0.010039193992086732, "grad_norm": 0.59765625, "learning_rate": 0.00198933227019563, "loss": 0.2502, "step": 5662 }, { "epoch": 0.010042740157396547, "grad_norm": 0.3671875, "learning_rate": 0.0019893231314944207, "loss": 0.3084, "step": 5664 }, { "epoch": 0.010046286322706363, "grad_norm": 0.78515625, "learning_rate": 0.0019893139889038285, "loss": 0.4311, "step": 5666 }, { "epoch": 0.010049832488016178, "grad_norm": 0.4609375, "learning_rate": 0.0019893048424238936, "loss": 0.2815, "step": 5668 }, { "epoch": 0.010053378653325992, "grad_norm": 1.609375, "learning_rate": 0.0019892956920546565, "loss": 0.3271, "step": 5670 }, { "epoch": 0.010056924818635809, "grad_norm": 1.2734375, "learning_rate": 0.001989286537796156, "loss": 0.2578, "step": 5672 }, { "epoch": 0.010060470983945623, "grad_norm": 0.52734375, "learning_rate": 0.0019892773796484323, "loss": 0.2424, "step": 5674 }, { "epoch": 0.010064017149255438, "grad_norm": 0.3828125, "learning_rate": 0.0019892682176115267, "loss": 0.2775, "step": 5676 }, { "epoch": 0.010067563314565254, "grad_norm": 0.3984375, "learning_rate": 0.001989259051685478, "loss": 0.3256, "step": 5678 }, { "epoch": 0.010071109479875068, "grad_norm": 0.578125, "learning_rate": 0.001989249881870327, "loss": 0.3471, "step": 5680 }, { "epoch": 0.010074655645184883, "grad_norm": 0.51171875, "learning_rate": 0.0019892407081661136, "loss": 0.2242, "step": 5682 }, { "epoch": 0.0100782018104947, "grad_norm": 0.42578125, "learning_rate": 0.0019892315305728775, "loss": 0.2751, "step": 5684 }, { "epoch": 0.010081747975804514, "grad_norm": 0.51171875, "learning_rate": 0.0019892223490906597, "loss": 0.3133, "step": 5686 }, { "epoch": 0.010085294141114328, "grad_norm": 1.7265625, "learning_rate": 0.0019892131637195, "loss": 0.2903, "step": 5688 }, { "epoch": 0.010088840306424145, "grad_norm": 0.2314453125, "learning_rate": 0.0019892039744594378, "loss": 0.2409, "step": 5690 }, { "epoch": 0.01009238647173396, "grad_norm": 0.435546875, "learning_rate": 0.001989194781310514, "loss": 0.2348, "step": 5692 }, { "epoch": 0.010095932637043776, "grad_norm": 0.490234375, "learning_rate": 0.0019891855842727687, "loss": 0.2984, "step": 5694 }, { "epoch": 0.01009947880235359, "grad_norm": 1.25, "learning_rate": 0.001989176383346243, "loss": 0.3989, "step": 5696 }, { "epoch": 0.010103024967663405, "grad_norm": 0.640625, "learning_rate": 0.0019891671785309756, "loss": 0.289, "step": 5698 }, { "epoch": 0.010106571132973221, "grad_norm": 1.203125, "learning_rate": 0.0019891579698270074, "loss": 0.4843, "step": 5700 }, { "epoch": 0.010110117298283035, "grad_norm": 1.0390625, "learning_rate": 0.0019891487572343785, "loss": 0.4377, "step": 5702 }, { "epoch": 0.01011366346359285, "grad_norm": 0.380859375, "learning_rate": 0.00198913954075313, "loss": 0.2682, "step": 5704 }, { "epoch": 0.010117209628902666, "grad_norm": 0.69921875, "learning_rate": 0.0019891303203833015, "loss": 0.2555, "step": 5706 }, { "epoch": 0.010120755794212481, "grad_norm": 0.494140625, "learning_rate": 0.001989121096124933, "loss": 0.2466, "step": 5708 }, { "epoch": 0.010124301959522295, "grad_norm": 1.1875, "learning_rate": 0.0019891118679780657, "loss": 0.4772, "step": 5710 }, { "epoch": 0.010127848124832112, "grad_norm": 0.2333984375, "learning_rate": 0.0019891026359427394, "loss": 0.2197, "step": 5712 }, { "epoch": 0.010131394290141926, "grad_norm": 2.859375, "learning_rate": 0.001989093400018995, "loss": 0.3882, "step": 5714 }, { "epoch": 0.01013494045545174, "grad_norm": 0.40234375, "learning_rate": 0.0019890841602068724, "loss": 0.2922, "step": 5716 }, { "epoch": 0.010138486620761557, "grad_norm": 0.66015625, "learning_rate": 0.0019890749165064115, "loss": 0.311, "step": 5718 }, { "epoch": 0.010142032786071372, "grad_norm": 0.2470703125, "learning_rate": 0.0019890656689176543, "loss": 0.2461, "step": 5720 }, { "epoch": 0.010145578951381186, "grad_norm": 0.953125, "learning_rate": 0.0019890564174406397, "loss": 0.2785, "step": 5722 }, { "epoch": 0.010149125116691003, "grad_norm": 0.7578125, "learning_rate": 0.0019890471620754085, "loss": 0.2713, "step": 5724 }, { "epoch": 0.010152671282000817, "grad_norm": 0.85546875, "learning_rate": 0.001989037902822002, "loss": 0.3803, "step": 5726 }, { "epoch": 0.010156217447310633, "grad_norm": 0.4375, "learning_rate": 0.00198902863968046, "loss": 0.2137, "step": 5728 }, { "epoch": 0.010159763612620448, "grad_norm": 0.91015625, "learning_rate": 0.0019890193726508224, "loss": 0.3113, "step": 5730 }, { "epoch": 0.010163309777930262, "grad_norm": 0.275390625, "learning_rate": 0.0019890101017331313, "loss": 0.2358, "step": 5732 }, { "epoch": 0.010166855943240079, "grad_norm": 0.298828125, "learning_rate": 0.0019890008269274266, "loss": 0.2295, "step": 5734 }, { "epoch": 0.010170402108549893, "grad_norm": 0.74609375, "learning_rate": 0.001988991548233748, "loss": 0.2922, "step": 5736 }, { "epoch": 0.010173948273859708, "grad_norm": 0.68359375, "learning_rate": 0.0019889822656521373, "loss": 0.2203, "step": 5738 }, { "epoch": 0.010177494439169524, "grad_norm": 0.57421875, "learning_rate": 0.001988972979182634, "loss": 0.2339, "step": 5740 }, { "epoch": 0.010181040604479339, "grad_norm": 1.453125, "learning_rate": 0.0019889636888252796, "loss": 0.2815, "step": 5742 }, { "epoch": 0.010184586769789153, "grad_norm": 0.33984375, "learning_rate": 0.0019889543945801145, "loss": 0.321, "step": 5744 }, { "epoch": 0.01018813293509897, "grad_norm": 3.9375, "learning_rate": 0.001988945096447179, "loss": 0.2809, "step": 5746 }, { "epoch": 0.010191679100408784, "grad_norm": 2.734375, "learning_rate": 0.0019889357944265144, "loss": 0.4626, "step": 5748 }, { "epoch": 0.010195225265718599, "grad_norm": 0.73828125, "learning_rate": 0.0019889264885181606, "loss": 0.2143, "step": 5750 }, { "epoch": 0.010198771431028415, "grad_norm": 0.44140625, "learning_rate": 0.001988917178722159, "loss": 0.1887, "step": 5752 }, { "epoch": 0.01020231759633823, "grad_norm": 0.34375, "learning_rate": 0.0019889078650385497, "loss": 0.3456, "step": 5754 }, { "epoch": 0.010205863761648044, "grad_norm": 0.408203125, "learning_rate": 0.0019888985474673737, "loss": 0.288, "step": 5756 }, { "epoch": 0.01020940992695786, "grad_norm": 0.640625, "learning_rate": 0.001988889226008672, "loss": 0.2324, "step": 5758 }, { "epoch": 0.010212956092267675, "grad_norm": 0.9453125, "learning_rate": 0.001988879900662485, "loss": 0.2174, "step": 5760 }, { "epoch": 0.010216502257577491, "grad_norm": 0.2490234375, "learning_rate": 0.0019888705714288537, "loss": 0.2459, "step": 5762 }, { "epoch": 0.010220048422887306, "grad_norm": 0.380859375, "learning_rate": 0.001988861238307819, "loss": 0.2782, "step": 5764 }, { "epoch": 0.01022359458819712, "grad_norm": 0.5, "learning_rate": 0.001988851901299421, "loss": 0.2213, "step": 5766 }, { "epoch": 0.010227140753506937, "grad_norm": 0.75, "learning_rate": 0.001988842560403702, "loss": 0.2552, "step": 5768 }, { "epoch": 0.010230686918816751, "grad_norm": 0.3984375, "learning_rate": 0.0019888332156207016, "loss": 0.2348, "step": 5770 }, { "epoch": 0.010234233084126566, "grad_norm": 0.361328125, "learning_rate": 0.0019888238669504604, "loss": 0.2418, "step": 5772 }, { "epoch": 0.010237779249436382, "grad_norm": 0.46875, "learning_rate": 0.0019888145143930206, "loss": 0.2455, "step": 5774 }, { "epoch": 0.010241325414746196, "grad_norm": 0.384765625, "learning_rate": 0.001988805157948422, "loss": 0.2102, "step": 5776 }, { "epoch": 0.010244871580056011, "grad_norm": 0.45703125, "learning_rate": 0.001988795797616706, "loss": 0.2653, "step": 5778 }, { "epoch": 0.010248417745365827, "grad_norm": 0.3671875, "learning_rate": 0.0019887864333979137, "loss": 0.2891, "step": 5780 }, { "epoch": 0.010251963910675642, "grad_norm": 0.416015625, "learning_rate": 0.0019887770652920857, "loss": 0.2714, "step": 5782 }, { "epoch": 0.010255510075985456, "grad_norm": 0.302734375, "learning_rate": 0.001988767693299263, "loss": 0.2319, "step": 5784 }, { "epoch": 0.010259056241295273, "grad_norm": 1.359375, "learning_rate": 0.0019887583174194867, "loss": 0.2893, "step": 5786 }, { "epoch": 0.010262602406605087, "grad_norm": 0.2890625, "learning_rate": 0.0019887489376527977, "loss": 0.5093, "step": 5788 }, { "epoch": 0.010266148571914902, "grad_norm": 0.22265625, "learning_rate": 0.001988739553999237, "loss": 0.2232, "step": 5790 }, { "epoch": 0.010269694737224718, "grad_norm": 0.62109375, "learning_rate": 0.001988730166458846, "loss": 0.2264, "step": 5792 }, { "epoch": 0.010273240902534533, "grad_norm": 1.2578125, "learning_rate": 0.0019887207750316654, "loss": 0.374, "step": 5794 }, { "epoch": 0.010276787067844349, "grad_norm": 1.2421875, "learning_rate": 0.001988711379717736, "loss": 0.4457, "step": 5796 }, { "epoch": 0.010280333233154164, "grad_norm": 0.78125, "learning_rate": 0.0019887019805170996, "loss": 0.2772, "step": 5798 }, { "epoch": 0.010283879398463978, "grad_norm": 0.53125, "learning_rate": 0.001988692577429797, "loss": 0.3339, "step": 5800 }, { "epoch": 0.010287425563773794, "grad_norm": 2.703125, "learning_rate": 0.0019886831704558692, "loss": 0.2457, "step": 5802 }, { "epoch": 0.010290971729083609, "grad_norm": 0.546875, "learning_rate": 0.001988673759595358, "loss": 0.2628, "step": 5804 }, { "epoch": 0.010294517894393423, "grad_norm": 0.388671875, "learning_rate": 0.001988664344848303, "loss": 0.256, "step": 5806 }, { "epoch": 0.01029806405970324, "grad_norm": 1.0546875, "learning_rate": 0.0019886549262147467, "loss": 0.3069, "step": 5808 }, { "epoch": 0.010301610225013054, "grad_norm": 0.515625, "learning_rate": 0.0019886455036947303, "loss": 0.3723, "step": 5810 }, { "epoch": 0.010305156390322869, "grad_norm": 0.81640625, "learning_rate": 0.001988636077288294, "loss": 0.3231, "step": 5812 }, { "epoch": 0.010308702555632685, "grad_norm": 0.42578125, "learning_rate": 0.0019886266469954805, "loss": 0.2946, "step": 5814 }, { "epoch": 0.0103122487209425, "grad_norm": 0.486328125, "learning_rate": 0.0019886172128163295, "loss": 0.2659, "step": 5816 }, { "epoch": 0.010315794886252314, "grad_norm": 0.51953125, "learning_rate": 0.001988607774750883, "loss": 0.2917, "step": 5818 }, { "epoch": 0.01031934105156213, "grad_norm": 0.197265625, "learning_rate": 0.0019885983327991826, "loss": 0.2409, "step": 5820 }, { "epoch": 0.010322887216871945, "grad_norm": 0.5234375, "learning_rate": 0.001988588886961269, "loss": 0.2564, "step": 5822 }, { "epoch": 0.01032643338218176, "grad_norm": 0.3515625, "learning_rate": 0.0019885794372371843, "loss": 0.2172, "step": 5824 }, { "epoch": 0.010329979547491576, "grad_norm": 0.88671875, "learning_rate": 0.001988569983626969, "loss": 0.557, "step": 5826 }, { "epoch": 0.01033352571280139, "grad_norm": 0.87109375, "learning_rate": 0.0019885605261306645, "loss": 0.3127, "step": 5828 }, { "epoch": 0.010337071878111207, "grad_norm": 0.515625, "learning_rate": 0.0019885510647483125, "loss": 0.2379, "step": 5830 }, { "epoch": 0.010340618043421021, "grad_norm": 3.0625, "learning_rate": 0.0019885415994799543, "loss": 0.304, "step": 5832 }, { "epoch": 0.010344164208730836, "grad_norm": 1.171875, "learning_rate": 0.0019885321303256312, "loss": 0.344, "step": 5834 }, { "epoch": 0.010347710374040652, "grad_norm": 1.125, "learning_rate": 0.001988522657285385, "loss": 0.5607, "step": 5836 }, { "epoch": 0.010351256539350467, "grad_norm": 1.4140625, "learning_rate": 0.0019885131803592565, "loss": 0.37, "step": 5838 }, { "epoch": 0.010354802704660281, "grad_norm": 0.33203125, "learning_rate": 0.0019885036995472877, "loss": 0.2129, "step": 5840 }, { "epoch": 0.010358348869970098, "grad_norm": 0.3125, "learning_rate": 0.0019884942148495196, "loss": 0.2877, "step": 5842 }, { "epoch": 0.010361895035279912, "grad_norm": 1.25, "learning_rate": 0.001988484726265994, "loss": 0.3503, "step": 5844 }, { "epoch": 0.010365441200589727, "grad_norm": 0.7265625, "learning_rate": 0.001988475233796752, "loss": 0.3453, "step": 5846 }, { "epoch": 0.010368987365899543, "grad_norm": 0.66796875, "learning_rate": 0.001988465737441836, "loss": 0.3034, "step": 5848 }, { "epoch": 0.010372533531209358, "grad_norm": 0.48828125, "learning_rate": 0.001988456237201287, "loss": 0.2343, "step": 5850 }, { "epoch": 0.010376079696519172, "grad_norm": 0.64453125, "learning_rate": 0.0019884467330751458, "loss": 0.3782, "step": 5852 }, { "epoch": 0.010379625861828988, "grad_norm": 0.326171875, "learning_rate": 0.0019884372250634544, "loss": 0.2747, "step": 5854 }, { "epoch": 0.010383172027138803, "grad_norm": 0.310546875, "learning_rate": 0.0019884277131662553, "loss": 0.2435, "step": 5856 }, { "epoch": 0.010386718192448617, "grad_norm": 0.82421875, "learning_rate": 0.0019884181973835896, "loss": 0.414, "step": 5858 }, { "epoch": 0.010390264357758434, "grad_norm": 0.326171875, "learning_rate": 0.0019884086777154984, "loss": 0.2359, "step": 5860 }, { "epoch": 0.010393810523068248, "grad_norm": 0.41796875, "learning_rate": 0.001988399154162024, "loss": 0.2979, "step": 5862 }, { "epoch": 0.010397356688378065, "grad_norm": 0.62890625, "learning_rate": 0.0019883896267232077, "loss": 0.2331, "step": 5864 }, { "epoch": 0.010400902853687879, "grad_norm": 1.1640625, "learning_rate": 0.001988380095399091, "loss": 0.3314, "step": 5866 }, { "epoch": 0.010404449018997694, "grad_norm": 0.5546875, "learning_rate": 0.001988370560189716, "loss": 0.2767, "step": 5868 }, { "epoch": 0.01040799518430751, "grad_norm": 0.357421875, "learning_rate": 0.0019883610210951236, "loss": 0.2005, "step": 5870 }, { "epoch": 0.010411541349617325, "grad_norm": 0.3203125, "learning_rate": 0.001988351478115357, "loss": 0.2644, "step": 5872 }, { "epoch": 0.010415087514927139, "grad_norm": 1.1796875, "learning_rate": 0.0019883419312504563, "loss": 0.3104, "step": 5874 }, { "epoch": 0.010418633680236955, "grad_norm": 0.375, "learning_rate": 0.0019883323805004647, "loss": 0.2735, "step": 5876 }, { "epoch": 0.01042217984554677, "grad_norm": 0.25, "learning_rate": 0.0019883228258654228, "loss": 0.2543, "step": 5878 }, { "epoch": 0.010425726010856584, "grad_norm": 0.578125, "learning_rate": 0.0019883132673453726, "loss": 0.2897, "step": 5880 }, { "epoch": 0.0104292721761664, "grad_norm": 0.65625, "learning_rate": 0.001988303704940357, "loss": 0.2792, "step": 5882 }, { "epoch": 0.010432818341476215, "grad_norm": 0.80078125, "learning_rate": 0.0019882941386504165, "loss": 0.3873, "step": 5884 }, { "epoch": 0.01043636450678603, "grad_norm": 0.66015625, "learning_rate": 0.0019882845684755933, "loss": 0.2324, "step": 5886 }, { "epoch": 0.010439910672095846, "grad_norm": 1.203125, "learning_rate": 0.0019882749944159293, "loss": 0.3238, "step": 5888 }, { "epoch": 0.01044345683740566, "grad_norm": 0.59375, "learning_rate": 0.001988265416471467, "loss": 0.2852, "step": 5890 }, { "epoch": 0.010447003002715475, "grad_norm": 5.4375, "learning_rate": 0.001988255834642247, "loss": 0.3307, "step": 5892 }, { "epoch": 0.010450549168025292, "grad_norm": 2.125, "learning_rate": 0.001988246248928313, "loss": 0.2993, "step": 5894 }, { "epoch": 0.010454095333335106, "grad_norm": 0.53515625, "learning_rate": 0.001988236659329705, "loss": 0.2431, "step": 5896 }, { "epoch": 0.010457641498644922, "grad_norm": 0.8046875, "learning_rate": 0.001988227065846466, "loss": 0.3667, "step": 5898 }, { "epoch": 0.010461187663954737, "grad_norm": 0.69921875, "learning_rate": 0.0019882174684786374, "loss": 0.3712, "step": 5900 }, { "epoch": 0.010464733829264551, "grad_norm": 3.578125, "learning_rate": 0.001988207867226262, "loss": 0.277, "step": 5902 }, { "epoch": 0.010468279994574368, "grad_norm": 0.5859375, "learning_rate": 0.001988198262089381, "loss": 0.3025, "step": 5904 }, { "epoch": 0.010471826159884182, "grad_norm": 0.796875, "learning_rate": 0.0019881886530680373, "loss": 0.2458, "step": 5906 }, { "epoch": 0.010475372325193997, "grad_norm": 0.326171875, "learning_rate": 0.001988179040162272, "loss": 0.3324, "step": 5908 }, { "epoch": 0.010478918490503813, "grad_norm": 0.80859375, "learning_rate": 0.0019881694233721274, "loss": 0.2186, "step": 5910 }, { "epoch": 0.010482464655813628, "grad_norm": 0.38671875, "learning_rate": 0.0019881598026976455, "loss": 0.1934, "step": 5912 }, { "epoch": 0.010486010821123442, "grad_norm": 0.3125, "learning_rate": 0.001988150178138869, "loss": 0.2527, "step": 5914 }, { "epoch": 0.010489556986433259, "grad_norm": 1.0546875, "learning_rate": 0.001988140549695839, "loss": 0.4183, "step": 5916 }, { "epoch": 0.010493103151743073, "grad_norm": 0.546875, "learning_rate": 0.0019881309173685985, "loss": 0.2469, "step": 5918 }, { "epoch": 0.010496649317052888, "grad_norm": 0.443359375, "learning_rate": 0.0019881212811571894, "loss": 0.3144, "step": 5920 }, { "epoch": 0.010500195482362704, "grad_norm": 0.515625, "learning_rate": 0.0019881116410616533, "loss": 0.2961, "step": 5922 }, { "epoch": 0.010503741647672519, "grad_norm": 0.453125, "learning_rate": 0.0019881019970820328, "loss": 0.2274, "step": 5924 }, { "epoch": 0.010507287812982333, "grad_norm": 1.1796875, "learning_rate": 0.0019880923492183703, "loss": 0.2423, "step": 5926 }, { "epoch": 0.01051083397829215, "grad_norm": 0.66796875, "learning_rate": 0.0019880826974707074, "loss": 0.3151, "step": 5928 }, { "epoch": 0.010514380143601964, "grad_norm": 0.55078125, "learning_rate": 0.001988073041839087, "loss": 0.2301, "step": 5930 }, { "epoch": 0.01051792630891178, "grad_norm": 0.328125, "learning_rate": 0.001988063382323551, "loss": 0.2753, "step": 5932 }, { "epoch": 0.010521472474221595, "grad_norm": 2.671875, "learning_rate": 0.001988053718924141, "loss": 0.4232, "step": 5934 }, { "epoch": 0.01052501863953141, "grad_norm": 3.234375, "learning_rate": 0.0019880440516409003, "loss": 0.3246, "step": 5936 }, { "epoch": 0.010528564804841226, "grad_norm": 0.361328125, "learning_rate": 0.001988034380473871, "loss": 0.2806, "step": 5938 }, { "epoch": 0.01053211097015104, "grad_norm": 0.79296875, "learning_rate": 0.0019880247054230946, "loss": 0.2824, "step": 5940 }, { "epoch": 0.010535657135460855, "grad_norm": 0.453125, "learning_rate": 0.0019880150264886143, "loss": 0.2641, "step": 5942 }, { "epoch": 0.010539203300770671, "grad_norm": 0.390625, "learning_rate": 0.0019880053436704724, "loss": 0.2486, "step": 5944 }, { "epoch": 0.010542749466080486, "grad_norm": 0.5546875, "learning_rate": 0.00198799565696871, "loss": 0.2487, "step": 5946 }, { "epoch": 0.0105462956313903, "grad_norm": 0.640625, "learning_rate": 0.0019879859663833716, "loss": 0.5694, "step": 5948 }, { "epoch": 0.010549841796700116, "grad_norm": 0.74609375, "learning_rate": 0.0019879762719144977, "loss": 0.3262, "step": 5950 }, { "epoch": 0.010553387962009931, "grad_norm": 0.322265625, "learning_rate": 0.0019879665735621312, "loss": 0.2522, "step": 5952 }, { "epoch": 0.010556934127319745, "grad_norm": 0.421875, "learning_rate": 0.0019879568713263153, "loss": 0.2822, "step": 5954 }, { "epoch": 0.010560480292629562, "grad_norm": 0.61328125, "learning_rate": 0.0019879471652070914, "loss": 0.2267, "step": 5956 }, { "epoch": 0.010564026457939376, "grad_norm": 1.578125, "learning_rate": 0.0019879374552045025, "loss": 0.339, "step": 5958 }, { "epoch": 0.010567572623249191, "grad_norm": 0.7890625, "learning_rate": 0.0019879277413185907, "loss": 0.2751, "step": 5960 }, { "epoch": 0.010571118788559007, "grad_norm": 0.96484375, "learning_rate": 0.0019879180235493994, "loss": 0.4019, "step": 5962 }, { "epoch": 0.010574664953868822, "grad_norm": 0.54296875, "learning_rate": 0.0019879083018969697, "loss": 0.2823, "step": 5964 }, { "epoch": 0.010578211119178638, "grad_norm": 0.8828125, "learning_rate": 0.001987898576361345, "loss": 0.2677, "step": 5966 }, { "epoch": 0.010581757284488453, "grad_norm": 0.35546875, "learning_rate": 0.001987888846942568, "loss": 0.246, "step": 5968 }, { "epoch": 0.010585303449798267, "grad_norm": 0.306640625, "learning_rate": 0.0019878791136406808, "loss": 0.2474, "step": 5970 }, { "epoch": 0.010588849615108083, "grad_norm": 0.2890625, "learning_rate": 0.0019878693764557257, "loss": 0.2399, "step": 5972 }, { "epoch": 0.010592395780417898, "grad_norm": 0.51171875, "learning_rate": 0.001987859635387746, "loss": 0.2762, "step": 5974 }, { "epoch": 0.010595941945727713, "grad_norm": 0.54296875, "learning_rate": 0.0019878498904367845, "loss": 0.2992, "step": 5976 }, { "epoch": 0.010599488111037529, "grad_norm": 1.140625, "learning_rate": 0.0019878401416028825, "loss": 0.2313, "step": 5978 }, { "epoch": 0.010603034276347343, "grad_norm": 1.2109375, "learning_rate": 0.001987830388886084, "loss": 0.5073, "step": 5980 }, { "epoch": 0.010606580441657158, "grad_norm": 0.400390625, "learning_rate": 0.0019878206322864306, "loss": 0.2226, "step": 5982 }, { "epoch": 0.010610126606966974, "grad_norm": 0.87109375, "learning_rate": 0.001987810871803966, "loss": 0.3639, "step": 5984 }, { "epoch": 0.010613672772276789, "grad_norm": 0.51953125, "learning_rate": 0.001987801107438732, "loss": 0.2666, "step": 5986 }, { "epoch": 0.010617218937586603, "grad_norm": 0.32421875, "learning_rate": 0.0019877913391907714, "loss": 0.2591, "step": 5988 }, { "epoch": 0.01062076510289642, "grad_norm": 4.6875, "learning_rate": 0.0019877815670601277, "loss": 0.2809, "step": 5990 }, { "epoch": 0.010624311268206234, "grad_norm": 0.494140625, "learning_rate": 0.0019877717910468428, "loss": 0.2693, "step": 5992 }, { "epoch": 0.010627857433516049, "grad_norm": 0.75390625, "learning_rate": 0.00198776201115096, "loss": 0.2601, "step": 5994 }, { "epoch": 0.010631403598825865, "grad_norm": 0.29296875, "learning_rate": 0.0019877522273725216, "loss": 0.2682, "step": 5996 }, { "epoch": 0.01063494976413568, "grad_norm": 1.1015625, "learning_rate": 0.0019877424397115708, "loss": 0.2899, "step": 5998 }, { "epoch": 0.010638495929445496, "grad_norm": 0.451171875, "learning_rate": 0.00198773264816815, "loss": 0.2456, "step": 6000 }, { "epoch": 0.01064204209475531, "grad_norm": 0.498046875, "learning_rate": 0.0019877228527423025, "loss": 0.2319, "step": 6002 }, { "epoch": 0.010645588260065125, "grad_norm": 0.5390625, "learning_rate": 0.0019877130534340704, "loss": 0.2378, "step": 6004 }, { "epoch": 0.010649134425374941, "grad_norm": 0.275390625, "learning_rate": 0.0019877032502434978, "loss": 0.2863, "step": 6006 }, { "epoch": 0.010652680590684756, "grad_norm": 0.390625, "learning_rate": 0.0019876934431706265, "loss": 0.2035, "step": 6008 }, { "epoch": 0.01065622675599457, "grad_norm": 0.51953125, "learning_rate": 0.0019876836322154996, "loss": 0.2592, "step": 6010 }, { "epoch": 0.010659772921304387, "grad_norm": 1.296875, "learning_rate": 0.0019876738173781605, "loss": 0.2982, "step": 6012 }, { "epoch": 0.010663319086614201, "grad_norm": 0.228515625, "learning_rate": 0.001987663998658651, "loss": 0.23, "step": 6014 }, { "epoch": 0.010666865251924016, "grad_norm": 0.2412109375, "learning_rate": 0.0019876541760570155, "loss": 0.2322, "step": 6016 }, { "epoch": 0.010670411417233832, "grad_norm": 0.267578125, "learning_rate": 0.001987644349573296, "loss": 0.2854, "step": 6018 }, { "epoch": 0.010673957582543647, "grad_norm": 0.6171875, "learning_rate": 0.0019876345192075357, "loss": 0.3641, "step": 6020 }, { "epoch": 0.010677503747853461, "grad_norm": 0.55078125, "learning_rate": 0.0019876246849597776, "loss": 0.227, "step": 6022 }, { "epoch": 0.010681049913163277, "grad_norm": 0.34765625, "learning_rate": 0.001987614846830065, "loss": 0.2549, "step": 6024 }, { "epoch": 0.010684596078473092, "grad_norm": 0.46875, "learning_rate": 0.00198760500481844, "loss": 0.2421, "step": 6026 }, { "epoch": 0.010688142243782906, "grad_norm": 0.625, "learning_rate": 0.001987595158924947, "loss": 0.2925, "step": 6028 }, { "epoch": 0.010691688409092723, "grad_norm": 2.703125, "learning_rate": 0.001987585309149628, "loss": 0.205, "step": 6030 }, { "epoch": 0.010695234574402537, "grad_norm": 0.384765625, "learning_rate": 0.001987575455492526, "loss": 0.4531, "step": 6032 }, { "epoch": 0.010698780739712354, "grad_norm": 0.2412109375, "learning_rate": 0.001987565597953685, "loss": 0.2631, "step": 6034 }, { "epoch": 0.010702326905022168, "grad_norm": 5.78125, "learning_rate": 0.0019875557365331476, "loss": 0.2783, "step": 6036 }, { "epoch": 0.010705873070331983, "grad_norm": 0.859375, "learning_rate": 0.001987545871230957, "loss": 0.254, "step": 6038 }, { "epoch": 0.010709419235641799, "grad_norm": 0.314453125, "learning_rate": 0.0019875360020471565, "loss": 0.2089, "step": 6040 }, { "epoch": 0.010712965400951614, "grad_norm": 0.3984375, "learning_rate": 0.0019875261289817887, "loss": 0.225, "step": 6042 }, { "epoch": 0.010716511566261428, "grad_norm": 0.578125, "learning_rate": 0.0019875162520348972, "loss": 0.2282, "step": 6044 }, { "epoch": 0.010720057731571244, "grad_norm": 0.71875, "learning_rate": 0.001987506371206525, "loss": 0.2575, "step": 6046 }, { "epoch": 0.010723603896881059, "grad_norm": 0.322265625, "learning_rate": 0.0019874964864967162, "loss": 0.2463, "step": 6048 }, { "epoch": 0.010727150062190874, "grad_norm": 0.578125, "learning_rate": 0.001987486597905513, "loss": 0.3057, "step": 6050 }, { "epoch": 0.01073069622750069, "grad_norm": 0.298828125, "learning_rate": 0.0019874767054329583, "loss": 0.259, "step": 6052 }, { "epoch": 0.010734242392810504, "grad_norm": 0.224609375, "learning_rate": 0.0019874668090790965, "loss": 0.2948, "step": 6054 }, { "epoch": 0.010737788558120319, "grad_norm": 0.474609375, "learning_rate": 0.0019874569088439704, "loss": 0.3562, "step": 6056 }, { "epoch": 0.010741334723430135, "grad_norm": 0.51171875, "learning_rate": 0.001987447004727623, "loss": 0.286, "step": 6058 }, { "epoch": 0.01074488088873995, "grad_norm": 1.0390625, "learning_rate": 0.001987437096730098, "loss": 0.4945, "step": 6060 }, { "epoch": 0.010748427054049764, "grad_norm": 0.466796875, "learning_rate": 0.001987427184851439, "loss": 0.2475, "step": 6062 }, { "epoch": 0.01075197321935958, "grad_norm": 0.60546875, "learning_rate": 0.0019874172690916886, "loss": 0.2243, "step": 6064 }, { "epoch": 0.010755519384669395, "grad_norm": 0.30859375, "learning_rate": 0.0019874073494508906, "loss": 0.3817, "step": 6066 }, { "epoch": 0.010759065549979211, "grad_norm": 1.015625, "learning_rate": 0.001987397425929088, "loss": 0.3127, "step": 6068 }, { "epoch": 0.010762611715289026, "grad_norm": 1.0703125, "learning_rate": 0.001987387498526325, "loss": 0.3228, "step": 6070 }, { "epoch": 0.01076615788059884, "grad_norm": 0.45703125, "learning_rate": 0.0019873775672426446, "loss": 0.2621, "step": 6072 }, { "epoch": 0.010769704045908657, "grad_norm": 0.48828125, "learning_rate": 0.00198736763207809, "loss": 0.2767, "step": 6074 }, { "epoch": 0.010773250211218471, "grad_norm": 0.6875, "learning_rate": 0.0019873576930327045, "loss": 0.2513, "step": 6076 }, { "epoch": 0.010776796376528286, "grad_norm": 1.03125, "learning_rate": 0.0019873477501065324, "loss": 0.3358, "step": 6078 }, { "epoch": 0.010780342541838102, "grad_norm": 0.734375, "learning_rate": 0.0019873378032996165, "loss": 0.2464, "step": 6080 }, { "epoch": 0.010783888707147917, "grad_norm": 0.4453125, "learning_rate": 0.0019873278526120006, "loss": 0.3333, "step": 6082 }, { "epoch": 0.010787434872457731, "grad_norm": 0.75, "learning_rate": 0.0019873178980437272, "loss": 0.2762, "step": 6084 }, { "epoch": 0.010790981037767548, "grad_norm": 0.48046875, "learning_rate": 0.001987307939594842, "loss": 0.2529, "step": 6086 }, { "epoch": 0.010794527203077362, "grad_norm": 0.41015625, "learning_rate": 0.0019872979772653865, "loss": 0.2608, "step": 6088 }, { "epoch": 0.010798073368387177, "grad_norm": 0.39453125, "learning_rate": 0.001987288011055405, "loss": 0.3697, "step": 6090 }, { "epoch": 0.010801619533696993, "grad_norm": 2.671875, "learning_rate": 0.0019872780409649414, "loss": 0.4219, "step": 6092 }, { "epoch": 0.010805165699006808, "grad_norm": 1.5234375, "learning_rate": 0.001987268066994039, "loss": 0.3257, "step": 6094 }, { "epoch": 0.010808711864316622, "grad_norm": 0.26171875, "learning_rate": 0.001987258089142742, "loss": 0.2178, "step": 6096 }, { "epoch": 0.010812258029626438, "grad_norm": 0.2099609375, "learning_rate": 0.0019872481074110927, "loss": 0.2763, "step": 6098 }, { "epoch": 0.010815804194936253, "grad_norm": 0.48046875, "learning_rate": 0.001987238121799136, "loss": 0.2948, "step": 6100 }, { "epoch": 0.01081935036024607, "grad_norm": 0.4921875, "learning_rate": 0.001987228132306915, "loss": 0.4501, "step": 6102 }, { "epoch": 0.010822896525555884, "grad_norm": 1.0625, "learning_rate": 0.0019872181389344735, "loss": 0.3309, "step": 6104 }, { "epoch": 0.010826442690865698, "grad_norm": 5.03125, "learning_rate": 0.0019872081416818553, "loss": 0.4552, "step": 6106 }, { "epoch": 0.010829988856175515, "grad_norm": 1.21875, "learning_rate": 0.001987198140549104, "loss": 0.2493, "step": 6108 }, { "epoch": 0.01083353502148533, "grad_norm": 0.400390625, "learning_rate": 0.001987188135536263, "loss": 0.2925, "step": 6110 }, { "epoch": 0.010837081186795144, "grad_norm": 0.390625, "learning_rate": 0.0019871781266433772, "loss": 0.2401, "step": 6112 }, { "epoch": 0.01084062735210496, "grad_norm": 0.63671875, "learning_rate": 0.001987168113870489, "loss": 0.2624, "step": 6114 }, { "epoch": 0.010844173517414775, "grad_norm": 0.78125, "learning_rate": 0.001987158097217643, "loss": 0.296, "step": 6116 }, { "epoch": 0.010847719682724589, "grad_norm": 0.24609375, "learning_rate": 0.0019871480766848826, "loss": 0.2174, "step": 6118 }, { "epoch": 0.010851265848034405, "grad_norm": 0.341796875, "learning_rate": 0.0019871380522722523, "loss": 0.2534, "step": 6120 }, { "epoch": 0.01085481201334422, "grad_norm": 1.015625, "learning_rate": 0.001987128023979795, "loss": 0.2794, "step": 6122 }, { "epoch": 0.010858358178654035, "grad_norm": 0.34375, "learning_rate": 0.0019871179918075554, "loss": 0.2548, "step": 6124 }, { "epoch": 0.01086190434396385, "grad_norm": 0.47265625, "learning_rate": 0.001987107955755577, "loss": 0.2993, "step": 6126 }, { "epoch": 0.010865450509273665, "grad_norm": 0.4765625, "learning_rate": 0.001987097915823903, "loss": 0.2443, "step": 6128 }, { "epoch": 0.01086899667458348, "grad_norm": 0.3125, "learning_rate": 0.0019870878720125787, "loss": 0.2306, "step": 6130 }, { "epoch": 0.010872542839893296, "grad_norm": 0.4765625, "learning_rate": 0.001987077824321647, "loss": 0.2146, "step": 6132 }, { "epoch": 0.01087608900520311, "grad_norm": 5.875, "learning_rate": 0.0019870677727511525, "loss": 0.3652, "step": 6134 }, { "epoch": 0.010879635170512927, "grad_norm": 1.203125, "learning_rate": 0.001987057717301138, "loss": 0.2946, "step": 6136 }, { "epoch": 0.010883181335822742, "grad_norm": 0.4921875, "learning_rate": 0.0019870476579716494, "loss": 0.264, "step": 6138 }, { "epoch": 0.010886727501132556, "grad_norm": 0.25390625, "learning_rate": 0.001987037594762729, "loss": 0.2724, "step": 6140 }, { "epoch": 0.010890273666442372, "grad_norm": 0.380859375, "learning_rate": 0.0019870275276744217, "loss": 0.2383, "step": 6142 }, { "epoch": 0.010893819831752187, "grad_norm": 0.625, "learning_rate": 0.0019870174567067716, "loss": 0.2365, "step": 6144 }, { "epoch": 0.010897365997062002, "grad_norm": 0.37890625, "learning_rate": 0.0019870073818598214, "loss": 0.2421, "step": 6146 }, { "epoch": 0.010900912162371818, "grad_norm": 0.65234375, "learning_rate": 0.0019869973031336166, "loss": 0.2806, "step": 6148 }, { "epoch": 0.010904458327681632, "grad_norm": 0.77734375, "learning_rate": 0.001986987220528201, "loss": 0.3103, "step": 6150 }, { "epoch": 0.010908004492991447, "grad_norm": 1.0078125, "learning_rate": 0.0019869771340436187, "loss": 0.2912, "step": 6152 }, { "epoch": 0.010911550658301263, "grad_norm": 1.25, "learning_rate": 0.001986967043679913, "loss": 0.3896, "step": 6154 }, { "epoch": 0.010915096823611078, "grad_norm": 1.59375, "learning_rate": 0.001986956949437129, "loss": 0.4143, "step": 6156 }, { "epoch": 0.010918642988920892, "grad_norm": 0.29296875, "learning_rate": 0.0019869468513153106, "loss": 0.2488, "step": 6158 }, { "epoch": 0.010922189154230709, "grad_norm": 1.1328125, "learning_rate": 0.001986936749314502, "loss": 0.2611, "step": 6160 }, { "epoch": 0.010925735319540523, "grad_norm": 0.435546875, "learning_rate": 0.001986926643434747, "loss": 0.2172, "step": 6162 }, { "epoch": 0.010929281484850338, "grad_norm": 0.67578125, "learning_rate": 0.00198691653367609, "loss": 0.3055, "step": 6164 }, { "epoch": 0.010932827650160154, "grad_norm": 0.416015625, "learning_rate": 0.001986906420038575, "loss": 0.2987, "step": 6166 }, { "epoch": 0.010936373815469969, "grad_norm": 0.796875, "learning_rate": 0.001986896302522247, "loss": 0.2669, "step": 6168 }, { "epoch": 0.010939919980779785, "grad_norm": 0.4765625, "learning_rate": 0.0019868861811271495, "loss": 0.2294, "step": 6170 }, { "epoch": 0.0109434661460896, "grad_norm": 0.5234375, "learning_rate": 0.001986876055853327, "loss": 0.2986, "step": 6172 }, { "epoch": 0.010947012311399414, "grad_norm": 0.49609375, "learning_rate": 0.001986865926700824, "loss": 0.2823, "step": 6174 }, { "epoch": 0.01095055847670923, "grad_norm": 0.48828125, "learning_rate": 0.0019868557936696847, "loss": 0.2529, "step": 6176 }, { "epoch": 0.010954104642019045, "grad_norm": 0.7578125, "learning_rate": 0.001986845656759953, "loss": 0.2319, "step": 6178 }, { "epoch": 0.01095765080732886, "grad_norm": 0.42578125, "learning_rate": 0.0019868355159716735, "loss": 0.2275, "step": 6180 }, { "epoch": 0.010961196972638676, "grad_norm": 0.298828125, "learning_rate": 0.0019868253713048907, "loss": 0.1985, "step": 6182 }, { "epoch": 0.01096474313794849, "grad_norm": 0.5234375, "learning_rate": 0.001986815222759649, "loss": 0.2351, "step": 6184 }, { "epoch": 0.010968289303258305, "grad_norm": 0.3671875, "learning_rate": 0.001986805070335992, "loss": 0.2689, "step": 6186 }, { "epoch": 0.010971835468568121, "grad_norm": 0.259765625, "learning_rate": 0.0019867949140339658, "loss": 0.2128, "step": 6188 }, { "epoch": 0.010975381633877936, "grad_norm": 0.42578125, "learning_rate": 0.001986784753853613, "loss": 0.2121, "step": 6190 }, { "epoch": 0.01097892779918775, "grad_norm": 0.408203125, "learning_rate": 0.001986774589794979, "loss": 0.1796, "step": 6192 }, { "epoch": 0.010982473964497566, "grad_norm": 0.8984375, "learning_rate": 0.001986764421858108, "loss": 0.3173, "step": 6194 }, { "epoch": 0.010986020129807381, "grad_norm": 0.294921875, "learning_rate": 0.0019867542500430447, "loss": 0.2264, "step": 6196 }, { "epoch": 0.010989566295117196, "grad_norm": 2.171875, "learning_rate": 0.001986744074349833, "loss": 0.3214, "step": 6198 }, { "epoch": 0.010993112460427012, "grad_norm": 0.41796875, "learning_rate": 0.0019867338947785183, "loss": 0.2817, "step": 6200 }, { "epoch": 0.010996658625736826, "grad_norm": 0.84765625, "learning_rate": 0.001986723711329144, "loss": 0.206, "step": 6202 }, { "epoch": 0.011000204791046643, "grad_norm": 1.015625, "learning_rate": 0.001986713524001756, "loss": 0.3411, "step": 6204 }, { "epoch": 0.011003750956356457, "grad_norm": 0.515625, "learning_rate": 0.0019867033327963975, "loss": 0.3907, "step": 6206 }, { "epoch": 0.011007297121666272, "grad_norm": 1.5078125, "learning_rate": 0.001986693137713114, "loss": 0.4127, "step": 6208 }, { "epoch": 0.011010843286976088, "grad_norm": 0.482421875, "learning_rate": 0.0019866829387519495, "loss": 0.2162, "step": 6210 }, { "epoch": 0.011014389452285903, "grad_norm": 2.1875, "learning_rate": 0.001986672735912949, "loss": 0.4836, "step": 6212 }, { "epoch": 0.011017935617595717, "grad_norm": 0.9609375, "learning_rate": 0.001986662529196157, "loss": 0.2363, "step": 6214 }, { "epoch": 0.011021481782905533, "grad_norm": 1.1875, "learning_rate": 0.0019866523186016184, "loss": 0.6834, "step": 6216 }, { "epoch": 0.011025027948215348, "grad_norm": 0.45703125, "learning_rate": 0.0019866421041293773, "loss": 0.2378, "step": 6218 }, { "epoch": 0.011028574113525163, "grad_norm": 0.59765625, "learning_rate": 0.001986631885779479, "loss": 0.2674, "step": 6220 }, { "epoch": 0.011032120278834979, "grad_norm": 0.349609375, "learning_rate": 0.0019866216635519673, "loss": 0.3375, "step": 6222 }, { "epoch": 0.011035666444144793, "grad_norm": 0.3125, "learning_rate": 0.001986611437446888, "loss": 0.4332, "step": 6224 }, { "epoch": 0.011039212609454608, "grad_norm": 0.33984375, "learning_rate": 0.0019866012074642846, "loss": 0.3157, "step": 6226 }, { "epoch": 0.011042758774764424, "grad_norm": 0.345703125, "learning_rate": 0.001986590973604203, "loss": 0.2539, "step": 6228 }, { "epoch": 0.011046304940074239, "grad_norm": 0.2451171875, "learning_rate": 0.001986580735866688, "loss": 0.257, "step": 6230 }, { "epoch": 0.011049851105384053, "grad_norm": 0.31640625, "learning_rate": 0.0019865704942517827, "loss": 0.244, "step": 6232 }, { "epoch": 0.01105339727069387, "grad_norm": 0.40234375, "learning_rate": 0.001986560248759534, "loss": 0.2814, "step": 6234 }, { "epoch": 0.011056943436003684, "grad_norm": 0.427734375, "learning_rate": 0.001986549999389985, "loss": 0.2961, "step": 6236 }, { "epoch": 0.0110604896013135, "grad_norm": 0.5703125, "learning_rate": 0.0019865397461431814, "loss": 0.3519, "step": 6238 }, { "epoch": 0.011064035766623315, "grad_norm": 0.1689453125, "learning_rate": 0.0019865294890191684, "loss": 0.1827, "step": 6240 }, { "epoch": 0.01106758193193313, "grad_norm": 0.283203125, "learning_rate": 0.00198651922801799, "loss": 0.2183, "step": 6242 }, { "epoch": 0.011071128097242946, "grad_norm": 1.4609375, "learning_rate": 0.0019865089631396914, "loss": 0.3905, "step": 6244 }, { "epoch": 0.01107467426255276, "grad_norm": 0.4765625, "learning_rate": 0.0019864986943843176, "loss": 0.2733, "step": 6246 }, { "epoch": 0.011078220427862575, "grad_norm": 0.435546875, "learning_rate": 0.0019864884217519136, "loss": 0.2972, "step": 6248 }, { "epoch": 0.011081766593172391, "grad_norm": 0.478515625, "learning_rate": 0.001986478145242524, "loss": 0.329, "step": 6250 }, { "epoch": 0.011085312758482206, "grad_norm": 0.8203125, "learning_rate": 0.0019864678648561945, "loss": 0.363, "step": 6252 }, { "epoch": 0.01108885892379202, "grad_norm": 0.70703125, "learning_rate": 0.0019864575805929687, "loss": 0.3268, "step": 6254 }, { "epoch": 0.011092405089101837, "grad_norm": 0.59765625, "learning_rate": 0.0019864472924528928, "loss": 0.2758, "step": 6256 }, { "epoch": 0.011095951254411651, "grad_norm": 1.3359375, "learning_rate": 0.0019864370004360112, "loss": 0.2462, "step": 6258 }, { "epoch": 0.011099497419721466, "grad_norm": 0.98828125, "learning_rate": 0.0019864267045423692, "loss": 0.2855, "step": 6260 }, { "epoch": 0.011103043585031282, "grad_norm": 0.3515625, "learning_rate": 0.0019864164047720114, "loss": 0.289, "step": 6262 }, { "epoch": 0.011106589750341097, "grad_norm": 0.625, "learning_rate": 0.0019864061011249834, "loss": 0.6329, "step": 6264 }, { "epoch": 0.011110135915650911, "grad_norm": 0.392578125, "learning_rate": 0.00198639579360133, "loss": 0.241, "step": 6266 }, { "epoch": 0.011113682080960727, "grad_norm": 0.26171875, "learning_rate": 0.001986385482201096, "loss": 0.236, "step": 6268 }, { "epoch": 0.011117228246270542, "grad_norm": 1.28125, "learning_rate": 0.001986375166924327, "loss": 0.314, "step": 6270 }, { "epoch": 0.011120774411580358, "grad_norm": 0.365234375, "learning_rate": 0.001986364847771068, "loss": 0.2481, "step": 6272 }, { "epoch": 0.011124320576890173, "grad_norm": 0.369140625, "learning_rate": 0.0019863545247413637, "loss": 0.2299, "step": 6274 }, { "epoch": 0.011127866742199987, "grad_norm": 0.87890625, "learning_rate": 0.00198634419783526, "loss": 0.2389, "step": 6276 }, { "epoch": 0.011131412907509804, "grad_norm": 0.3359375, "learning_rate": 0.0019863338670528014, "loss": 0.2651, "step": 6278 }, { "epoch": 0.011134959072819618, "grad_norm": 4.59375, "learning_rate": 0.001986323532394033, "loss": 0.2686, "step": 6280 }, { "epoch": 0.011138505238129433, "grad_norm": 0.306640625, "learning_rate": 0.0019863131938590004, "loss": 0.2385, "step": 6282 }, { "epoch": 0.011142051403439249, "grad_norm": 0.3046875, "learning_rate": 0.0019863028514477492, "loss": 0.3189, "step": 6284 }, { "epoch": 0.011145597568749064, "grad_norm": 1.1796875, "learning_rate": 0.001986292505160324, "loss": 0.3359, "step": 6286 }, { "epoch": 0.011149143734058878, "grad_norm": 0.8671875, "learning_rate": 0.00198628215499677, "loss": 0.2666, "step": 6288 }, { "epoch": 0.011152689899368694, "grad_norm": 0.625, "learning_rate": 0.0019862718009571326, "loss": 0.3466, "step": 6290 }, { "epoch": 0.011156236064678509, "grad_norm": 0.4375, "learning_rate": 0.0019862614430414573, "loss": 0.2416, "step": 6292 }, { "epoch": 0.011159782229988324, "grad_norm": 0.30859375, "learning_rate": 0.001986251081249789, "loss": 0.209, "step": 6294 }, { "epoch": 0.01116332839529814, "grad_norm": 0.6484375, "learning_rate": 0.0019862407155821736, "loss": 0.259, "step": 6296 }, { "epoch": 0.011166874560607954, "grad_norm": 1.78125, "learning_rate": 0.001986230346038656, "loss": 0.3313, "step": 6298 }, { "epoch": 0.011170420725917769, "grad_norm": 0.6953125, "learning_rate": 0.0019862199726192816, "loss": 0.2995, "step": 6300 }, { "epoch": 0.011173966891227585, "grad_norm": 0.3046875, "learning_rate": 0.001986209595324096, "loss": 0.2362, "step": 6302 }, { "epoch": 0.0111775130565374, "grad_norm": 1.2890625, "learning_rate": 0.001986199214153144, "loss": 0.291, "step": 6304 }, { "epoch": 0.011181059221847216, "grad_norm": 0.79296875, "learning_rate": 0.0019861888291064717, "loss": 0.2559, "step": 6306 }, { "epoch": 0.01118460538715703, "grad_norm": 0.2060546875, "learning_rate": 0.0019861784401841243, "loss": 0.2296, "step": 6308 }, { "epoch": 0.011188151552466845, "grad_norm": 0.439453125, "learning_rate": 0.001986168047386147, "loss": 0.2676, "step": 6310 }, { "epoch": 0.011191697717776661, "grad_norm": 1.6953125, "learning_rate": 0.0019861576507125855, "loss": 0.5058, "step": 6312 }, { "epoch": 0.011195243883086476, "grad_norm": 0.51171875, "learning_rate": 0.001986147250163485, "loss": 0.2751, "step": 6314 }, { "epoch": 0.01119879004839629, "grad_norm": 0.59765625, "learning_rate": 0.0019861368457388916, "loss": 0.3495, "step": 6316 }, { "epoch": 0.011202336213706107, "grad_norm": 0.2353515625, "learning_rate": 0.00198612643743885, "loss": 0.354, "step": 6318 }, { "epoch": 0.011205882379015921, "grad_norm": 0.484375, "learning_rate": 0.001986116025263406, "loss": 0.2894, "step": 6320 }, { "epoch": 0.011209428544325736, "grad_norm": 0.45703125, "learning_rate": 0.0019861056092126054, "loss": 0.2521, "step": 6322 }, { "epoch": 0.011212974709635552, "grad_norm": 0.69140625, "learning_rate": 0.0019860951892864934, "loss": 0.2555, "step": 6324 }, { "epoch": 0.011216520874945367, "grad_norm": 0.8359375, "learning_rate": 0.001986084765485116, "loss": 0.3349, "step": 6326 }, { "epoch": 0.011220067040255181, "grad_norm": 0.30078125, "learning_rate": 0.0019860743378085186, "loss": 0.2099, "step": 6328 }, { "epoch": 0.011223613205564998, "grad_norm": 0.8125, "learning_rate": 0.0019860639062567464, "loss": 0.24, "step": 6330 }, { "epoch": 0.011227159370874812, "grad_norm": 0.51171875, "learning_rate": 0.001986053470829846, "loss": 0.2166, "step": 6332 }, { "epoch": 0.011230705536184627, "grad_norm": 0.68359375, "learning_rate": 0.0019860430315278618, "loss": 0.2391, "step": 6334 }, { "epoch": 0.011234251701494443, "grad_norm": 0.56640625, "learning_rate": 0.00198603258835084, "loss": 0.3065, "step": 6336 }, { "epoch": 0.011237797866804258, "grad_norm": 1.2734375, "learning_rate": 0.0019860221412988264, "loss": 0.3638, "step": 6338 }, { "epoch": 0.011241344032114074, "grad_norm": 0.6875, "learning_rate": 0.0019860116903718666, "loss": 0.2502, "step": 6340 }, { "epoch": 0.011244890197423888, "grad_norm": 0.80859375, "learning_rate": 0.0019860012355700065, "loss": 0.2676, "step": 6342 }, { "epoch": 0.011248436362733703, "grad_norm": 0.91796875, "learning_rate": 0.001985990776893292, "loss": 0.2933, "step": 6344 }, { "epoch": 0.01125198252804352, "grad_norm": 0.70703125, "learning_rate": 0.001985980314341768, "loss": 0.2384, "step": 6346 }, { "epoch": 0.011255528693353334, "grad_norm": 0.4140625, "learning_rate": 0.0019859698479154806, "loss": 0.2574, "step": 6348 }, { "epoch": 0.011259074858663148, "grad_norm": 1.03125, "learning_rate": 0.001985959377614476, "loss": 0.255, "step": 6350 }, { "epoch": 0.011262621023972965, "grad_norm": 1.4296875, "learning_rate": 0.0019859489034387994, "loss": 0.5737, "step": 6352 }, { "epoch": 0.01126616718928278, "grad_norm": 0.384765625, "learning_rate": 0.0019859384253884975, "loss": 0.2619, "step": 6354 }, { "epoch": 0.011269713354592594, "grad_norm": 0.69921875, "learning_rate": 0.001985927943463615, "loss": 0.2124, "step": 6356 }, { "epoch": 0.01127325951990241, "grad_norm": 17.0, "learning_rate": 0.001985917457664199, "loss": 0.3274, "step": 6358 }, { "epoch": 0.011276805685212225, "grad_norm": 0.87890625, "learning_rate": 0.0019859069679902938, "loss": 0.2299, "step": 6360 }, { "epoch": 0.01128035185052204, "grad_norm": 1.15625, "learning_rate": 0.0019858964744419467, "loss": 0.2498, "step": 6362 }, { "epoch": 0.011283898015831855, "grad_norm": 2.28125, "learning_rate": 0.0019858859770192027, "loss": 0.3732, "step": 6364 }, { "epoch": 0.01128744418114167, "grad_norm": 0.53125, "learning_rate": 0.001985875475722108, "loss": 0.3462, "step": 6366 }, { "epoch": 0.011290990346451485, "grad_norm": 0.1943359375, "learning_rate": 0.0019858649705507088, "loss": 0.2156, "step": 6368 }, { "epoch": 0.0112945365117613, "grad_norm": 0.30078125, "learning_rate": 0.0019858544615050503, "loss": 0.3415, "step": 6370 }, { "epoch": 0.011298082677071115, "grad_norm": 1.46875, "learning_rate": 0.0019858439485851793, "loss": 0.2704, "step": 6372 }, { "epoch": 0.011301628842380932, "grad_norm": 0.42578125, "learning_rate": 0.0019858334317911413, "loss": 0.3162, "step": 6374 }, { "epoch": 0.011305175007690746, "grad_norm": 0.5703125, "learning_rate": 0.0019858229111229826, "loss": 0.2298, "step": 6376 }, { "epoch": 0.01130872117300056, "grad_norm": 0.396484375, "learning_rate": 0.0019858123865807487, "loss": 0.2279, "step": 6378 }, { "epoch": 0.011312267338310377, "grad_norm": 0.4609375, "learning_rate": 0.0019858018581644862, "loss": 0.3286, "step": 6380 }, { "epoch": 0.011315813503620192, "grad_norm": 0.53125, "learning_rate": 0.0019857913258742414, "loss": 0.4062, "step": 6382 }, { "epoch": 0.011319359668930006, "grad_norm": 0.55078125, "learning_rate": 0.0019857807897100594, "loss": 0.2201, "step": 6384 }, { "epoch": 0.011322905834239822, "grad_norm": 0.6171875, "learning_rate": 0.0019857702496719866, "loss": 0.2164, "step": 6386 }, { "epoch": 0.011326451999549637, "grad_norm": 0.1630859375, "learning_rate": 0.0019857597057600694, "loss": 0.217, "step": 6388 }, { "epoch": 0.011329998164859452, "grad_norm": 0.318359375, "learning_rate": 0.001985749157974354, "loss": 0.2746, "step": 6390 }, { "epoch": 0.011333544330169268, "grad_norm": 1.2109375, "learning_rate": 0.001985738606314886, "loss": 0.6119, "step": 6392 }, { "epoch": 0.011337090495479082, "grad_norm": 0.5, "learning_rate": 0.0019857280507817117, "loss": 0.2621, "step": 6394 }, { "epoch": 0.011340636660788897, "grad_norm": 0.99609375, "learning_rate": 0.0019857174913748775, "loss": 0.2558, "step": 6396 }, { "epoch": 0.011344182826098713, "grad_norm": 0.58984375, "learning_rate": 0.0019857069280944297, "loss": 0.279, "step": 6398 }, { "epoch": 0.011347728991408528, "grad_norm": 0.7421875, "learning_rate": 0.0019856963609404137, "loss": 0.2397, "step": 6400 }, { "epoch": 0.011351275156718342, "grad_norm": 0.318359375, "learning_rate": 0.001985685789912877, "loss": 0.1866, "step": 6402 }, { "epoch": 0.011354821322028159, "grad_norm": 0.4453125, "learning_rate": 0.0019856752150118648, "loss": 0.3086, "step": 6404 }, { "epoch": 0.011358367487337973, "grad_norm": 0.71875, "learning_rate": 0.0019856646362374237, "loss": 0.2461, "step": 6406 }, { "epoch": 0.01136191365264779, "grad_norm": 0.98828125, "learning_rate": 0.0019856540535896, "loss": 0.3312, "step": 6408 }, { "epoch": 0.011365459817957604, "grad_norm": 0.400390625, "learning_rate": 0.00198564346706844, "loss": 0.2237, "step": 6410 }, { "epoch": 0.011369005983267419, "grad_norm": 0.64453125, "learning_rate": 0.0019856328766739897, "loss": 0.3129, "step": 6412 }, { "epoch": 0.011372552148577235, "grad_norm": 0.400390625, "learning_rate": 0.0019856222824062957, "loss": 0.2855, "step": 6414 }, { "epoch": 0.01137609831388705, "grad_norm": 0.75390625, "learning_rate": 0.0019856116842654043, "loss": 0.2575, "step": 6416 }, { "epoch": 0.011379644479196864, "grad_norm": 1.3828125, "learning_rate": 0.0019856010822513616, "loss": 0.509, "step": 6418 }, { "epoch": 0.01138319064450668, "grad_norm": 1.0234375, "learning_rate": 0.0019855904763642143, "loss": 0.2234, "step": 6420 }, { "epoch": 0.011386736809816495, "grad_norm": 0.5859375, "learning_rate": 0.001985579866604009, "loss": 0.2209, "step": 6422 }, { "epoch": 0.01139028297512631, "grad_norm": 0.337890625, "learning_rate": 0.0019855692529707914, "loss": 0.3473, "step": 6424 }, { "epoch": 0.011393829140436126, "grad_norm": 0.41015625, "learning_rate": 0.0019855586354646086, "loss": 0.272, "step": 6426 }, { "epoch": 0.01139737530574594, "grad_norm": 0.38671875, "learning_rate": 0.0019855480140855064, "loss": 0.2868, "step": 6428 }, { "epoch": 0.011400921471055755, "grad_norm": 0.365234375, "learning_rate": 0.0019855373888335317, "loss": 0.3183, "step": 6430 }, { "epoch": 0.011404467636365571, "grad_norm": 0.53515625, "learning_rate": 0.001985526759708731, "loss": 0.2578, "step": 6432 }, { "epoch": 0.011408013801675386, "grad_norm": 0.67578125, "learning_rate": 0.0019855161267111504, "loss": 0.2703, "step": 6434 }, { "epoch": 0.0114115599669852, "grad_norm": 0.43359375, "learning_rate": 0.0019855054898408366, "loss": 0.2035, "step": 6436 }, { "epoch": 0.011415106132295016, "grad_norm": 0.32421875, "learning_rate": 0.0019854948490978363, "loss": 0.2835, "step": 6438 }, { "epoch": 0.011418652297604831, "grad_norm": 0.3984375, "learning_rate": 0.001985484204482196, "loss": 0.2202, "step": 6440 }, { "epoch": 0.011422198462914647, "grad_norm": 0.546875, "learning_rate": 0.001985473555993962, "loss": 0.2038, "step": 6442 }, { "epoch": 0.011425744628224462, "grad_norm": 0.3671875, "learning_rate": 0.001985462903633181, "loss": 0.2454, "step": 6444 }, { "epoch": 0.011429290793534276, "grad_norm": 0.78515625, "learning_rate": 0.0019854522473998996, "loss": 0.3056, "step": 6446 }, { "epoch": 0.011432836958844093, "grad_norm": 13.25, "learning_rate": 0.0019854415872941644, "loss": 0.3753, "step": 6448 }, { "epoch": 0.011436383124153907, "grad_norm": 0.482421875, "learning_rate": 0.001985430923316022, "loss": 0.2377, "step": 6450 }, { "epoch": 0.011439929289463722, "grad_norm": 0.453125, "learning_rate": 0.001985420255465519, "loss": 0.352, "step": 6452 }, { "epoch": 0.011443475454773538, "grad_norm": 0.43359375, "learning_rate": 0.0019854095837427022, "loss": 0.3862, "step": 6454 }, { "epoch": 0.011447021620083353, "grad_norm": 0.6640625, "learning_rate": 0.001985398908147618, "loss": 0.3506, "step": 6456 }, { "epoch": 0.011450567785393167, "grad_norm": 0.486328125, "learning_rate": 0.0019853882286803137, "loss": 0.2485, "step": 6458 }, { "epoch": 0.011454113950702983, "grad_norm": 0.408203125, "learning_rate": 0.001985377545340835, "loss": 0.2664, "step": 6460 }, { "epoch": 0.011457660116012798, "grad_norm": 1.046875, "learning_rate": 0.0019853668581292297, "loss": 0.2627, "step": 6462 }, { "epoch": 0.011461206281322613, "grad_norm": 1.734375, "learning_rate": 0.001985356167045544, "loss": 0.5401, "step": 6464 }, { "epoch": 0.011464752446632429, "grad_norm": 0.396484375, "learning_rate": 0.0019853454720898246, "loss": 0.2897, "step": 6466 }, { "epoch": 0.011468298611942243, "grad_norm": 0.7890625, "learning_rate": 0.0019853347732621185, "loss": 0.2578, "step": 6468 }, { "epoch": 0.011471844777252058, "grad_norm": 0.3671875, "learning_rate": 0.0019853240705624718, "loss": 0.3041, "step": 6470 }, { "epoch": 0.011475390942561874, "grad_norm": 1.7109375, "learning_rate": 0.0019853133639909327, "loss": 0.2205, "step": 6472 }, { "epoch": 0.011478937107871689, "grad_norm": 10.125, "learning_rate": 0.001985302653547547, "loss": 0.2979, "step": 6474 }, { "epoch": 0.011482483273181505, "grad_norm": 0.392578125, "learning_rate": 0.0019852919392323613, "loss": 0.2948, "step": 6476 }, { "epoch": 0.01148602943849132, "grad_norm": 0.27734375, "learning_rate": 0.001985281221045423, "loss": 0.3383, "step": 6478 }, { "epoch": 0.011489575603801134, "grad_norm": 6.125, "learning_rate": 0.001985270498986779, "loss": 0.5451, "step": 6480 }, { "epoch": 0.01149312176911095, "grad_norm": 2.015625, "learning_rate": 0.001985259773056476, "loss": 0.3665, "step": 6482 }, { "epoch": 0.011496667934420765, "grad_norm": 0.40234375, "learning_rate": 0.0019852490432545615, "loss": 0.2287, "step": 6484 }, { "epoch": 0.01150021409973058, "grad_norm": 0.8828125, "learning_rate": 0.001985238309581081, "loss": 0.2771, "step": 6486 }, { "epoch": 0.011503760265040396, "grad_norm": 0.494140625, "learning_rate": 0.001985227572036083, "loss": 0.2507, "step": 6488 }, { "epoch": 0.01150730643035021, "grad_norm": 1.0546875, "learning_rate": 0.0019852168306196136, "loss": 0.2873, "step": 6490 }, { "epoch": 0.011510852595660025, "grad_norm": 1.4609375, "learning_rate": 0.0019852060853317198, "loss": 0.3437, "step": 6492 }, { "epoch": 0.011514398760969841, "grad_norm": 0.37890625, "learning_rate": 0.001985195336172449, "loss": 0.2354, "step": 6494 }, { "epoch": 0.011517944926279656, "grad_norm": 0.26171875, "learning_rate": 0.001985184583141848, "loss": 0.2171, "step": 6496 }, { "epoch": 0.01152149109158947, "grad_norm": 3.046875, "learning_rate": 0.001985173826239964, "loss": 0.3967, "step": 6498 }, { "epoch": 0.011525037256899287, "grad_norm": 3.34375, "learning_rate": 0.001985163065466843, "loss": 0.6066, "step": 6500 }, { "epoch": 0.011528583422209101, "grad_norm": 1.375, "learning_rate": 0.001985152300822534, "loss": 0.3862, "step": 6502 }, { "epoch": 0.011532129587518916, "grad_norm": 0.39453125, "learning_rate": 0.0019851415323070823, "loss": 0.2662, "step": 6504 }, { "epoch": 0.011535675752828732, "grad_norm": 0.625, "learning_rate": 0.001985130759920536, "loss": 0.2981, "step": 6506 }, { "epoch": 0.011539221918138547, "grad_norm": 1.3203125, "learning_rate": 0.0019851199836629415, "loss": 0.2541, "step": 6508 }, { "epoch": 0.011542768083448363, "grad_norm": 2.515625, "learning_rate": 0.0019851092035343466, "loss": 0.2319, "step": 6510 }, { "epoch": 0.011546314248758177, "grad_norm": 1.015625, "learning_rate": 0.0019850984195347986, "loss": 0.3721, "step": 6512 }, { "epoch": 0.011549860414067992, "grad_norm": 0.42578125, "learning_rate": 0.0019850876316643436, "loss": 0.3427, "step": 6514 }, { "epoch": 0.011553406579377808, "grad_norm": 2.96875, "learning_rate": 0.0019850768399230297, "loss": 0.3732, "step": 6516 }, { "epoch": 0.011556952744687623, "grad_norm": 0.44140625, "learning_rate": 0.0019850660443109036, "loss": 0.3362, "step": 6518 }, { "epoch": 0.011560498909997437, "grad_norm": 0.6171875, "learning_rate": 0.001985055244828013, "loss": 0.3409, "step": 6520 }, { "epoch": 0.011564045075307254, "grad_norm": 0.7265625, "learning_rate": 0.001985044441474405, "loss": 0.3454, "step": 6522 }, { "epoch": 0.011567591240617068, "grad_norm": 0.306640625, "learning_rate": 0.001985033634250126, "loss": 0.3016, "step": 6524 }, { "epoch": 0.011571137405926883, "grad_norm": 0.3359375, "learning_rate": 0.0019850228231552245, "loss": 0.4201, "step": 6526 }, { "epoch": 0.011574683571236699, "grad_norm": 0.76953125, "learning_rate": 0.001985012008189747, "loss": 0.2919, "step": 6528 }, { "epoch": 0.011578229736546514, "grad_norm": 0.50390625, "learning_rate": 0.0019850011893537416, "loss": 0.3108, "step": 6530 }, { "epoch": 0.011581775901856328, "grad_norm": 0.66015625, "learning_rate": 0.0019849903666472545, "loss": 0.3685, "step": 6532 }, { "epoch": 0.011585322067166145, "grad_norm": 0.43359375, "learning_rate": 0.001984979540070334, "loss": 0.3112, "step": 6534 }, { "epoch": 0.011588868232475959, "grad_norm": 3.234375, "learning_rate": 0.0019849687096230267, "loss": 0.4082, "step": 6536 }, { "epoch": 0.011592414397785774, "grad_norm": 1.3515625, "learning_rate": 0.0019849578753053806, "loss": 0.6489, "step": 6538 }, { "epoch": 0.01159596056309559, "grad_norm": 1.0703125, "learning_rate": 0.0019849470371174427, "loss": 0.288, "step": 6540 }, { "epoch": 0.011599506728405404, "grad_norm": 0.68359375, "learning_rate": 0.0019849361950592605, "loss": 0.2855, "step": 6542 }, { "epoch": 0.01160305289371522, "grad_norm": 1.171875, "learning_rate": 0.0019849253491308816, "loss": 0.2629, "step": 6544 }, { "epoch": 0.011606599059025035, "grad_norm": 0.5703125, "learning_rate": 0.0019849144993323528, "loss": 0.1863, "step": 6546 }, { "epoch": 0.01161014522433485, "grad_norm": 0.54296875, "learning_rate": 0.0019849036456637222, "loss": 0.2295, "step": 6548 }, { "epoch": 0.011613691389644666, "grad_norm": 1.6328125, "learning_rate": 0.001984892788125037, "loss": 0.3621, "step": 6550 }, { "epoch": 0.01161723755495448, "grad_norm": 0.59765625, "learning_rate": 0.001984881926716345, "loss": 0.2898, "step": 6552 }, { "epoch": 0.011620783720264295, "grad_norm": 0.5546875, "learning_rate": 0.001984871061437693, "loss": 0.2619, "step": 6554 }, { "epoch": 0.011624329885574112, "grad_norm": 0.365234375, "learning_rate": 0.0019848601922891292, "loss": 0.2608, "step": 6556 }, { "epoch": 0.011627876050883926, "grad_norm": 0.625, "learning_rate": 0.0019848493192707014, "loss": 0.4518, "step": 6558 }, { "epoch": 0.01163142221619374, "grad_norm": 2.171875, "learning_rate": 0.0019848384423824562, "loss": 0.3408, "step": 6560 }, { "epoch": 0.011634968381503557, "grad_norm": 0.462890625, "learning_rate": 0.0019848275616244416, "loss": 0.2635, "step": 6562 }, { "epoch": 0.011638514546813371, "grad_norm": 0.482421875, "learning_rate": 0.001984816676996705, "loss": 0.2266, "step": 6564 }, { "epoch": 0.011642060712123186, "grad_norm": 0.41796875, "learning_rate": 0.0019848057884992946, "loss": 0.2707, "step": 6566 }, { "epoch": 0.011645606877433002, "grad_norm": 0.47265625, "learning_rate": 0.0019847948961322576, "loss": 0.3033, "step": 6568 }, { "epoch": 0.011649153042742817, "grad_norm": 0.416015625, "learning_rate": 0.001984783999895642, "loss": 0.3255, "step": 6570 }, { "epoch": 0.011652699208052631, "grad_norm": 0.515625, "learning_rate": 0.0019847730997894944, "loss": 0.3003, "step": 6572 }, { "epoch": 0.011656245373362448, "grad_norm": 0.63671875, "learning_rate": 0.0019847621958138635, "loss": 0.277, "step": 6574 }, { "epoch": 0.011659791538672262, "grad_norm": 0.32421875, "learning_rate": 0.0019847512879687967, "loss": 0.2551, "step": 6576 }, { "epoch": 0.011663337703982079, "grad_norm": 0.71875, "learning_rate": 0.001984740376254342, "loss": 0.2087, "step": 6578 }, { "epoch": 0.011666883869291893, "grad_norm": 1.3203125, "learning_rate": 0.0019847294606705466, "loss": 0.3965, "step": 6580 }, { "epoch": 0.011670430034601708, "grad_norm": 1.0078125, "learning_rate": 0.0019847185412174583, "loss": 0.3613, "step": 6582 }, { "epoch": 0.011673976199911524, "grad_norm": 0.4375, "learning_rate": 0.001984707617895125, "loss": 0.3577, "step": 6584 }, { "epoch": 0.011677522365221338, "grad_norm": 0.53515625, "learning_rate": 0.0019846966907035943, "loss": 0.1842, "step": 6586 }, { "epoch": 0.011681068530531153, "grad_norm": 0.82421875, "learning_rate": 0.0019846857596429145, "loss": 0.3677, "step": 6588 }, { "epoch": 0.01168461469584097, "grad_norm": 0.310546875, "learning_rate": 0.0019846748247131334, "loss": 0.2998, "step": 6590 }, { "epoch": 0.011688160861150784, "grad_norm": 0.515625, "learning_rate": 0.001984663885914298, "loss": 0.2502, "step": 6592 }, { "epoch": 0.011691707026460598, "grad_norm": 1.3046875, "learning_rate": 0.001984652943246457, "loss": 0.3343, "step": 6594 }, { "epoch": 0.011695253191770415, "grad_norm": 1.4921875, "learning_rate": 0.001984641996709657, "loss": 0.3007, "step": 6596 }, { "epoch": 0.01169879935708023, "grad_norm": 2.171875, "learning_rate": 0.001984631046303948, "loss": 0.4247, "step": 6598 }, { "epoch": 0.011702345522390044, "grad_norm": 0.51953125, "learning_rate": 0.0019846200920293757, "loss": 0.207, "step": 6600 }, { "epoch": 0.01170589168769986, "grad_norm": 5.0625, "learning_rate": 0.0019846091338859896, "loss": 0.5443, "step": 6602 }, { "epoch": 0.011709437853009675, "grad_norm": 0.71484375, "learning_rate": 0.0019845981718738365, "loss": 0.2723, "step": 6604 }, { "epoch": 0.01171298401831949, "grad_norm": 2.15625, "learning_rate": 0.0019845872059929648, "loss": 0.2741, "step": 6606 }, { "epoch": 0.011716530183629306, "grad_norm": 1.875, "learning_rate": 0.001984576236243423, "loss": 0.2785, "step": 6608 }, { "epoch": 0.01172007634893912, "grad_norm": 0.41015625, "learning_rate": 0.0019845652626252585, "loss": 0.2408, "step": 6610 }, { "epoch": 0.011723622514248936, "grad_norm": 0.51171875, "learning_rate": 0.001984554285138519, "loss": 0.357, "step": 6612 }, { "epoch": 0.011727168679558751, "grad_norm": 0.515625, "learning_rate": 0.001984543303783253, "loss": 0.245, "step": 6614 }, { "epoch": 0.011730714844868565, "grad_norm": 0.29296875, "learning_rate": 0.0019845323185595084, "loss": 0.1853, "step": 6616 }, { "epoch": 0.011734261010178382, "grad_norm": 0.859375, "learning_rate": 0.001984521329467333, "loss": 0.2566, "step": 6618 }, { "epoch": 0.011737807175488196, "grad_norm": 0.76953125, "learning_rate": 0.0019845103365067757, "loss": 0.2525, "step": 6620 }, { "epoch": 0.01174135334079801, "grad_norm": 0.61328125, "learning_rate": 0.0019844993396778833, "loss": 0.2193, "step": 6622 }, { "epoch": 0.011744899506107827, "grad_norm": 0.474609375, "learning_rate": 0.001984488338980705, "loss": 0.2799, "step": 6624 }, { "epoch": 0.011748445671417642, "grad_norm": 0.5859375, "learning_rate": 0.0019844773344152885, "loss": 0.2092, "step": 6626 }, { "epoch": 0.011751991836727456, "grad_norm": 0.419921875, "learning_rate": 0.0019844663259816816, "loss": 0.2723, "step": 6628 }, { "epoch": 0.011755538002037273, "grad_norm": 0.6328125, "learning_rate": 0.001984455313679933, "loss": 0.2936, "step": 6630 }, { "epoch": 0.011759084167347087, "grad_norm": 0.890625, "learning_rate": 0.0019844442975100905, "loss": 0.2109, "step": 6632 }, { "epoch": 0.011762630332656902, "grad_norm": 1.28125, "learning_rate": 0.0019844332774722026, "loss": 0.3474, "step": 6634 }, { "epoch": 0.011766176497966718, "grad_norm": 0.4296875, "learning_rate": 0.001984422253566317, "loss": 0.3077, "step": 6636 }, { "epoch": 0.011769722663276532, "grad_norm": 0.40234375, "learning_rate": 0.001984411225792482, "loss": 0.1891, "step": 6638 }, { "epoch": 0.011773268828586347, "grad_norm": 1.6015625, "learning_rate": 0.001984400194150747, "loss": 0.2661, "step": 6640 }, { "epoch": 0.011776814993896163, "grad_norm": 0.349609375, "learning_rate": 0.0019843891586411584, "loss": 0.2582, "step": 6642 }, { "epoch": 0.011780361159205978, "grad_norm": 0.48828125, "learning_rate": 0.0019843781192637657, "loss": 0.2815, "step": 6644 }, { "epoch": 0.011783907324515792, "grad_norm": 0.341796875, "learning_rate": 0.0019843670760186167, "loss": 0.2513, "step": 6646 }, { "epoch": 0.011787453489825609, "grad_norm": 1.671875, "learning_rate": 0.0019843560289057594, "loss": 0.3517, "step": 6648 }, { "epoch": 0.011790999655135423, "grad_norm": 1.0625, "learning_rate": 0.001984344977925243, "loss": 0.217, "step": 6650 }, { "epoch": 0.01179454582044524, "grad_norm": 0.255859375, "learning_rate": 0.001984333923077115, "loss": 0.2576, "step": 6652 }, { "epoch": 0.011798091985755054, "grad_norm": 0.625, "learning_rate": 0.001984322864361424, "loss": 0.2353, "step": 6654 }, { "epoch": 0.011801638151064869, "grad_norm": 0.408203125, "learning_rate": 0.0019843118017782192, "loss": 0.2349, "step": 6656 }, { "epoch": 0.011805184316374685, "grad_norm": 0.25390625, "learning_rate": 0.001984300735327548, "loss": 0.2386, "step": 6658 }, { "epoch": 0.0118087304816845, "grad_norm": 0.4140625, "learning_rate": 0.0019842896650094587, "loss": 0.2554, "step": 6660 }, { "epoch": 0.011812276646994314, "grad_norm": 0.255859375, "learning_rate": 0.0019842785908240003, "loss": 0.239, "step": 6662 }, { "epoch": 0.01181582281230413, "grad_norm": 0.28125, "learning_rate": 0.001984267512771221, "loss": 0.2206, "step": 6664 }, { "epoch": 0.011819368977613945, "grad_norm": 1.90625, "learning_rate": 0.001984256430851169, "loss": 0.3458, "step": 6666 }, { "epoch": 0.01182291514292376, "grad_norm": 0.259765625, "learning_rate": 0.0019842453450638936, "loss": 0.2103, "step": 6668 }, { "epoch": 0.011826461308233576, "grad_norm": 0.8984375, "learning_rate": 0.0019842342554094424, "loss": 0.3074, "step": 6670 }, { "epoch": 0.01183000747354339, "grad_norm": 0.7421875, "learning_rate": 0.001984223161887864, "loss": 0.2499, "step": 6672 }, { "epoch": 0.011833553638853205, "grad_norm": 0.86328125, "learning_rate": 0.001984212064499207, "loss": 0.271, "step": 6674 }, { "epoch": 0.011837099804163021, "grad_norm": 0.435546875, "learning_rate": 0.0019842009632435203, "loss": 0.4066, "step": 6676 }, { "epoch": 0.011840645969472836, "grad_norm": 0.443359375, "learning_rate": 0.0019841898581208525, "loss": 0.2566, "step": 6678 }, { "epoch": 0.01184419213478265, "grad_norm": 0.85546875, "learning_rate": 0.0019841787491312515, "loss": 0.265, "step": 6680 }, { "epoch": 0.011847738300092467, "grad_norm": 1.0390625, "learning_rate": 0.0019841676362747657, "loss": 0.2992, "step": 6682 }, { "epoch": 0.011851284465402281, "grad_norm": 0.54296875, "learning_rate": 0.001984156519551445, "loss": 0.3472, "step": 6684 }, { "epoch": 0.011854830630712097, "grad_norm": 0.396484375, "learning_rate": 0.001984145398961337, "loss": 0.2074, "step": 6686 }, { "epoch": 0.011858376796021912, "grad_norm": 0.265625, "learning_rate": 0.001984134274504491, "loss": 0.2638, "step": 6688 }, { "epoch": 0.011861922961331726, "grad_norm": 0.28125, "learning_rate": 0.001984123146180955, "loss": 0.2428, "step": 6690 }, { "epoch": 0.011865469126641543, "grad_norm": 0.38671875, "learning_rate": 0.0019841120139907774, "loss": 0.2761, "step": 6692 }, { "epoch": 0.011869015291951357, "grad_norm": 0.49609375, "learning_rate": 0.001984100877934008, "loss": 0.3499, "step": 6694 }, { "epoch": 0.011872561457261172, "grad_norm": 0.50390625, "learning_rate": 0.0019840897380106947, "loss": 0.2732, "step": 6696 }, { "epoch": 0.011876107622570988, "grad_norm": 0.8203125, "learning_rate": 0.0019840785942208867, "loss": 0.2414, "step": 6698 }, { "epoch": 0.011879653787880803, "grad_norm": 0.7734375, "learning_rate": 0.001984067446564632, "loss": 0.2762, "step": 6700 }, { "epoch": 0.011883199953190617, "grad_norm": 0.42578125, "learning_rate": 0.00198405629504198, "loss": 0.2606, "step": 6702 }, { "epoch": 0.011886746118500434, "grad_norm": 0.6875, "learning_rate": 0.0019840451396529795, "loss": 0.2388, "step": 6704 }, { "epoch": 0.011890292283810248, "grad_norm": 0.51953125, "learning_rate": 0.001984033980397679, "loss": 0.2446, "step": 6706 }, { "epoch": 0.011893838449120063, "grad_norm": 0.36328125, "learning_rate": 0.0019840228172761268, "loss": 0.3288, "step": 6708 }, { "epoch": 0.011897384614429879, "grad_norm": 0.330078125, "learning_rate": 0.001984011650288373, "loss": 0.1916, "step": 6710 }, { "epoch": 0.011900930779739693, "grad_norm": 0.4140625, "learning_rate": 0.0019840004794344653, "loss": 0.2786, "step": 6712 }, { "epoch": 0.011904476945049508, "grad_norm": 0.55078125, "learning_rate": 0.001983989304714453, "loss": 0.2285, "step": 6714 }, { "epoch": 0.011908023110359324, "grad_norm": 1.3125, "learning_rate": 0.0019839781261283855, "loss": 0.2872, "step": 6716 }, { "epoch": 0.011911569275669139, "grad_norm": 0.6328125, "learning_rate": 0.001983966943676311, "loss": 0.2707, "step": 6718 }, { "epoch": 0.011915115440978955, "grad_norm": 0.57421875, "learning_rate": 0.001983955757358278, "loss": 0.1967, "step": 6720 }, { "epoch": 0.01191866160628877, "grad_norm": 0.45703125, "learning_rate": 0.0019839445671743366, "loss": 0.3742, "step": 6722 }, { "epoch": 0.011922207771598584, "grad_norm": 0.7734375, "learning_rate": 0.001983933373124535, "loss": 0.2414, "step": 6724 }, { "epoch": 0.0119257539369084, "grad_norm": 0.4296875, "learning_rate": 0.001983922175208922, "loss": 0.2942, "step": 6726 }, { "epoch": 0.011929300102218215, "grad_norm": 0.734375, "learning_rate": 0.001983910973427547, "loss": 0.3007, "step": 6728 }, { "epoch": 0.01193284626752803, "grad_norm": 0.462890625, "learning_rate": 0.001983899767780459, "loss": 0.2479, "step": 6730 }, { "epoch": 0.011936392432837846, "grad_norm": 0.6484375, "learning_rate": 0.0019838885582677064, "loss": 0.2173, "step": 6732 }, { "epoch": 0.01193993859814766, "grad_norm": 0.671875, "learning_rate": 0.001983877344889339, "loss": 0.278, "step": 6734 }, { "epoch": 0.011943484763457475, "grad_norm": 0.330078125, "learning_rate": 0.0019838661276454055, "loss": 0.2704, "step": 6736 }, { "epoch": 0.011947030928767291, "grad_norm": 0.72265625, "learning_rate": 0.001983854906535955, "loss": 0.2654, "step": 6738 }, { "epoch": 0.011950577094077106, "grad_norm": 0.7109375, "learning_rate": 0.0019838436815610362, "loss": 0.2121, "step": 6740 }, { "epoch": 0.01195412325938692, "grad_norm": 2.09375, "learning_rate": 0.001983832452720699, "loss": 0.2342, "step": 6742 }, { "epoch": 0.011957669424696737, "grad_norm": 1.515625, "learning_rate": 0.0019838212200149917, "loss": 0.2736, "step": 6744 }, { "epoch": 0.011961215590006551, "grad_norm": 0.470703125, "learning_rate": 0.0019838099834439643, "loss": 0.2863, "step": 6746 }, { "epoch": 0.011964761755316366, "grad_norm": 0.4140625, "learning_rate": 0.0019837987430076647, "loss": 0.2547, "step": 6748 }, { "epoch": 0.011968307920626182, "grad_norm": 4.0, "learning_rate": 0.001983787498706143, "loss": 0.3155, "step": 6750 }, { "epoch": 0.011971854085935997, "grad_norm": 0.80859375, "learning_rate": 0.0019837762505394486, "loss": 0.2375, "step": 6752 }, { "epoch": 0.011975400251245813, "grad_norm": 1.0390625, "learning_rate": 0.0019837649985076297, "loss": 0.3251, "step": 6754 }, { "epoch": 0.011978946416555628, "grad_norm": 0.447265625, "learning_rate": 0.0019837537426107364, "loss": 0.273, "step": 6756 }, { "epoch": 0.011982492581865442, "grad_norm": 0.28125, "learning_rate": 0.001983742482848817, "loss": 0.4972, "step": 6758 }, { "epoch": 0.011986038747175258, "grad_norm": 0.314453125, "learning_rate": 0.0019837312192219217, "loss": 0.3103, "step": 6760 }, { "epoch": 0.011989584912485073, "grad_norm": 1.1171875, "learning_rate": 0.0019837199517301, "loss": 0.2812, "step": 6762 }, { "epoch": 0.011993131077794887, "grad_norm": 0.796875, "learning_rate": 0.0019837086803734, "loss": 0.2744, "step": 6764 }, { "epoch": 0.011996677243104704, "grad_norm": 0.494140625, "learning_rate": 0.0019836974051518712, "loss": 0.2333, "step": 6766 }, { "epoch": 0.012000223408414518, "grad_norm": 0.3984375, "learning_rate": 0.0019836861260655635, "loss": 0.3304, "step": 6768 }, { "epoch": 0.012003769573724333, "grad_norm": 0.2470703125, "learning_rate": 0.001983674843114526, "loss": 0.242, "step": 6770 }, { "epoch": 0.01200731573903415, "grad_norm": 0.359375, "learning_rate": 0.0019836635562988083, "loss": 0.1962, "step": 6772 }, { "epoch": 0.012010861904343964, "grad_norm": 0.5234375, "learning_rate": 0.001983652265618459, "loss": 0.2323, "step": 6774 }, { "epoch": 0.012014408069653778, "grad_norm": 0.31640625, "learning_rate": 0.0019836409710735285, "loss": 0.2398, "step": 6776 }, { "epoch": 0.012017954234963595, "grad_norm": 0.3828125, "learning_rate": 0.0019836296726640653, "loss": 0.2744, "step": 6778 }, { "epoch": 0.012021500400273409, "grad_norm": 0.83984375, "learning_rate": 0.001983618370390119, "loss": 0.2671, "step": 6780 }, { "epoch": 0.012025046565583224, "grad_norm": 1.3359375, "learning_rate": 0.00198360706425174, "loss": 0.5433, "step": 6782 }, { "epoch": 0.01202859273089304, "grad_norm": 3.03125, "learning_rate": 0.0019835957542489765, "loss": 0.4246, "step": 6784 }, { "epoch": 0.012032138896202854, "grad_norm": 0.5234375, "learning_rate": 0.001983584440381878, "loss": 0.3178, "step": 6786 }, { "epoch": 0.01203568506151267, "grad_norm": 0.3828125, "learning_rate": 0.0019835731226504954, "loss": 0.2733, "step": 6788 }, { "epoch": 0.012039231226822485, "grad_norm": 0.8515625, "learning_rate": 0.0019835618010548765, "loss": 0.3158, "step": 6790 }, { "epoch": 0.0120427773921323, "grad_norm": 0.609375, "learning_rate": 0.0019835504755950717, "loss": 0.2384, "step": 6792 }, { "epoch": 0.012046323557442116, "grad_norm": 0.5859375, "learning_rate": 0.00198353914627113, "loss": 0.2731, "step": 6794 }, { "epoch": 0.01204986972275193, "grad_norm": 0.2490234375, "learning_rate": 0.0019835278130831014, "loss": 0.1919, "step": 6796 }, { "epoch": 0.012053415888061745, "grad_norm": 0.251953125, "learning_rate": 0.0019835164760310356, "loss": 0.2322, "step": 6798 }, { "epoch": 0.012056962053371562, "grad_norm": 0.4140625, "learning_rate": 0.001983505135114982, "loss": 0.2973, "step": 6800 }, { "epoch": 0.012060508218681376, "grad_norm": 0.345703125, "learning_rate": 0.00198349379033499, "loss": 0.2675, "step": 6802 }, { "epoch": 0.01206405438399119, "grad_norm": 0.765625, "learning_rate": 0.00198348244169111, "loss": 0.2733, "step": 6804 }, { "epoch": 0.012067600549301007, "grad_norm": 0.33984375, "learning_rate": 0.00198347108918339, "loss": 0.2579, "step": 6806 }, { "epoch": 0.012071146714610822, "grad_norm": 0.32421875, "learning_rate": 0.001983459732811881, "loss": 0.316, "step": 6808 }, { "epoch": 0.012074692879920636, "grad_norm": 0.3046875, "learning_rate": 0.0019834483725766324, "loss": 0.2426, "step": 6810 }, { "epoch": 0.012078239045230452, "grad_norm": 0.609375, "learning_rate": 0.0019834370084776936, "loss": 0.3107, "step": 6812 }, { "epoch": 0.012081785210540267, "grad_norm": 0.462890625, "learning_rate": 0.001983425640515115, "loss": 0.1981, "step": 6814 }, { "epoch": 0.012085331375850081, "grad_norm": 0.6328125, "learning_rate": 0.001983414268688945, "loss": 0.27, "step": 6816 }, { "epoch": 0.012088877541159898, "grad_norm": 0.71875, "learning_rate": 0.0019834028929992344, "loss": 0.2811, "step": 6818 }, { "epoch": 0.012092423706469712, "grad_norm": 0.89453125, "learning_rate": 0.001983391513446033, "loss": 0.2405, "step": 6820 }, { "epoch": 0.012095969871779529, "grad_norm": 0.44921875, "learning_rate": 0.00198338013002939, "loss": 0.2651, "step": 6822 }, { "epoch": 0.012099516037089343, "grad_norm": 0.52734375, "learning_rate": 0.0019833687427493556, "loss": 0.3148, "step": 6824 }, { "epoch": 0.012103062202399158, "grad_norm": 0.87109375, "learning_rate": 0.0019833573516059794, "loss": 0.264, "step": 6826 }, { "epoch": 0.012106608367708974, "grad_norm": 0.3359375, "learning_rate": 0.001983345956599311, "loss": 0.2805, "step": 6828 }, { "epoch": 0.012110154533018789, "grad_norm": 1.21875, "learning_rate": 0.0019833345577294006, "loss": 0.3786, "step": 6830 }, { "epoch": 0.012113700698328603, "grad_norm": 0.2158203125, "learning_rate": 0.001983323154996298, "loss": 0.2356, "step": 6832 }, { "epoch": 0.01211724686363842, "grad_norm": 0.357421875, "learning_rate": 0.0019833117484000535, "loss": 0.2613, "step": 6834 }, { "epoch": 0.012120793028948234, "grad_norm": 0.4140625, "learning_rate": 0.001983300337940716, "loss": 0.2556, "step": 6836 }, { "epoch": 0.012124339194258048, "grad_norm": 6.59375, "learning_rate": 0.0019832889236183356, "loss": 0.3318, "step": 6838 }, { "epoch": 0.012127885359567865, "grad_norm": 0.9765625, "learning_rate": 0.001983277505432963, "loss": 0.1884, "step": 6840 }, { "epoch": 0.01213143152487768, "grad_norm": 0.1826171875, "learning_rate": 0.0019832660833846473, "loss": 0.2034, "step": 6842 }, { "epoch": 0.012134977690187494, "grad_norm": 0.58203125, "learning_rate": 0.0019832546574734397, "loss": 0.2472, "step": 6844 }, { "epoch": 0.01213852385549731, "grad_norm": 0.2421875, "learning_rate": 0.0019832432276993884, "loss": 0.2302, "step": 6846 }, { "epoch": 0.012142070020807125, "grad_norm": 0.392578125, "learning_rate": 0.0019832317940625447, "loss": 0.2369, "step": 6848 }, { "epoch": 0.01214561618611694, "grad_norm": 0.484375, "learning_rate": 0.001983220356562958, "loss": 0.3195, "step": 6850 }, { "epoch": 0.012149162351426756, "grad_norm": 0.380859375, "learning_rate": 0.0019832089152006785, "loss": 0.2562, "step": 6852 }, { "epoch": 0.01215270851673657, "grad_norm": 0.58984375, "learning_rate": 0.001983197469975756, "loss": 0.2764, "step": 6854 }, { "epoch": 0.012156254682046386, "grad_norm": 0.337890625, "learning_rate": 0.001983186020888241, "loss": 0.2632, "step": 6856 }, { "epoch": 0.012159800847356201, "grad_norm": 0.46875, "learning_rate": 0.0019831745679381833, "loss": 0.2197, "step": 6858 }, { "epoch": 0.012163347012666016, "grad_norm": 1.421875, "learning_rate": 0.001983163111125633, "loss": 0.2786, "step": 6860 }, { "epoch": 0.012166893177975832, "grad_norm": 0.68359375, "learning_rate": 0.0019831516504506407, "loss": 0.2758, "step": 6862 }, { "epoch": 0.012170439343285646, "grad_norm": 0.56640625, "learning_rate": 0.0019831401859132553, "loss": 0.2994, "step": 6864 }, { "epoch": 0.012173985508595461, "grad_norm": 0.353515625, "learning_rate": 0.0019831287175135284, "loss": 0.2428, "step": 6866 }, { "epoch": 0.012177531673905277, "grad_norm": 0.55859375, "learning_rate": 0.001983117245251509, "loss": 0.2468, "step": 6868 }, { "epoch": 0.012181077839215092, "grad_norm": 0.439453125, "learning_rate": 0.001983105769127248, "loss": 0.2521, "step": 6870 }, { "epoch": 0.012184624004524906, "grad_norm": 0.251953125, "learning_rate": 0.001983094289140796, "loss": 0.2257, "step": 6872 }, { "epoch": 0.012188170169834723, "grad_norm": 0.93359375, "learning_rate": 0.0019830828052922016, "loss": 0.224, "step": 6874 }, { "epoch": 0.012191716335144537, "grad_norm": 0.70703125, "learning_rate": 0.001983071317581516, "loss": 0.2698, "step": 6876 }, { "epoch": 0.012195262500454352, "grad_norm": 0.38671875, "learning_rate": 0.0019830598260087897, "loss": 0.3066, "step": 6878 }, { "epoch": 0.012198808665764168, "grad_norm": 0.3828125, "learning_rate": 0.0019830483305740727, "loss": 0.2594, "step": 6880 }, { "epoch": 0.012202354831073983, "grad_norm": 0.66796875, "learning_rate": 0.001983036831277415, "loss": 0.2496, "step": 6882 }, { "epoch": 0.012205900996383797, "grad_norm": 0.2236328125, "learning_rate": 0.0019830253281188674, "loss": 0.1695, "step": 6884 }, { "epoch": 0.012209447161693613, "grad_norm": 0.25, "learning_rate": 0.0019830138210984796, "loss": 0.2206, "step": 6886 }, { "epoch": 0.012212993327003428, "grad_norm": 0.71875, "learning_rate": 0.0019830023102163025, "loss": 0.277, "step": 6888 }, { "epoch": 0.012216539492313244, "grad_norm": 2.328125, "learning_rate": 0.0019829907954723863, "loss": 0.3232, "step": 6890 }, { "epoch": 0.012220085657623059, "grad_norm": 0.5703125, "learning_rate": 0.001982979276866781, "loss": 0.1966, "step": 6892 }, { "epoch": 0.012223631822932873, "grad_norm": 0.30859375, "learning_rate": 0.0019829677543995376, "loss": 0.2742, "step": 6894 }, { "epoch": 0.01222717798824269, "grad_norm": 0.369140625, "learning_rate": 0.0019829562280707057, "loss": 0.3344, "step": 6896 }, { "epoch": 0.012230724153552504, "grad_norm": 0.3359375, "learning_rate": 0.0019829446978803364, "loss": 0.2517, "step": 6898 }, { "epoch": 0.012234270318862319, "grad_norm": 11.25, "learning_rate": 0.00198293316382848, "loss": 0.3401, "step": 6900 }, { "epoch": 0.012237816484172135, "grad_norm": 0.318359375, "learning_rate": 0.0019829216259151863, "loss": 0.1887, "step": 6902 }, { "epoch": 0.01224136264948195, "grad_norm": 0.37109375, "learning_rate": 0.0019829100841405067, "loss": 0.1818, "step": 6904 }, { "epoch": 0.012244908814791764, "grad_norm": 4.0, "learning_rate": 0.0019828985385044913, "loss": 0.3568, "step": 6906 }, { "epoch": 0.01224845498010158, "grad_norm": 0.21875, "learning_rate": 0.00198288698900719, "loss": 0.3633, "step": 6908 }, { "epoch": 0.012252001145411395, "grad_norm": 1.4140625, "learning_rate": 0.0019828754356486546, "loss": 0.4123, "step": 6910 }, { "epoch": 0.01225554731072121, "grad_norm": 0.2041015625, "learning_rate": 0.0019828638784289347, "loss": 0.2238, "step": 6912 }, { "epoch": 0.012259093476031026, "grad_norm": 1.0546875, "learning_rate": 0.0019828523173480808, "loss": 0.2899, "step": 6914 }, { "epoch": 0.01226263964134084, "grad_norm": 0.427734375, "learning_rate": 0.0019828407524061435, "loss": 0.2454, "step": 6916 }, { "epoch": 0.012266185806650655, "grad_norm": 0.49609375, "learning_rate": 0.0019828291836031737, "loss": 0.2928, "step": 6918 }, { "epoch": 0.012269731971960471, "grad_norm": 0.25390625, "learning_rate": 0.001982817610939222, "loss": 0.2622, "step": 6920 }, { "epoch": 0.012273278137270286, "grad_norm": 0.67578125, "learning_rate": 0.0019828060344143387, "loss": 0.3946, "step": 6922 }, { "epoch": 0.012276824302580102, "grad_norm": 0.56640625, "learning_rate": 0.0019827944540285747, "loss": 0.215, "step": 6924 }, { "epoch": 0.012280370467889917, "grad_norm": 0.87109375, "learning_rate": 0.0019827828697819806, "loss": 0.4017, "step": 6926 }, { "epoch": 0.012283916633199731, "grad_norm": 0.765625, "learning_rate": 0.0019827712816746067, "loss": 0.3001, "step": 6928 }, { "epoch": 0.012287462798509547, "grad_norm": 0.314453125, "learning_rate": 0.0019827596897065048, "loss": 0.2846, "step": 6930 }, { "epoch": 0.012291008963819362, "grad_norm": 0.83203125, "learning_rate": 0.0019827480938777236, "loss": 0.3703, "step": 6932 }, { "epoch": 0.012294555129129177, "grad_norm": 0.64453125, "learning_rate": 0.001982736494188316, "loss": 0.2309, "step": 6934 }, { "epoch": 0.012298101294438993, "grad_norm": 1.7109375, "learning_rate": 0.0019827248906383317, "loss": 0.3196, "step": 6936 }, { "epoch": 0.012301647459748807, "grad_norm": 0.2177734375, "learning_rate": 0.0019827132832278206, "loss": 0.291, "step": 6938 }, { "epoch": 0.012305193625058622, "grad_norm": 0.322265625, "learning_rate": 0.001982701671956835, "loss": 0.2409, "step": 6940 }, { "epoch": 0.012308739790368438, "grad_norm": 0.55078125, "learning_rate": 0.001982690056825425, "loss": 0.2597, "step": 6942 }, { "epoch": 0.012312285955678253, "grad_norm": 0.27734375, "learning_rate": 0.001982678437833641, "loss": 0.2432, "step": 6944 }, { "epoch": 0.012315832120988067, "grad_norm": 0.361328125, "learning_rate": 0.0019826668149815346, "loss": 0.2968, "step": 6946 }, { "epoch": 0.012319378286297884, "grad_norm": 0.380859375, "learning_rate": 0.001982655188269156, "loss": 0.2664, "step": 6948 }, { "epoch": 0.012322924451607698, "grad_norm": 0.27734375, "learning_rate": 0.001982643557696557, "loss": 0.2904, "step": 6950 }, { "epoch": 0.012326470616917513, "grad_norm": 0.2255859375, "learning_rate": 0.0019826319232637874, "loss": 0.2259, "step": 6952 }, { "epoch": 0.012330016782227329, "grad_norm": 0.365234375, "learning_rate": 0.001982620284970898, "loss": 0.3693, "step": 6954 }, { "epoch": 0.012333562947537144, "grad_norm": 0.6015625, "learning_rate": 0.0019826086428179407, "loss": 0.1981, "step": 6956 }, { "epoch": 0.01233710911284696, "grad_norm": 1.9375, "learning_rate": 0.001982596996804966, "loss": 0.3156, "step": 6958 }, { "epoch": 0.012340655278156774, "grad_norm": 0.294921875, "learning_rate": 0.001982585346932024, "loss": 0.2237, "step": 6960 }, { "epoch": 0.012344201443466589, "grad_norm": 0.4609375, "learning_rate": 0.001982573693199167, "loss": 0.2888, "step": 6962 }, { "epoch": 0.012347747608776405, "grad_norm": 0.3515625, "learning_rate": 0.001982562035606445, "loss": 0.2527, "step": 6964 }, { "epoch": 0.01235129377408622, "grad_norm": 0.287109375, "learning_rate": 0.0019825503741539097, "loss": 0.2258, "step": 6966 }, { "epoch": 0.012354839939396034, "grad_norm": 0.29296875, "learning_rate": 0.0019825387088416115, "loss": 0.2394, "step": 6968 }, { "epoch": 0.01235838610470585, "grad_norm": 0.3828125, "learning_rate": 0.0019825270396696014, "loss": 0.2348, "step": 6970 }, { "epoch": 0.012361932270015665, "grad_norm": 0.48828125, "learning_rate": 0.0019825153666379303, "loss": 0.2296, "step": 6972 }, { "epoch": 0.01236547843532548, "grad_norm": 0.32421875, "learning_rate": 0.0019825036897466505, "loss": 0.1985, "step": 6974 }, { "epoch": 0.012369024600635296, "grad_norm": 0.3203125, "learning_rate": 0.0019824920089958117, "loss": 0.269, "step": 6976 }, { "epoch": 0.01237257076594511, "grad_norm": 0.224609375, "learning_rate": 0.0019824803243854655, "loss": 0.2197, "step": 6978 }, { "epoch": 0.012376116931254925, "grad_norm": 2.546875, "learning_rate": 0.0019824686359156633, "loss": 0.4333, "step": 6980 }, { "epoch": 0.012379663096564741, "grad_norm": 1.1484375, "learning_rate": 0.0019824569435864556, "loss": 0.2689, "step": 6982 }, { "epoch": 0.012383209261874556, "grad_norm": 0.1953125, "learning_rate": 0.001982445247397894, "loss": 0.2146, "step": 6984 }, { "epoch": 0.01238675542718437, "grad_norm": 0.97265625, "learning_rate": 0.001982433547350029, "loss": 0.1743, "step": 6986 }, { "epoch": 0.012390301592494187, "grad_norm": 0.49609375, "learning_rate": 0.0019824218434429126, "loss": 0.2409, "step": 6988 }, { "epoch": 0.012393847757804001, "grad_norm": 0.1650390625, "learning_rate": 0.0019824101356765954, "loss": 0.2394, "step": 6990 }, { "epoch": 0.012397393923113818, "grad_norm": 0.3828125, "learning_rate": 0.001982398424051129, "loss": 0.2411, "step": 6992 }, { "epoch": 0.012400940088423632, "grad_norm": 0.369140625, "learning_rate": 0.001982386708566565, "loss": 0.2816, "step": 6994 }, { "epoch": 0.012404486253733447, "grad_norm": 0.2421875, "learning_rate": 0.0019823749892229534, "loss": 0.2394, "step": 6996 }, { "epoch": 0.012408032419043263, "grad_norm": 0.4375, "learning_rate": 0.001982363266020346, "loss": 0.2617, "step": 6998 }, { "epoch": 0.012411578584353078, "grad_norm": 0.6171875, "learning_rate": 0.0019823515389587945, "loss": 0.2929, "step": 7000 }, { "epoch": 0.012415124749662892, "grad_norm": 0.65625, "learning_rate": 0.0019823398080383503, "loss": 0.4538, "step": 7002 }, { "epoch": 0.012418670914972708, "grad_norm": 8.125, "learning_rate": 0.001982328073259064, "loss": 0.2788, "step": 7004 }, { "epoch": 0.012422217080282523, "grad_norm": 1.9296875, "learning_rate": 0.0019823163346209868, "loss": 0.3532, "step": 7006 }, { "epoch": 0.012425763245592338, "grad_norm": 0.6875, "learning_rate": 0.0019823045921241707, "loss": 0.2222, "step": 7008 }, { "epoch": 0.012429309410902154, "grad_norm": 0.2138671875, "learning_rate": 0.001982292845768667, "loss": 0.2737, "step": 7010 }, { "epoch": 0.012432855576211968, "grad_norm": 0.2734375, "learning_rate": 0.0019822810955545268, "loss": 0.3135, "step": 7012 }, { "epoch": 0.012436401741521783, "grad_norm": 0.365234375, "learning_rate": 0.0019822693414818016, "loss": 0.2874, "step": 7014 }, { "epoch": 0.0124399479068316, "grad_norm": 0.34375, "learning_rate": 0.001982257583550543, "loss": 0.2411, "step": 7016 }, { "epoch": 0.012443494072141414, "grad_norm": 0.26953125, "learning_rate": 0.001982245821760802, "loss": 0.2644, "step": 7018 }, { "epoch": 0.012447040237451228, "grad_norm": 0.72265625, "learning_rate": 0.00198223405611263, "loss": 0.2268, "step": 7020 }, { "epoch": 0.012450586402761045, "grad_norm": 0.6640625, "learning_rate": 0.0019822222866060788, "loss": 0.254, "step": 7022 }, { "epoch": 0.01245413256807086, "grad_norm": 0.251953125, "learning_rate": 0.0019822105132412, "loss": 0.287, "step": 7024 }, { "epoch": 0.012457678733380675, "grad_norm": 1.8515625, "learning_rate": 0.001982198736018045, "loss": 0.2902, "step": 7026 }, { "epoch": 0.01246122489869049, "grad_norm": 0.71484375, "learning_rate": 0.001982186954936665, "loss": 0.5944, "step": 7028 }, { "epoch": 0.012464771064000305, "grad_norm": 2.28125, "learning_rate": 0.001982175169997111, "loss": 0.4291, "step": 7030 }, { "epoch": 0.01246831722931012, "grad_norm": 2.328125, "learning_rate": 0.001982163381199436, "loss": 0.3188, "step": 7032 }, { "epoch": 0.012471863394619935, "grad_norm": 0.31640625, "learning_rate": 0.001982151588543691, "loss": 0.2657, "step": 7034 }, { "epoch": 0.01247540955992975, "grad_norm": 0.52734375, "learning_rate": 0.001982139792029927, "loss": 0.262, "step": 7036 }, { "epoch": 0.012478955725239566, "grad_norm": 0.47265625, "learning_rate": 0.001982127991658196, "loss": 0.2731, "step": 7038 }, { "epoch": 0.01248250189054938, "grad_norm": 0.23046875, "learning_rate": 0.0019821161874285496, "loss": 0.208, "step": 7040 }, { "epoch": 0.012486048055859195, "grad_norm": 0.56640625, "learning_rate": 0.001982104379341039, "loss": 0.3549, "step": 7042 }, { "epoch": 0.012489594221169012, "grad_norm": 0.4921875, "learning_rate": 0.0019820925673957168, "loss": 0.2285, "step": 7044 }, { "epoch": 0.012493140386478826, "grad_norm": 0.306640625, "learning_rate": 0.0019820807515926334, "loss": 0.2439, "step": 7046 }, { "epoch": 0.01249668655178864, "grad_norm": 0.419921875, "learning_rate": 0.0019820689319318416, "loss": 0.2338, "step": 7048 }, { "epoch": 0.012500232717098457, "grad_norm": 0.44140625, "learning_rate": 0.001982057108413393, "loss": 0.4401, "step": 7050 }, { "epoch": 0.012503778882408272, "grad_norm": 0.484375, "learning_rate": 0.001982045281037339, "loss": 0.2782, "step": 7052 }, { "epoch": 0.012507325047718086, "grad_norm": 0.5390625, "learning_rate": 0.0019820334498037305, "loss": 0.2343, "step": 7054 }, { "epoch": 0.012510871213027902, "grad_norm": 0.408203125, "learning_rate": 0.0019820216147126207, "loss": 0.2572, "step": 7056 }, { "epoch": 0.012514417378337717, "grad_norm": 0.302734375, "learning_rate": 0.0019820097757640605, "loss": 0.2594, "step": 7058 }, { "epoch": 0.012517963543647533, "grad_norm": 0.5859375, "learning_rate": 0.0019819979329581015, "loss": 0.2041, "step": 7060 }, { "epoch": 0.012521509708957348, "grad_norm": 0.369140625, "learning_rate": 0.0019819860862947966, "loss": 0.2931, "step": 7062 }, { "epoch": 0.012525055874267162, "grad_norm": 0.41015625, "learning_rate": 0.0019819742357741962, "loss": 0.2811, "step": 7064 }, { "epoch": 0.012528602039576979, "grad_norm": 1.0546875, "learning_rate": 0.001981962381396353, "loss": 0.3539, "step": 7066 }, { "epoch": 0.012532148204886793, "grad_norm": 0.2734375, "learning_rate": 0.0019819505231613186, "loss": 0.2371, "step": 7068 }, { "epoch": 0.012535694370196608, "grad_norm": 0.267578125, "learning_rate": 0.001981938661069145, "loss": 0.2808, "step": 7070 }, { "epoch": 0.012539240535506424, "grad_norm": 0.5390625, "learning_rate": 0.001981926795119884, "loss": 0.4738, "step": 7072 }, { "epoch": 0.012542786700816239, "grad_norm": 0.2734375, "learning_rate": 0.001981914925313588, "loss": 0.3067, "step": 7074 }, { "epoch": 0.012546332866126053, "grad_norm": 0.734375, "learning_rate": 0.001981903051650308, "loss": 0.4865, "step": 7076 }, { "epoch": 0.01254987903143587, "grad_norm": 0.32421875, "learning_rate": 0.001981891174130096, "loss": 0.2807, "step": 7078 }, { "epoch": 0.012553425196745684, "grad_norm": 0.51171875, "learning_rate": 0.0019818792927530043, "loss": 0.2514, "step": 7080 }, { "epoch": 0.012556971362055499, "grad_norm": 0.390625, "learning_rate": 0.0019818674075190853, "loss": 0.2599, "step": 7082 }, { "epoch": 0.012560517527365315, "grad_norm": 0.490234375, "learning_rate": 0.0019818555184283903, "loss": 0.2465, "step": 7084 }, { "epoch": 0.01256406369267513, "grad_norm": 0.99609375, "learning_rate": 0.0019818436254809713, "loss": 0.293, "step": 7086 }, { "epoch": 0.012567609857984944, "grad_norm": 0.3828125, "learning_rate": 0.0019818317286768804, "loss": 0.3002, "step": 7088 }, { "epoch": 0.01257115602329476, "grad_norm": 0.28515625, "learning_rate": 0.0019818198280161705, "loss": 0.3467, "step": 7090 }, { "epoch": 0.012574702188604575, "grad_norm": 0.353515625, "learning_rate": 0.0019818079234988923, "loss": 0.2387, "step": 7092 }, { "epoch": 0.012578248353914391, "grad_norm": 13.4375, "learning_rate": 0.0019817960151250983, "loss": 0.445, "step": 7094 }, { "epoch": 0.012581794519224206, "grad_norm": 0.328125, "learning_rate": 0.0019817841028948414, "loss": 0.2352, "step": 7096 }, { "epoch": 0.01258534068453402, "grad_norm": 0.2314453125, "learning_rate": 0.0019817721868081724, "loss": 0.1747, "step": 7098 }, { "epoch": 0.012588886849843836, "grad_norm": 0.263671875, "learning_rate": 0.001981760266865144, "loss": 0.2832, "step": 7100 }, { "epoch": 0.012592433015153651, "grad_norm": 0.30078125, "learning_rate": 0.001981748343065809, "loss": 0.2656, "step": 7102 }, { "epoch": 0.012595979180463466, "grad_norm": 0.69140625, "learning_rate": 0.0019817364154102184, "loss": 0.2461, "step": 7104 }, { "epoch": 0.012599525345773282, "grad_norm": 0.54296875, "learning_rate": 0.001981724483898425, "loss": 0.2458, "step": 7106 }, { "epoch": 0.012603071511083096, "grad_norm": 3.265625, "learning_rate": 0.001981712548530481, "loss": 0.5001, "step": 7108 }, { "epoch": 0.012606617676392911, "grad_norm": 0.470703125, "learning_rate": 0.0019817006093064385, "loss": 0.3432, "step": 7110 }, { "epoch": 0.012610163841702727, "grad_norm": 1.265625, "learning_rate": 0.0019816886662263494, "loss": 0.2609, "step": 7112 }, { "epoch": 0.012613710007012542, "grad_norm": 0.30078125, "learning_rate": 0.001981676719290267, "loss": 0.2565, "step": 7114 }, { "epoch": 0.012617256172322356, "grad_norm": 0.66796875, "learning_rate": 0.001981664768498242, "loss": 0.2765, "step": 7116 }, { "epoch": 0.012620802337632173, "grad_norm": 0.5234375, "learning_rate": 0.0019816528138503274, "loss": 0.2701, "step": 7118 }, { "epoch": 0.012624348502941987, "grad_norm": 0.6875, "learning_rate": 0.001981640855346576, "loss": 0.2449, "step": 7120 }, { "epoch": 0.012627894668251802, "grad_norm": 0.205078125, "learning_rate": 0.0019816288929870394, "loss": 0.2288, "step": 7122 }, { "epoch": 0.012631440833561618, "grad_norm": 0.52734375, "learning_rate": 0.0019816169267717703, "loss": 0.2604, "step": 7124 }, { "epoch": 0.012634986998871433, "grad_norm": 0.625, "learning_rate": 0.0019816049567008202, "loss": 0.2875, "step": 7126 }, { "epoch": 0.012638533164181249, "grad_norm": 0.609375, "learning_rate": 0.0019815929827742425, "loss": 0.5593, "step": 7128 }, { "epoch": 0.012642079329491063, "grad_norm": 0.240234375, "learning_rate": 0.0019815810049920897, "loss": 0.1914, "step": 7130 }, { "epoch": 0.012645625494800878, "grad_norm": 0.25, "learning_rate": 0.0019815690233544133, "loss": 0.2331, "step": 7132 }, { "epoch": 0.012649171660110694, "grad_norm": 0.380859375, "learning_rate": 0.001981557037861266, "loss": 0.3437, "step": 7134 }, { "epoch": 0.012652717825420509, "grad_norm": 0.32421875, "learning_rate": 0.0019815450485127, "loss": 0.2202, "step": 7136 }, { "epoch": 0.012656263990730323, "grad_norm": 0.50390625, "learning_rate": 0.0019815330553087686, "loss": 0.2587, "step": 7138 }, { "epoch": 0.01265981015604014, "grad_norm": 0.52734375, "learning_rate": 0.0019815210582495232, "loss": 0.2261, "step": 7140 }, { "epoch": 0.012663356321349954, "grad_norm": 0.87890625, "learning_rate": 0.001981509057335017, "loss": 0.383, "step": 7142 }, { "epoch": 0.012666902486659769, "grad_norm": 0.58984375, "learning_rate": 0.001981497052565302, "loss": 0.2801, "step": 7144 }, { "epoch": 0.012670448651969585, "grad_norm": 0.40625, "learning_rate": 0.001981485043940431, "loss": 0.2346, "step": 7146 }, { "epoch": 0.0126739948172794, "grad_norm": 0.85546875, "learning_rate": 0.0019814730314604567, "loss": 0.3033, "step": 7148 }, { "epoch": 0.012677540982589214, "grad_norm": 0.43359375, "learning_rate": 0.001981461015125431, "loss": 0.2223, "step": 7150 }, { "epoch": 0.01268108714789903, "grad_norm": 0.7734375, "learning_rate": 0.0019814489949354073, "loss": 0.2798, "step": 7152 }, { "epoch": 0.012684633313208845, "grad_norm": 0.421875, "learning_rate": 0.0019814369708904375, "loss": 0.2874, "step": 7154 }, { "epoch": 0.01268817947851866, "grad_norm": 0.2275390625, "learning_rate": 0.0019814249429905744, "loss": 0.2074, "step": 7156 }, { "epoch": 0.012691725643828476, "grad_norm": 0.349609375, "learning_rate": 0.0019814129112358703, "loss": 0.3419, "step": 7158 }, { "epoch": 0.01269527180913829, "grad_norm": 0.2734375, "learning_rate": 0.001981400875626378, "loss": 0.2377, "step": 7160 }, { "epoch": 0.012698817974448107, "grad_norm": 0.40234375, "learning_rate": 0.001981388836162151, "loss": 0.256, "step": 7162 }, { "epoch": 0.012702364139757921, "grad_norm": 0.6484375, "learning_rate": 0.0019813767928432407, "loss": 0.2384, "step": 7164 }, { "epoch": 0.012705910305067736, "grad_norm": 0.486328125, "learning_rate": 0.0019813647456697002, "loss": 0.2071, "step": 7166 }, { "epoch": 0.012709456470377552, "grad_norm": 0.55859375, "learning_rate": 0.0019813526946415826, "loss": 0.2576, "step": 7168 }, { "epoch": 0.012713002635687367, "grad_norm": 2.234375, "learning_rate": 0.00198134063975894, "loss": 0.5217, "step": 7170 }, { "epoch": 0.012716548800997181, "grad_norm": 0.28125, "learning_rate": 0.0019813285810218254, "loss": 0.2629, "step": 7172 }, { "epoch": 0.012720094966306997, "grad_norm": 0.337890625, "learning_rate": 0.0019813165184302916, "loss": 0.2286, "step": 7174 }, { "epoch": 0.012723641131616812, "grad_norm": 1.375, "learning_rate": 0.0019813044519843915, "loss": 0.2999, "step": 7176 }, { "epoch": 0.012727187296926627, "grad_norm": 0.6875, "learning_rate": 0.0019812923816841773, "loss": 0.2969, "step": 7178 }, { "epoch": 0.012730733462236443, "grad_norm": 0.416015625, "learning_rate": 0.001981280307529702, "loss": 0.2352, "step": 7180 }, { "epoch": 0.012734279627546257, "grad_norm": 0.3984375, "learning_rate": 0.001981268229521019, "loss": 0.329, "step": 7182 }, { "epoch": 0.012737825792856072, "grad_norm": 0.369140625, "learning_rate": 0.0019812561476581806, "loss": 0.229, "step": 7184 }, { "epoch": 0.012741371958165888, "grad_norm": 1.1328125, "learning_rate": 0.001981244061941239, "loss": 0.2523, "step": 7186 }, { "epoch": 0.012744918123475703, "grad_norm": 2.265625, "learning_rate": 0.001981231972370249, "loss": 0.3817, "step": 7188 }, { "epoch": 0.012748464288785517, "grad_norm": 0.4140625, "learning_rate": 0.0019812198789452614, "loss": 0.2409, "step": 7190 }, { "epoch": 0.012752010454095334, "grad_norm": 0.6796875, "learning_rate": 0.00198120778166633, "loss": 0.3105, "step": 7192 }, { "epoch": 0.012755556619405148, "grad_norm": 0.251953125, "learning_rate": 0.0019811956805335074, "loss": 0.268, "step": 7194 }, { "epoch": 0.012759102784714964, "grad_norm": 0.40625, "learning_rate": 0.0019811835755468472, "loss": 0.2836, "step": 7196 }, { "epoch": 0.012762648950024779, "grad_norm": 0.310546875, "learning_rate": 0.0019811714667064017, "loss": 0.2399, "step": 7198 }, { "epoch": 0.012766195115334594, "grad_norm": 0.33203125, "learning_rate": 0.001981159354012224, "loss": 0.2313, "step": 7200 }, { "epoch": 0.01276974128064441, "grad_norm": 0.3046875, "learning_rate": 0.0019811472374643676, "loss": 0.3612, "step": 7202 }, { "epoch": 0.012773287445954224, "grad_norm": 0.466796875, "learning_rate": 0.0019811351170628847, "loss": 0.2715, "step": 7204 }, { "epoch": 0.012776833611264039, "grad_norm": 0.57421875, "learning_rate": 0.0019811229928078287, "loss": 0.2722, "step": 7206 }, { "epoch": 0.012780379776573855, "grad_norm": 0.3984375, "learning_rate": 0.001981110864699252, "loss": 0.249, "step": 7208 }, { "epoch": 0.01278392594188367, "grad_norm": 0.255859375, "learning_rate": 0.0019810987327372087, "loss": 0.3748, "step": 7210 }, { "epoch": 0.012787472107193484, "grad_norm": 1.5703125, "learning_rate": 0.001981086596921751, "loss": 0.3629, "step": 7212 }, { "epoch": 0.0127910182725033, "grad_norm": 1.4765625, "learning_rate": 0.001981074457252933, "loss": 0.3046, "step": 7214 }, { "epoch": 0.012794564437813115, "grad_norm": 0.49609375, "learning_rate": 0.0019810623137308065, "loss": 0.2893, "step": 7216 }, { "epoch": 0.01279811060312293, "grad_norm": 0.2578125, "learning_rate": 0.001981050166355426, "loss": 0.1819, "step": 7218 }, { "epoch": 0.012801656768432746, "grad_norm": 0.384765625, "learning_rate": 0.001981038015126843, "loss": 0.2163, "step": 7220 }, { "epoch": 0.01280520293374256, "grad_norm": 0.68359375, "learning_rate": 0.0019810258600451115, "loss": 0.2835, "step": 7222 }, { "epoch": 0.012808749099052375, "grad_norm": 0.306640625, "learning_rate": 0.001981013701110285, "loss": 0.2571, "step": 7224 }, { "epoch": 0.012812295264362191, "grad_norm": 0.53125, "learning_rate": 0.001981001538322416, "loss": 0.2347, "step": 7226 }, { "epoch": 0.012815841429672006, "grad_norm": 0.333984375, "learning_rate": 0.001980989371681558, "loss": 0.2317, "step": 7228 }, { "epoch": 0.012819387594981822, "grad_norm": 0.4140625, "learning_rate": 0.001980977201187765, "loss": 0.2647, "step": 7230 }, { "epoch": 0.012822933760291637, "grad_norm": 0.400390625, "learning_rate": 0.0019809650268410887, "loss": 0.3027, "step": 7232 }, { "epoch": 0.012826479925601451, "grad_norm": 0.30078125, "learning_rate": 0.0019809528486415835, "loss": 0.2051, "step": 7234 }, { "epoch": 0.012830026090911268, "grad_norm": 0.3125, "learning_rate": 0.001980940666589302, "loss": 0.326, "step": 7236 }, { "epoch": 0.012833572256221082, "grad_norm": 0.375, "learning_rate": 0.0019809284806842978, "loss": 0.2098, "step": 7238 }, { "epoch": 0.012837118421530897, "grad_norm": 0.1904296875, "learning_rate": 0.001980916290926624, "loss": 0.2324, "step": 7240 }, { "epoch": 0.012840664586840713, "grad_norm": 0.376953125, "learning_rate": 0.001980904097316334, "loss": 0.2191, "step": 7242 }, { "epoch": 0.012844210752150528, "grad_norm": 0.3515625, "learning_rate": 0.001980891899853482, "loss": 0.2297, "step": 7244 }, { "epoch": 0.012847756917460342, "grad_norm": 0.7734375, "learning_rate": 0.0019808796985381193, "loss": 0.2185, "step": 7246 }, { "epoch": 0.012851303082770158, "grad_norm": 0.7578125, "learning_rate": 0.001980867493370301, "loss": 0.247, "step": 7248 }, { "epoch": 0.012854849248079973, "grad_norm": 0.38671875, "learning_rate": 0.00198085528435008, "loss": 0.2162, "step": 7250 }, { "epoch": 0.012858395413389788, "grad_norm": 0.5625, "learning_rate": 0.0019808430714775096, "loss": 0.2796, "step": 7252 }, { "epoch": 0.012861941578699604, "grad_norm": 1.3359375, "learning_rate": 0.0019808308547526435, "loss": 0.2255, "step": 7254 }, { "epoch": 0.012865487744009418, "grad_norm": 0.1826171875, "learning_rate": 0.0019808186341755346, "loss": 0.2234, "step": 7256 }, { "epoch": 0.012869033909319233, "grad_norm": 0.921875, "learning_rate": 0.001980806409746237, "loss": 0.3388, "step": 7258 }, { "epoch": 0.01287258007462905, "grad_norm": 0.341796875, "learning_rate": 0.0019807941814648034, "loss": 0.3057, "step": 7260 }, { "epoch": 0.012876126239938864, "grad_norm": 0.67578125, "learning_rate": 0.0019807819493312877, "loss": 0.292, "step": 7262 }, { "epoch": 0.01287967240524868, "grad_norm": 0.298828125, "learning_rate": 0.0019807697133457434, "loss": 0.2776, "step": 7264 }, { "epoch": 0.012883218570558495, "grad_norm": 2.03125, "learning_rate": 0.001980757473508224, "loss": 0.3523, "step": 7266 }, { "epoch": 0.01288676473586831, "grad_norm": 0.1513671875, "learning_rate": 0.0019807452298187833, "loss": 0.2111, "step": 7268 }, { "epoch": 0.012890310901178125, "grad_norm": 0.6484375, "learning_rate": 0.0019807329822774744, "loss": 0.3608, "step": 7270 }, { "epoch": 0.01289385706648794, "grad_norm": 0.302734375, "learning_rate": 0.0019807207308843505, "loss": 0.5, "step": 7272 }, { "epoch": 0.012897403231797755, "grad_norm": 1.5390625, "learning_rate": 0.0019807084756394665, "loss": 0.3164, "step": 7274 }, { "epoch": 0.012900949397107571, "grad_norm": 0.255859375, "learning_rate": 0.001980696216542875, "loss": 0.2405, "step": 7276 }, { "epoch": 0.012904495562417385, "grad_norm": 0.9140625, "learning_rate": 0.0019806839535946295, "loss": 0.2778, "step": 7278 }, { "epoch": 0.0129080417277272, "grad_norm": 0.2470703125, "learning_rate": 0.001980671686794784, "loss": 0.2086, "step": 7280 }, { "epoch": 0.012911587893037016, "grad_norm": 0.30078125, "learning_rate": 0.0019806594161433924, "loss": 0.3295, "step": 7282 }, { "epoch": 0.01291513405834683, "grad_norm": 1.0078125, "learning_rate": 0.001980647141640508, "loss": 0.301, "step": 7284 }, { "epoch": 0.012918680223656645, "grad_norm": 0.439453125, "learning_rate": 0.001980634863286184, "loss": 0.3033, "step": 7286 }, { "epoch": 0.012922226388966462, "grad_norm": 0.41796875, "learning_rate": 0.0019806225810804754, "loss": 0.2387, "step": 7288 }, { "epoch": 0.012925772554276276, "grad_norm": 0.2216796875, "learning_rate": 0.0019806102950234348, "loss": 0.2494, "step": 7290 }, { "epoch": 0.01292931871958609, "grad_norm": 0.3671875, "learning_rate": 0.0019805980051151163, "loss": 0.2023, "step": 7292 }, { "epoch": 0.012932864884895907, "grad_norm": 0.4453125, "learning_rate": 0.001980585711355574, "loss": 0.2692, "step": 7294 }, { "epoch": 0.012936411050205722, "grad_norm": 0.296875, "learning_rate": 0.0019805734137448607, "loss": 0.2295, "step": 7296 }, { "epoch": 0.012939957215515538, "grad_norm": 0.265625, "learning_rate": 0.001980561112283031, "loss": 0.2222, "step": 7298 }, { "epoch": 0.012943503380825352, "grad_norm": 0.416015625, "learning_rate": 0.0019805488069701387, "loss": 0.3077, "step": 7300 }, { "epoch": 0.012947049546135167, "grad_norm": 0.4453125, "learning_rate": 0.001980536497806237, "loss": 0.2136, "step": 7302 }, { "epoch": 0.012950595711444983, "grad_norm": 0.65625, "learning_rate": 0.0019805241847913805, "loss": 0.257, "step": 7304 }, { "epoch": 0.012954141876754798, "grad_norm": 0.890625, "learning_rate": 0.0019805118679256223, "loss": 0.2409, "step": 7306 }, { "epoch": 0.012957688042064612, "grad_norm": 3.328125, "learning_rate": 0.001980499547209017, "loss": 0.4833, "step": 7308 }, { "epoch": 0.012961234207374429, "grad_norm": 0.2421875, "learning_rate": 0.0019804872226416178, "loss": 0.2097, "step": 7310 }, { "epoch": 0.012964780372684243, "grad_norm": 0.58203125, "learning_rate": 0.0019804748942234794, "loss": 0.3131, "step": 7312 }, { "epoch": 0.012968326537994058, "grad_norm": 0.3984375, "learning_rate": 0.0019804625619546552, "loss": 0.2601, "step": 7314 }, { "epoch": 0.012971872703303874, "grad_norm": 1.21875, "learning_rate": 0.001980450225835199, "loss": 0.2731, "step": 7316 }, { "epoch": 0.012975418868613689, "grad_norm": 0.59375, "learning_rate": 0.001980437885865165, "loss": 0.4425, "step": 7318 }, { "epoch": 0.012978965033923503, "grad_norm": 2.328125, "learning_rate": 0.0019804255420446067, "loss": 0.2614, "step": 7320 }, { "epoch": 0.01298251119923332, "grad_norm": 0.5625, "learning_rate": 0.001980413194373579, "loss": 0.4741, "step": 7322 }, { "epoch": 0.012986057364543134, "grad_norm": 0.455078125, "learning_rate": 0.001980400842852135, "loss": 0.2157, "step": 7324 }, { "epoch": 0.012989603529852949, "grad_norm": 0.3828125, "learning_rate": 0.0019803884874803296, "loss": 0.2669, "step": 7326 }, { "epoch": 0.012993149695162765, "grad_norm": 0.40234375, "learning_rate": 0.0019803761282582164, "loss": 0.209, "step": 7328 }, { "epoch": 0.01299669586047258, "grad_norm": 0.37109375, "learning_rate": 0.001980363765185849, "loss": 0.3217, "step": 7330 }, { "epoch": 0.013000242025782396, "grad_norm": 1.71875, "learning_rate": 0.0019803513982632817, "loss": 0.2535, "step": 7332 }, { "epoch": 0.01300378819109221, "grad_norm": 0.86328125, "learning_rate": 0.001980339027490569, "loss": 0.2472, "step": 7334 }, { "epoch": 0.013007334356402025, "grad_norm": 1.578125, "learning_rate": 0.0019803266528677648, "loss": 0.4076, "step": 7336 }, { "epoch": 0.013010880521711841, "grad_norm": 0.341796875, "learning_rate": 0.0019803142743949234, "loss": 0.1899, "step": 7338 }, { "epoch": 0.013014426687021656, "grad_norm": 0.185546875, "learning_rate": 0.0019803018920720983, "loss": 0.239, "step": 7340 }, { "epoch": 0.01301797285233147, "grad_norm": 1.640625, "learning_rate": 0.001980289505899344, "loss": 0.3933, "step": 7342 }, { "epoch": 0.013021519017641286, "grad_norm": 0.380859375, "learning_rate": 0.0019802771158767152, "loss": 0.2236, "step": 7344 }, { "epoch": 0.013025065182951101, "grad_norm": 0.28125, "learning_rate": 0.001980264722004265, "loss": 0.2797, "step": 7346 }, { "epoch": 0.013028611348260916, "grad_norm": 0.46875, "learning_rate": 0.0019802523242820488, "loss": 0.5111, "step": 7348 }, { "epoch": 0.013032157513570732, "grad_norm": 0.4765625, "learning_rate": 0.0019802399227101205, "loss": 0.293, "step": 7350 }, { "epoch": 0.013035703678880546, "grad_norm": 0.8828125, "learning_rate": 0.0019802275172885334, "loss": 0.279, "step": 7352 }, { "epoch": 0.013039249844190361, "grad_norm": 1.6171875, "learning_rate": 0.0019802151080173425, "loss": 0.3866, "step": 7354 }, { "epoch": 0.013042796009500177, "grad_norm": 0.53125, "learning_rate": 0.0019802026948966024, "loss": 0.2419, "step": 7356 }, { "epoch": 0.013046342174809992, "grad_norm": 0.4375, "learning_rate": 0.001980190277926367, "loss": 0.3264, "step": 7358 }, { "epoch": 0.013049888340119806, "grad_norm": 0.6171875, "learning_rate": 0.0019801778571066904, "loss": 0.2487, "step": 7360 }, { "epoch": 0.013053434505429623, "grad_norm": 0.33203125, "learning_rate": 0.001980165432437627, "loss": 0.2401, "step": 7362 }, { "epoch": 0.013056980670739437, "grad_norm": 0.515625, "learning_rate": 0.0019801530039192314, "loss": 0.3544, "step": 7364 }, { "epoch": 0.013060526836049254, "grad_norm": 0.73046875, "learning_rate": 0.0019801405715515574, "loss": 0.3347, "step": 7366 }, { "epoch": 0.013064073001359068, "grad_norm": 0.83984375, "learning_rate": 0.0019801281353346604, "loss": 0.2402, "step": 7368 }, { "epoch": 0.013067619166668883, "grad_norm": 1.140625, "learning_rate": 0.0019801156952685937, "loss": 0.2637, "step": 7370 }, { "epoch": 0.013071165331978699, "grad_norm": 0.322265625, "learning_rate": 0.0019801032513534125, "loss": 0.2193, "step": 7372 }, { "epoch": 0.013074711497288513, "grad_norm": 2.296875, "learning_rate": 0.001980090803589171, "loss": 0.2314, "step": 7374 }, { "epoch": 0.013078257662598328, "grad_norm": 0.921875, "learning_rate": 0.001980078351975923, "loss": 0.2706, "step": 7376 }, { "epoch": 0.013081803827908144, "grad_norm": 0.2275390625, "learning_rate": 0.001980065896513724, "loss": 0.2573, "step": 7378 }, { "epoch": 0.013085349993217959, "grad_norm": 1.8984375, "learning_rate": 0.001980053437202628, "loss": 0.3453, "step": 7380 }, { "epoch": 0.013088896158527773, "grad_norm": 7.6875, "learning_rate": 0.001980040974042689, "loss": 0.3178, "step": 7382 }, { "epoch": 0.01309244232383759, "grad_norm": 0.55078125, "learning_rate": 0.0019800285070339626, "loss": 0.291, "step": 7384 }, { "epoch": 0.013095988489147404, "grad_norm": 0.53125, "learning_rate": 0.001980016036176502, "loss": 0.1913, "step": 7386 }, { "epoch": 0.013099534654457219, "grad_norm": 0.373046875, "learning_rate": 0.001980003561470363, "loss": 0.1708, "step": 7388 }, { "epoch": 0.013103080819767035, "grad_norm": 1.65625, "learning_rate": 0.0019799910829155993, "loss": 0.2892, "step": 7390 }, { "epoch": 0.01310662698507685, "grad_norm": 1.4375, "learning_rate": 0.001979978600512266, "loss": 0.2598, "step": 7392 }, { "epoch": 0.013110173150386664, "grad_norm": 0.30859375, "learning_rate": 0.001979966114260417, "loss": 0.2125, "step": 7394 }, { "epoch": 0.01311371931569648, "grad_norm": 0.66796875, "learning_rate": 0.0019799536241601077, "loss": 0.2582, "step": 7396 }, { "epoch": 0.013117265481006295, "grad_norm": 0.8828125, "learning_rate": 0.0019799411302113927, "loss": 0.2937, "step": 7398 }, { "epoch": 0.013120811646316111, "grad_norm": 0.306640625, "learning_rate": 0.001979928632414326, "loss": 0.3104, "step": 7400 }, { "epoch": 0.013124357811625926, "grad_norm": 1.125, "learning_rate": 0.0019799161307689625, "loss": 0.2228, "step": 7402 }, { "epoch": 0.01312790397693574, "grad_norm": 0.57421875, "learning_rate": 0.001979903625275357, "loss": 0.3244, "step": 7404 }, { "epoch": 0.013131450142245557, "grad_norm": 0.359375, "learning_rate": 0.001979891115933564, "loss": 0.202, "step": 7406 }, { "epoch": 0.013134996307555371, "grad_norm": 0.349609375, "learning_rate": 0.0019798786027436384, "loss": 0.246, "step": 7408 }, { "epoch": 0.013138542472865186, "grad_norm": 0.51953125, "learning_rate": 0.001979866085705635, "loss": 0.2544, "step": 7410 }, { "epoch": 0.013142088638175002, "grad_norm": 0.5078125, "learning_rate": 0.0019798535648196084, "loss": 0.1871, "step": 7412 }, { "epoch": 0.013145634803484817, "grad_norm": 1.5625, "learning_rate": 0.0019798410400856136, "loss": 0.4191, "step": 7414 }, { "epoch": 0.013149180968794631, "grad_norm": 0.6953125, "learning_rate": 0.0019798285115037047, "loss": 0.3978, "step": 7416 }, { "epoch": 0.013152727134104448, "grad_norm": 1.2734375, "learning_rate": 0.0019798159790739376, "loss": 0.4776, "step": 7418 }, { "epoch": 0.013156273299414262, "grad_norm": 0.4375, "learning_rate": 0.001979803442796366, "loss": 0.2267, "step": 7420 }, { "epoch": 0.013159819464724077, "grad_norm": 0.458984375, "learning_rate": 0.001979790902671045, "loss": 0.2982, "step": 7422 }, { "epoch": 0.013163365630033893, "grad_norm": 0.8828125, "learning_rate": 0.0019797783586980302, "loss": 0.3043, "step": 7424 }, { "epoch": 0.013166911795343707, "grad_norm": 1.078125, "learning_rate": 0.0019797658108773753, "loss": 0.3142, "step": 7426 }, { "epoch": 0.013170457960653522, "grad_norm": 0.4296875, "learning_rate": 0.0019797532592091367, "loss": 0.2354, "step": 7428 }, { "epoch": 0.013174004125963338, "grad_norm": 0.361328125, "learning_rate": 0.0019797407036933677, "loss": 0.406, "step": 7430 }, { "epoch": 0.013177550291273153, "grad_norm": 1.2734375, "learning_rate": 0.001979728144330124, "loss": 0.2033, "step": 7432 }, { "epoch": 0.01318109645658297, "grad_norm": 1.2890625, "learning_rate": 0.0019797155811194602, "loss": 0.2822, "step": 7434 }, { "epoch": 0.013184642621892784, "grad_norm": 0.640625, "learning_rate": 0.0019797030140614315, "loss": 0.2742, "step": 7436 }, { "epoch": 0.013188188787202598, "grad_norm": 1.6328125, "learning_rate": 0.001979690443156093, "loss": 0.5442, "step": 7438 }, { "epoch": 0.013191734952512415, "grad_norm": 0.474609375, "learning_rate": 0.001979677868403499, "loss": 0.2336, "step": 7440 }, { "epoch": 0.013195281117822229, "grad_norm": 0.349609375, "learning_rate": 0.0019796652898037056, "loss": 0.2037, "step": 7442 }, { "epoch": 0.013198827283132044, "grad_norm": 0.341796875, "learning_rate": 0.001979652707356767, "loss": 0.2163, "step": 7444 }, { "epoch": 0.01320237344844186, "grad_norm": 0.310546875, "learning_rate": 0.0019796401210627384, "loss": 0.1865, "step": 7446 }, { "epoch": 0.013205919613751674, "grad_norm": 0.92578125, "learning_rate": 0.0019796275309216745, "loss": 0.4165, "step": 7448 }, { "epoch": 0.013209465779061489, "grad_norm": 0.404296875, "learning_rate": 0.0019796149369336316, "loss": 0.2459, "step": 7450 }, { "epoch": 0.013213011944371305, "grad_norm": 3.234375, "learning_rate": 0.0019796023390986632, "loss": 0.7228, "step": 7452 }, { "epoch": 0.01321655810968112, "grad_norm": 0.3984375, "learning_rate": 0.0019795897374168254, "loss": 0.281, "step": 7454 }, { "epoch": 0.013220104274990934, "grad_norm": 0.455078125, "learning_rate": 0.001979577131888173, "loss": 0.2723, "step": 7456 }, { "epoch": 0.01322365044030075, "grad_norm": 0.734375, "learning_rate": 0.0019795645225127606, "loss": 0.2263, "step": 7458 }, { "epoch": 0.013227196605610565, "grad_norm": 0.4140625, "learning_rate": 0.0019795519092906445, "loss": 0.2266, "step": 7460 }, { "epoch": 0.01323074277092038, "grad_norm": 2.515625, "learning_rate": 0.0019795392922218793, "loss": 0.3231, "step": 7462 }, { "epoch": 0.013234288936230196, "grad_norm": 0.40625, "learning_rate": 0.00197952667130652, "loss": 0.2287, "step": 7464 }, { "epoch": 0.01323783510154001, "grad_norm": 0.443359375, "learning_rate": 0.0019795140465446214, "loss": 0.2105, "step": 7466 }, { "epoch": 0.013241381266849827, "grad_norm": 1.09375, "learning_rate": 0.00197950141793624, "loss": 0.3577, "step": 7468 }, { "epoch": 0.013244927432159641, "grad_norm": 0.97265625, "learning_rate": 0.00197948878548143, "loss": 0.3316, "step": 7470 }, { "epoch": 0.013248473597469456, "grad_norm": 1.203125, "learning_rate": 0.0019794761491802467, "loss": 0.316, "step": 7472 }, { "epoch": 0.013252019762779272, "grad_norm": 0.318359375, "learning_rate": 0.001979463509032746, "loss": 0.2238, "step": 7474 }, { "epoch": 0.013255565928089087, "grad_norm": 1.515625, "learning_rate": 0.0019794508650389825, "loss": 0.2358, "step": 7476 }, { "epoch": 0.013259112093398901, "grad_norm": 1.0703125, "learning_rate": 0.0019794382171990114, "loss": 0.311, "step": 7478 }, { "epoch": 0.013262658258708718, "grad_norm": 0.298828125, "learning_rate": 0.001979425565512889, "loss": 0.3175, "step": 7480 }, { "epoch": 0.013266204424018532, "grad_norm": 0.41796875, "learning_rate": 0.00197941290998067, "loss": 0.2808, "step": 7482 }, { "epoch": 0.013269750589328347, "grad_norm": 0.78125, "learning_rate": 0.0019794002506024096, "loss": 0.1972, "step": 7484 }, { "epoch": 0.013273296754638163, "grad_norm": 0.27734375, "learning_rate": 0.001979387587378163, "loss": 0.1635, "step": 7486 }, { "epoch": 0.013276842919947978, "grad_norm": 1.0703125, "learning_rate": 0.0019793749203079864, "loss": 0.3265, "step": 7488 }, { "epoch": 0.013280389085257792, "grad_norm": 2.90625, "learning_rate": 0.0019793622493919343, "loss": 0.2144, "step": 7490 }, { "epoch": 0.013283935250567609, "grad_norm": 0.349609375, "learning_rate": 0.0019793495746300623, "loss": 0.245, "step": 7492 }, { "epoch": 0.013287481415877423, "grad_norm": 0.466796875, "learning_rate": 0.0019793368960224268, "loss": 0.2353, "step": 7494 }, { "epoch": 0.013291027581187238, "grad_norm": 0.546875, "learning_rate": 0.0019793242135690823, "loss": 0.2539, "step": 7496 }, { "epoch": 0.013294573746497054, "grad_norm": 0.431640625, "learning_rate": 0.001979311527270084, "loss": 0.2414, "step": 7498 }, { "epoch": 0.013298119911806868, "grad_norm": 0.75390625, "learning_rate": 0.0019792988371254883, "loss": 0.3139, "step": 7500 }, { "epoch": 0.013301666077116685, "grad_norm": 0.302734375, "learning_rate": 0.0019792861431353497, "loss": 0.2354, "step": 7502 }, { "epoch": 0.0133052122424265, "grad_norm": 0.451171875, "learning_rate": 0.0019792734452997248, "loss": 0.1684, "step": 7504 }, { "epoch": 0.013308758407736314, "grad_norm": 0.55078125, "learning_rate": 0.0019792607436186684, "loss": 0.2232, "step": 7506 }, { "epoch": 0.01331230457304613, "grad_norm": 0.51953125, "learning_rate": 0.0019792480380922367, "loss": 0.1877, "step": 7508 }, { "epoch": 0.013315850738355945, "grad_norm": 0.91015625, "learning_rate": 0.001979235328720484, "loss": 0.2452, "step": 7510 }, { "epoch": 0.01331939690366576, "grad_norm": 0.5078125, "learning_rate": 0.0019792226155034673, "loss": 0.3374, "step": 7512 }, { "epoch": 0.013322943068975576, "grad_norm": 0.53125, "learning_rate": 0.001979209898441241, "loss": 0.2133, "step": 7514 }, { "epoch": 0.01332648923428539, "grad_norm": 0.458984375, "learning_rate": 0.0019791971775338616, "loss": 0.2446, "step": 7516 }, { "epoch": 0.013330035399595205, "grad_norm": 0.337890625, "learning_rate": 0.0019791844527813846, "loss": 0.201, "step": 7518 }, { "epoch": 0.013333581564905021, "grad_norm": 0.458984375, "learning_rate": 0.0019791717241838657, "loss": 0.2199, "step": 7520 }, { "epoch": 0.013337127730214835, "grad_norm": 0.34375, "learning_rate": 0.00197915899174136, "loss": 0.2111, "step": 7522 }, { "epoch": 0.01334067389552465, "grad_norm": 1.3671875, "learning_rate": 0.001979146255453924, "loss": 0.2488, "step": 7524 }, { "epoch": 0.013344220060834466, "grad_norm": 0.416015625, "learning_rate": 0.0019791335153216123, "loss": 0.3305, "step": 7526 }, { "epoch": 0.013347766226144281, "grad_norm": 1.5234375, "learning_rate": 0.0019791207713444814, "loss": 0.273, "step": 7528 }, { "epoch": 0.013351312391454095, "grad_norm": 0.3046875, "learning_rate": 0.001979108023522587, "loss": 0.1712, "step": 7530 }, { "epoch": 0.013354858556763912, "grad_norm": 0.271484375, "learning_rate": 0.0019790952718559845, "loss": 0.1873, "step": 7532 }, { "epoch": 0.013358404722073726, "grad_norm": 3.9375, "learning_rate": 0.0019790825163447305, "loss": 0.2767, "step": 7534 }, { "epoch": 0.013361950887383543, "grad_norm": 2.265625, "learning_rate": 0.00197906975698888, "loss": 0.5543, "step": 7536 }, { "epoch": 0.013365497052693357, "grad_norm": 0.431640625, "learning_rate": 0.001979056993788489, "loss": 0.2476, "step": 7538 }, { "epoch": 0.013369043218003172, "grad_norm": 0.50390625, "learning_rate": 0.001979044226743613, "loss": 0.2713, "step": 7540 }, { "epoch": 0.013372589383312988, "grad_norm": 0.73046875, "learning_rate": 0.0019790314558543087, "loss": 0.4439, "step": 7542 }, { "epoch": 0.013376135548622803, "grad_norm": 0.9140625, "learning_rate": 0.001979018681120631, "loss": 0.2361, "step": 7544 }, { "epoch": 0.013379681713932617, "grad_norm": 0.66015625, "learning_rate": 0.001979005902542636, "loss": 0.286, "step": 7546 }, { "epoch": 0.013383227879242433, "grad_norm": 0.45703125, "learning_rate": 0.0019789931201203803, "loss": 0.224, "step": 7548 }, { "epoch": 0.013386774044552248, "grad_norm": 0.54296875, "learning_rate": 0.0019789803338539193, "loss": 0.2424, "step": 7550 }, { "epoch": 0.013390320209862062, "grad_norm": 0.71484375, "learning_rate": 0.0019789675437433085, "loss": 0.2961, "step": 7552 }, { "epoch": 0.013393866375171879, "grad_norm": 0.6484375, "learning_rate": 0.0019789547497886043, "loss": 0.2981, "step": 7554 }, { "epoch": 0.013397412540481693, "grad_norm": 0.6953125, "learning_rate": 0.0019789419519898625, "loss": 0.4272, "step": 7556 }, { "epoch": 0.013400958705791508, "grad_norm": 0.71484375, "learning_rate": 0.001978929150347139, "loss": 0.2879, "step": 7558 }, { "epoch": 0.013404504871101324, "grad_norm": 0.5, "learning_rate": 0.0019789163448604907, "loss": 0.2423, "step": 7560 }, { "epoch": 0.013408051036411139, "grad_norm": 0.8984375, "learning_rate": 0.0019789035355299725, "loss": 0.2264, "step": 7562 }, { "epoch": 0.013411597201720953, "grad_norm": 0.3046875, "learning_rate": 0.0019788907223556407, "loss": 0.1948, "step": 7564 }, { "epoch": 0.01341514336703077, "grad_norm": 0.3046875, "learning_rate": 0.001978877905337551, "loss": 0.2547, "step": 7566 }, { "epoch": 0.013418689532340584, "grad_norm": 0.84375, "learning_rate": 0.0019788650844757604, "loss": 0.2852, "step": 7568 }, { "epoch": 0.0134222356976504, "grad_norm": 0.462890625, "learning_rate": 0.0019788522597703243, "loss": 0.2168, "step": 7570 }, { "epoch": 0.013425781862960215, "grad_norm": 0.439453125, "learning_rate": 0.0019788394312212987, "loss": 0.219, "step": 7572 }, { "epoch": 0.01342932802827003, "grad_norm": 0.236328125, "learning_rate": 0.00197882659882874, "loss": 0.2216, "step": 7574 }, { "epoch": 0.013432874193579846, "grad_norm": 0.2412109375, "learning_rate": 0.0019788137625927045, "loss": 0.2031, "step": 7576 }, { "epoch": 0.01343642035888966, "grad_norm": 5.15625, "learning_rate": 0.001978800922513248, "loss": 0.3581, "step": 7578 }, { "epoch": 0.013439966524199475, "grad_norm": 0.94140625, "learning_rate": 0.0019787880785904263, "loss": 0.2773, "step": 7580 }, { "epoch": 0.013443512689509291, "grad_norm": 0.68359375, "learning_rate": 0.0019787752308242966, "loss": 0.2136, "step": 7582 }, { "epoch": 0.013447058854819106, "grad_norm": 0.37109375, "learning_rate": 0.001978762379214914, "loss": 0.2067, "step": 7584 }, { "epoch": 0.01345060502012892, "grad_norm": 0.83984375, "learning_rate": 0.0019787495237623353, "loss": 0.2701, "step": 7586 }, { "epoch": 0.013454151185438737, "grad_norm": 0.84765625, "learning_rate": 0.0019787366644666167, "loss": 0.246, "step": 7588 }, { "epoch": 0.013457697350748551, "grad_norm": 1.0390625, "learning_rate": 0.001978723801327815, "loss": 0.4068, "step": 7590 }, { "epoch": 0.013461243516058366, "grad_norm": 0.39453125, "learning_rate": 0.001978710934345985, "loss": 0.1986, "step": 7592 }, { "epoch": 0.013464789681368182, "grad_norm": 0.306640625, "learning_rate": 0.001978698063521184, "loss": 0.2285, "step": 7594 }, { "epoch": 0.013468335846677996, "grad_norm": 1.5859375, "learning_rate": 0.001978685188853468, "loss": 0.4398, "step": 7596 }, { "epoch": 0.013471882011987811, "grad_norm": 0.41015625, "learning_rate": 0.001978672310342893, "loss": 0.4547, "step": 7598 }, { "epoch": 0.013475428177297627, "grad_norm": 0.474609375, "learning_rate": 0.0019786594279895165, "loss": 0.2128, "step": 7600 }, { "epoch": 0.013478974342607442, "grad_norm": 0.81640625, "learning_rate": 0.0019786465417933937, "loss": 0.2281, "step": 7602 }, { "epoch": 0.013482520507917258, "grad_norm": 0.53125, "learning_rate": 0.0019786336517545813, "loss": 0.3014, "step": 7604 }, { "epoch": 0.013486066673227073, "grad_norm": 6.125, "learning_rate": 0.0019786207578731357, "loss": 0.2493, "step": 7606 }, { "epoch": 0.013489612838536887, "grad_norm": 0.56640625, "learning_rate": 0.0019786078601491132, "loss": 0.2138, "step": 7608 }, { "epoch": 0.013493159003846704, "grad_norm": 0.384765625, "learning_rate": 0.0019785949585825707, "loss": 0.3604, "step": 7610 }, { "epoch": 0.013496705169156518, "grad_norm": 1.4921875, "learning_rate": 0.0019785820531735636, "loss": 0.2503, "step": 7612 }, { "epoch": 0.013500251334466333, "grad_norm": 0.46875, "learning_rate": 0.0019785691439221493, "loss": 0.2542, "step": 7614 }, { "epoch": 0.013503797499776149, "grad_norm": 0.546875, "learning_rate": 0.0019785562308283836, "loss": 0.2516, "step": 7616 }, { "epoch": 0.013507343665085964, "grad_norm": 0.251953125, "learning_rate": 0.0019785433138923233, "loss": 0.2466, "step": 7618 }, { "epoch": 0.013510889830395778, "grad_norm": 0.37890625, "learning_rate": 0.0019785303931140253, "loss": 0.3439, "step": 7620 }, { "epoch": 0.013514435995705594, "grad_norm": 0.357421875, "learning_rate": 0.0019785174684935456, "loss": 0.2255, "step": 7622 }, { "epoch": 0.013517982161015409, "grad_norm": 0.376953125, "learning_rate": 0.0019785045400309404, "loss": 0.3415, "step": 7624 }, { "epoch": 0.013521528326325223, "grad_norm": 0.68359375, "learning_rate": 0.0019784916077262666, "loss": 0.2489, "step": 7626 }, { "epoch": 0.01352507449163504, "grad_norm": 0.484375, "learning_rate": 0.001978478671579581, "loss": 0.2358, "step": 7628 }, { "epoch": 0.013528620656944854, "grad_norm": 0.53125, "learning_rate": 0.00197846573159094, "loss": 0.2224, "step": 7630 }, { "epoch": 0.013532166822254669, "grad_norm": 3.578125, "learning_rate": 0.0019784527877604, "loss": 0.2525, "step": 7632 }, { "epoch": 0.013535712987564485, "grad_norm": 1.6171875, "learning_rate": 0.001978439840088018, "loss": 0.5032, "step": 7634 }, { "epoch": 0.0135392591528743, "grad_norm": 0.57421875, "learning_rate": 0.001978426888573851, "loss": 0.2922, "step": 7636 }, { "epoch": 0.013542805318184116, "grad_norm": 0.55078125, "learning_rate": 0.001978413933217954, "loss": 0.354, "step": 7638 }, { "epoch": 0.01354635148349393, "grad_norm": 1.5234375, "learning_rate": 0.001978400974020385, "loss": 0.53, "step": 7640 }, { "epoch": 0.013549897648803745, "grad_norm": 0.49609375, "learning_rate": 0.0019783880109812005, "loss": 0.222, "step": 7642 }, { "epoch": 0.013553443814113561, "grad_norm": 0.353515625, "learning_rate": 0.001978375044100457, "loss": 0.254, "step": 7644 }, { "epoch": 0.013556989979423376, "grad_norm": 0.59765625, "learning_rate": 0.0019783620733782115, "loss": 0.189, "step": 7646 }, { "epoch": 0.01356053614473319, "grad_norm": 0.53125, "learning_rate": 0.0019783490988145203, "loss": 0.3683, "step": 7648 }, { "epoch": 0.013564082310043007, "grad_norm": 0.455078125, "learning_rate": 0.00197833612040944, "loss": 0.2526, "step": 7650 }, { "epoch": 0.013567628475352821, "grad_norm": 0.267578125, "learning_rate": 0.001978323138163028, "loss": 0.2494, "step": 7652 }, { "epoch": 0.013571174640662636, "grad_norm": 0.6484375, "learning_rate": 0.001978310152075341, "loss": 0.3841, "step": 7654 }, { "epoch": 0.013574720805972452, "grad_norm": 1.1328125, "learning_rate": 0.0019782971621464356, "loss": 0.3076, "step": 7656 }, { "epoch": 0.013578266971282267, "grad_norm": 0.330078125, "learning_rate": 0.0019782841683763683, "loss": 0.2062, "step": 7658 }, { "epoch": 0.013581813136592081, "grad_norm": 0.73046875, "learning_rate": 0.0019782711707651965, "loss": 0.3023, "step": 7660 }, { "epoch": 0.013585359301901898, "grad_norm": 0.5078125, "learning_rate": 0.0019782581693129765, "loss": 0.2293, "step": 7662 }, { "epoch": 0.013588905467211712, "grad_norm": 1.03125, "learning_rate": 0.0019782451640197652, "loss": 0.2587, "step": 7664 }, { "epoch": 0.013592451632521527, "grad_norm": 0.4296875, "learning_rate": 0.0019782321548856203, "loss": 0.2545, "step": 7666 }, { "epoch": 0.013595997797831343, "grad_norm": 1.0390625, "learning_rate": 0.0019782191419105977, "loss": 0.2582, "step": 7668 }, { "epoch": 0.013599543963141158, "grad_norm": 0.322265625, "learning_rate": 0.001978206125094755, "loss": 0.1954, "step": 7670 }, { "epoch": 0.013603090128450974, "grad_norm": 0.47265625, "learning_rate": 0.0019781931044381483, "loss": 0.2613, "step": 7672 }, { "epoch": 0.013606636293760788, "grad_norm": 0.359375, "learning_rate": 0.0019781800799408356, "loss": 0.3365, "step": 7674 }, { "epoch": 0.013610182459070603, "grad_norm": 0.39453125, "learning_rate": 0.0019781670516028733, "loss": 0.1916, "step": 7676 }, { "epoch": 0.01361372862438042, "grad_norm": 0.5546875, "learning_rate": 0.001978154019424318, "loss": 0.2629, "step": 7678 }, { "epoch": 0.013617274789690234, "grad_norm": 0.75, "learning_rate": 0.0019781409834052272, "loss": 0.2863, "step": 7680 }, { "epoch": 0.013620820955000048, "grad_norm": 0.322265625, "learning_rate": 0.0019781279435456584, "loss": 0.2385, "step": 7682 }, { "epoch": 0.013624367120309865, "grad_norm": 0.3984375, "learning_rate": 0.001978114899845667, "loss": 0.2907, "step": 7684 }, { "epoch": 0.013627913285619679, "grad_norm": 0.46875, "learning_rate": 0.001978101852305312, "loss": 0.2181, "step": 7686 }, { "epoch": 0.013631459450929494, "grad_norm": 0.80859375, "learning_rate": 0.0019780888009246493, "loss": 0.2996, "step": 7688 }, { "epoch": 0.01363500561623931, "grad_norm": 0.421875, "learning_rate": 0.0019780757457037363, "loss": 0.2227, "step": 7690 }, { "epoch": 0.013638551781549125, "grad_norm": 0.5078125, "learning_rate": 0.00197806268664263, "loss": 0.2362, "step": 7692 }, { "epoch": 0.013642097946858939, "grad_norm": 0.369140625, "learning_rate": 0.0019780496237413875, "loss": 0.2549, "step": 7694 }, { "epoch": 0.013645644112168755, "grad_norm": 0.58203125, "learning_rate": 0.001978036557000066, "loss": 0.2283, "step": 7696 }, { "epoch": 0.01364919027747857, "grad_norm": 0.3125, "learning_rate": 0.0019780234864187228, "loss": 0.4069, "step": 7698 }, { "epoch": 0.013652736442788384, "grad_norm": 1.625, "learning_rate": 0.0019780104119974146, "loss": 0.2306, "step": 7700 }, { "epoch": 0.0136562826080982, "grad_norm": 1.734375, "learning_rate": 0.001977997333736199, "loss": 0.2526, "step": 7702 }, { "epoch": 0.013659828773408015, "grad_norm": 0.333984375, "learning_rate": 0.0019779842516351328, "loss": 0.2475, "step": 7704 }, { "epoch": 0.013663374938717832, "grad_norm": 0.255859375, "learning_rate": 0.0019779711656942736, "loss": 0.1859, "step": 7706 }, { "epoch": 0.013666921104027646, "grad_norm": 0.439453125, "learning_rate": 0.0019779580759136787, "loss": 0.2434, "step": 7708 }, { "epoch": 0.01367046726933746, "grad_norm": 2.21875, "learning_rate": 0.001977944982293405, "loss": 0.3137, "step": 7710 }, { "epoch": 0.013674013434647277, "grad_norm": 0.474609375, "learning_rate": 0.0019779318848335103, "loss": 0.2193, "step": 7712 }, { "epoch": 0.013677559599957092, "grad_norm": 0.37890625, "learning_rate": 0.0019779187835340514, "loss": 0.2377, "step": 7714 }, { "epoch": 0.013681105765266906, "grad_norm": 1.40625, "learning_rate": 0.001977905678395085, "loss": 0.2706, "step": 7716 }, { "epoch": 0.013684651930576722, "grad_norm": 0.337890625, "learning_rate": 0.0019778925694166703, "loss": 0.2012, "step": 7718 }, { "epoch": 0.013688198095886537, "grad_norm": 0.4140625, "learning_rate": 0.0019778794565988625, "loss": 0.3076, "step": 7720 }, { "epoch": 0.013691744261196351, "grad_norm": 0.88671875, "learning_rate": 0.00197786633994172, "loss": 0.2691, "step": 7722 }, { "epoch": 0.013695290426506168, "grad_norm": 0.271484375, "learning_rate": 0.0019778532194453004, "loss": 0.2075, "step": 7724 }, { "epoch": 0.013698836591815982, "grad_norm": 0.48828125, "learning_rate": 0.0019778400951096604, "loss": 0.5053, "step": 7726 }, { "epoch": 0.013702382757125797, "grad_norm": 1.71875, "learning_rate": 0.001977826966934858, "loss": 0.3419, "step": 7728 }, { "epoch": 0.013705928922435613, "grad_norm": 1.015625, "learning_rate": 0.0019778138349209502, "loss": 0.2519, "step": 7730 }, { "epoch": 0.013709475087745428, "grad_norm": 0.439453125, "learning_rate": 0.0019778006990679945, "loss": 0.212, "step": 7732 }, { "epoch": 0.013713021253055242, "grad_norm": 1.7109375, "learning_rate": 0.0019777875593760485, "loss": 0.2716, "step": 7734 }, { "epoch": 0.013716567418365059, "grad_norm": 2.34375, "learning_rate": 0.0019777744158451698, "loss": 0.225, "step": 7736 }, { "epoch": 0.013720113583674873, "grad_norm": 0.66796875, "learning_rate": 0.0019777612684754153, "loss": 0.273, "step": 7738 }, { "epoch": 0.01372365974898469, "grad_norm": 0.72265625, "learning_rate": 0.001977748117266843, "loss": 0.3076, "step": 7740 }, { "epoch": 0.013727205914294504, "grad_norm": 0.455078125, "learning_rate": 0.0019777349622195103, "loss": 0.2543, "step": 7742 }, { "epoch": 0.013730752079604319, "grad_norm": 0.703125, "learning_rate": 0.001977721803333475, "loss": 0.2176, "step": 7744 }, { "epoch": 0.013734298244914135, "grad_norm": 0.55078125, "learning_rate": 0.001977708640608794, "loss": 0.2822, "step": 7746 }, { "epoch": 0.01373784441022395, "grad_norm": 0.84765625, "learning_rate": 0.0019776954740455257, "loss": 0.2354, "step": 7748 }, { "epoch": 0.013741390575533764, "grad_norm": 2.609375, "learning_rate": 0.0019776823036437266, "loss": 0.2701, "step": 7750 }, { "epoch": 0.01374493674084358, "grad_norm": 2.34375, "learning_rate": 0.0019776691294034554, "loss": 0.3859, "step": 7752 }, { "epoch": 0.013748482906153395, "grad_norm": 0.56640625, "learning_rate": 0.001977655951324769, "loss": 0.306, "step": 7754 }, { "epoch": 0.01375202907146321, "grad_norm": 0.40625, "learning_rate": 0.0019776427694077254, "loss": 0.2485, "step": 7756 }, { "epoch": 0.013755575236773026, "grad_norm": 0.212890625, "learning_rate": 0.001977629583652382, "loss": 0.3498, "step": 7758 }, { "epoch": 0.01375912140208284, "grad_norm": 0.359375, "learning_rate": 0.0019776163940587966, "loss": 0.2204, "step": 7760 }, { "epoch": 0.013762667567392655, "grad_norm": 0.474609375, "learning_rate": 0.0019776032006270272, "loss": 0.1741, "step": 7762 }, { "epoch": 0.013766213732702471, "grad_norm": 0.609375, "learning_rate": 0.001977590003357131, "loss": 0.3222, "step": 7764 }, { "epoch": 0.013769759898012286, "grad_norm": 1.6796875, "learning_rate": 0.001977576802249166, "loss": 0.3199, "step": 7766 }, { "epoch": 0.0137733060633221, "grad_norm": 0.5, "learning_rate": 0.0019775635973031894, "loss": 0.2006, "step": 7768 }, { "epoch": 0.013776852228631916, "grad_norm": 1.0546875, "learning_rate": 0.0019775503885192595, "loss": 0.3139, "step": 7770 }, { "epoch": 0.013780398393941731, "grad_norm": 0.337890625, "learning_rate": 0.001977537175897434, "loss": 0.2011, "step": 7772 }, { "epoch": 0.013783944559251547, "grad_norm": 2.734375, "learning_rate": 0.0019775239594377707, "loss": 0.3913, "step": 7774 }, { "epoch": 0.013787490724561362, "grad_norm": 0.7265625, "learning_rate": 0.001977510739140327, "loss": 0.2466, "step": 7776 }, { "epoch": 0.013791036889871176, "grad_norm": 0.443359375, "learning_rate": 0.0019774975150051617, "loss": 0.3482, "step": 7778 }, { "epoch": 0.013794583055180993, "grad_norm": 1.484375, "learning_rate": 0.0019774842870323313, "loss": 0.3375, "step": 7780 }, { "epoch": 0.013798129220490807, "grad_norm": 0.8125, "learning_rate": 0.001977471055221895, "loss": 0.2647, "step": 7782 }, { "epoch": 0.013801675385800622, "grad_norm": 1.015625, "learning_rate": 0.0019774578195739098, "loss": 0.2908, "step": 7784 }, { "epoch": 0.013805221551110438, "grad_norm": 0.462890625, "learning_rate": 0.0019774445800884335, "loss": 0.2689, "step": 7786 }, { "epoch": 0.013808767716420253, "grad_norm": 0.4453125, "learning_rate": 0.0019774313367655243, "loss": 0.3056, "step": 7788 }, { "epoch": 0.013812313881730067, "grad_norm": 0.490234375, "learning_rate": 0.0019774180896052406, "loss": 0.1835, "step": 7790 }, { "epoch": 0.013815860047039883, "grad_norm": 0.375, "learning_rate": 0.0019774048386076394, "loss": 0.2325, "step": 7792 }, { "epoch": 0.013819406212349698, "grad_norm": 0.294921875, "learning_rate": 0.001977391583772779, "loss": 0.2261, "step": 7794 }, { "epoch": 0.013822952377659512, "grad_norm": 0.412109375, "learning_rate": 0.0019773783251007177, "loss": 0.2229, "step": 7796 }, { "epoch": 0.013826498542969329, "grad_norm": 0.73046875, "learning_rate": 0.001977365062591513, "loss": 0.2722, "step": 7798 }, { "epoch": 0.013830044708279143, "grad_norm": 0.94140625, "learning_rate": 0.0019773517962452234, "loss": 0.2715, "step": 7800 }, { "epoch": 0.013833590873588958, "grad_norm": 1.0390625, "learning_rate": 0.0019773385260619066, "loss": 0.2004, "step": 7802 }, { "epoch": 0.013837137038898774, "grad_norm": 0.5390625, "learning_rate": 0.001977325252041621, "loss": 0.3634, "step": 7804 }, { "epoch": 0.013840683204208589, "grad_norm": 0.87109375, "learning_rate": 0.001977311974184424, "loss": 0.2166, "step": 7806 }, { "epoch": 0.013844229369518405, "grad_norm": 1.7265625, "learning_rate": 0.001977298692490374, "loss": 0.3054, "step": 7808 }, { "epoch": 0.01384777553482822, "grad_norm": 0.5859375, "learning_rate": 0.001977285406959529, "loss": 0.313, "step": 7810 }, { "epoch": 0.013851321700138034, "grad_norm": 0.87109375, "learning_rate": 0.001977272117591948, "loss": 0.2316, "step": 7812 }, { "epoch": 0.01385486786544785, "grad_norm": 0.421875, "learning_rate": 0.0019772588243876874, "loss": 0.2951, "step": 7814 }, { "epoch": 0.013858414030757665, "grad_norm": 0.3046875, "learning_rate": 0.0019772455273468067, "loss": 0.2366, "step": 7816 }, { "epoch": 0.01386196019606748, "grad_norm": 0.294921875, "learning_rate": 0.0019772322264693634, "loss": 0.232, "step": 7818 }, { "epoch": 0.013865506361377296, "grad_norm": 2.3125, "learning_rate": 0.001977218921755416, "loss": 0.3022, "step": 7820 }, { "epoch": 0.01386905252668711, "grad_norm": 0.609375, "learning_rate": 0.001977205613205023, "loss": 0.2661, "step": 7822 }, { "epoch": 0.013872598691996925, "grad_norm": 1.3671875, "learning_rate": 0.0019771923008182414, "loss": 0.2097, "step": 7824 }, { "epoch": 0.013876144857306741, "grad_norm": 1.6328125, "learning_rate": 0.001977178984595131, "loss": 0.3737, "step": 7826 }, { "epoch": 0.013879691022616556, "grad_norm": 0.482421875, "learning_rate": 0.001977165664535749, "loss": 0.1889, "step": 7828 }, { "epoch": 0.01388323718792637, "grad_norm": 0.66796875, "learning_rate": 0.0019771523406401535, "loss": 0.592, "step": 7830 }, { "epoch": 0.013886783353236187, "grad_norm": 1.40625, "learning_rate": 0.0019771390129084037, "loss": 0.2485, "step": 7832 }, { "epoch": 0.013890329518546001, "grad_norm": 3.953125, "learning_rate": 0.001977125681340557, "loss": 0.351, "step": 7834 }, { "epoch": 0.013893875683855816, "grad_norm": 1.28125, "learning_rate": 0.001977112345936672, "loss": 0.3311, "step": 7836 }, { "epoch": 0.013897421849165632, "grad_norm": 1.3671875, "learning_rate": 0.0019770990066968076, "loss": 0.258, "step": 7838 }, { "epoch": 0.013900968014475447, "grad_norm": 0.30078125, "learning_rate": 0.001977085663621021, "loss": 0.2161, "step": 7840 }, { "epoch": 0.013904514179785261, "grad_norm": 0.61328125, "learning_rate": 0.0019770723167093717, "loss": 0.2532, "step": 7842 }, { "epoch": 0.013908060345095077, "grad_norm": 0.7578125, "learning_rate": 0.0019770589659619175, "loss": 0.2677, "step": 7844 }, { "epoch": 0.013911606510404892, "grad_norm": 1.0390625, "learning_rate": 0.0019770456113787165, "loss": 0.2911, "step": 7846 }, { "epoch": 0.013915152675714708, "grad_norm": 4.5625, "learning_rate": 0.0019770322529598277, "loss": 0.2344, "step": 7848 }, { "epoch": 0.013918698841024523, "grad_norm": 0.71484375, "learning_rate": 0.001977018890705309, "loss": 0.4691, "step": 7850 }, { "epoch": 0.013922245006334337, "grad_norm": 1.03125, "learning_rate": 0.0019770055246152193, "loss": 0.2439, "step": 7852 }, { "epoch": 0.013925791171644154, "grad_norm": 1.4140625, "learning_rate": 0.001976992154689617, "loss": 0.2845, "step": 7854 }, { "epoch": 0.013929337336953968, "grad_norm": 0.6015625, "learning_rate": 0.00197697878092856, "loss": 0.402, "step": 7856 }, { "epoch": 0.013932883502263783, "grad_norm": 0.2490234375, "learning_rate": 0.001976965403332108, "loss": 0.2305, "step": 7858 }, { "epoch": 0.013936429667573599, "grad_norm": 0.4765625, "learning_rate": 0.001976952021900318, "loss": 0.2462, "step": 7860 }, { "epoch": 0.013939975832883414, "grad_norm": 0.5078125, "learning_rate": 0.0019769386366332497, "loss": 0.2552, "step": 7862 }, { "epoch": 0.013943521998193228, "grad_norm": 1.8125, "learning_rate": 0.001976925247530961, "loss": 0.2692, "step": 7864 }, { "epoch": 0.013947068163503044, "grad_norm": 0.9375, "learning_rate": 0.0019769118545935106, "loss": 0.2507, "step": 7866 }, { "epoch": 0.013950614328812859, "grad_norm": 0.703125, "learning_rate": 0.001976898457820957, "loss": 0.2629, "step": 7868 }, { "epoch": 0.013954160494122674, "grad_norm": 0.92578125, "learning_rate": 0.0019768850572133593, "loss": 0.2842, "step": 7870 }, { "epoch": 0.01395770665943249, "grad_norm": 0.244140625, "learning_rate": 0.0019768716527707756, "loss": 0.2646, "step": 7872 }, { "epoch": 0.013961252824742304, "grad_norm": 0.431640625, "learning_rate": 0.0019768582444932644, "loss": 0.225, "step": 7874 }, { "epoch": 0.013964798990052119, "grad_norm": 0.396484375, "learning_rate": 0.0019768448323808844, "loss": 0.2122, "step": 7876 }, { "epoch": 0.013968345155361935, "grad_norm": 3.25, "learning_rate": 0.001976831416433695, "loss": 0.2633, "step": 7878 }, { "epoch": 0.01397189132067175, "grad_norm": 0.44140625, "learning_rate": 0.001976817996651754, "loss": 0.3261, "step": 7880 }, { "epoch": 0.013975437485981566, "grad_norm": 0.52734375, "learning_rate": 0.00197680457303512, "loss": 0.2683, "step": 7882 }, { "epoch": 0.01397898365129138, "grad_norm": 5.46875, "learning_rate": 0.0019767911455838526, "loss": 0.339, "step": 7884 }, { "epoch": 0.013982529816601195, "grad_norm": 0.396484375, "learning_rate": 0.00197677771429801, "loss": 0.2727, "step": 7886 }, { "epoch": 0.013986075981911011, "grad_norm": 0.380859375, "learning_rate": 0.001976764279177651, "loss": 0.2702, "step": 7888 }, { "epoch": 0.013989622147220826, "grad_norm": 0.85546875, "learning_rate": 0.0019767508402228347, "loss": 0.1854, "step": 7890 }, { "epoch": 0.01399316831253064, "grad_norm": 1.46875, "learning_rate": 0.0019767373974336187, "loss": 0.2519, "step": 7892 }, { "epoch": 0.013996714477840457, "grad_norm": 0.65625, "learning_rate": 0.001976723950810063, "loss": 0.2901, "step": 7894 }, { "epoch": 0.014000260643150271, "grad_norm": 0.423828125, "learning_rate": 0.001976710500352226, "loss": 0.2789, "step": 7896 }, { "epoch": 0.014003806808460086, "grad_norm": 0.462890625, "learning_rate": 0.0019766970460601664, "loss": 0.3878, "step": 7898 }, { "epoch": 0.014007352973769902, "grad_norm": 0.404296875, "learning_rate": 0.0019766835879339436, "loss": 0.2027, "step": 7900 }, { "epoch": 0.014010899139079717, "grad_norm": 0.56640625, "learning_rate": 0.0019766701259736155, "loss": 0.3059, "step": 7902 }, { "epoch": 0.014014445304389531, "grad_norm": 0.50390625, "learning_rate": 0.001976656660179242, "loss": 0.3784, "step": 7904 }, { "epoch": 0.014017991469699348, "grad_norm": 0.453125, "learning_rate": 0.0019766431905508808, "loss": 0.2456, "step": 7906 }, { "epoch": 0.014021537635009162, "grad_norm": 0.337890625, "learning_rate": 0.0019766297170885918, "loss": 0.2295, "step": 7908 }, { "epoch": 0.014025083800318977, "grad_norm": 0.41015625, "learning_rate": 0.001976616239792434, "loss": 0.2423, "step": 7910 }, { "epoch": 0.014028629965628793, "grad_norm": 0.8828125, "learning_rate": 0.0019766027586624654, "loss": 0.2471, "step": 7912 }, { "epoch": 0.014032176130938608, "grad_norm": 0.439453125, "learning_rate": 0.001976589273698746, "loss": 0.2488, "step": 7914 }, { "epoch": 0.014035722296248424, "grad_norm": 0.53125, "learning_rate": 0.001976575784901334, "loss": 0.2615, "step": 7916 }, { "epoch": 0.014039268461558238, "grad_norm": 0.7890625, "learning_rate": 0.001976562292270289, "loss": 0.2338, "step": 7918 }, { "epoch": 0.014042814626868053, "grad_norm": 0.296875, "learning_rate": 0.001976548795805669, "loss": 0.2023, "step": 7920 }, { "epoch": 0.01404636079217787, "grad_norm": 2.03125, "learning_rate": 0.0019765352955075344, "loss": 0.3934, "step": 7922 }, { "epoch": 0.014049906957487684, "grad_norm": 0.53515625, "learning_rate": 0.0019765217913759433, "loss": 0.2818, "step": 7924 }, { "epoch": 0.014053453122797498, "grad_norm": 1.296875, "learning_rate": 0.001976508283410955, "loss": 0.2061, "step": 7926 }, { "epoch": 0.014056999288107315, "grad_norm": 0.32421875, "learning_rate": 0.001976494771612629, "loss": 0.2473, "step": 7928 }, { "epoch": 0.01406054545341713, "grad_norm": 0.61328125, "learning_rate": 0.0019764812559810237, "loss": 0.2271, "step": 7930 }, { "epoch": 0.014064091618726944, "grad_norm": 2.28125, "learning_rate": 0.0019764677365161987, "loss": 0.2527, "step": 7932 }, { "epoch": 0.01406763778403676, "grad_norm": 1.125, "learning_rate": 0.0019764542132182125, "loss": 0.3249, "step": 7934 }, { "epoch": 0.014071183949346575, "grad_norm": 0.388671875, "learning_rate": 0.001976440686087125, "loss": 0.251, "step": 7936 }, { "epoch": 0.014074730114656389, "grad_norm": 2.828125, "learning_rate": 0.0019764271551229945, "loss": 0.2776, "step": 7938 }, { "epoch": 0.014078276279966205, "grad_norm": 1.3046875, "learning_rate": 0.0019764136203258816, "loss": 0.3305, "step": 7940 }, { "epoch": 0.01408182244527602, "grad_norm": 0.349609375, "learning_rate": 0.0019764000816958442, "loss": 0.2662, "step": 7942 }, { "epoch": 0.014085368610585835, "grad_norm": 2.234375, "learning_rate": 0.001976386539232942, "loss": 0.3798, "step": 7944 }, { "epoch": 0.01408891477589565, "grad_norm": 0.9296875, "learning_rate": 0.001976372992937234, "loss": 0.3456, "step": 7946 }, { "epoch": 0.014092460941205465, "grad_norm": 0.5078125, "learning_rate": 0.0019763594428087792, "loss": 0.2858, "step": 7948 }, { "epoch": 0.014096007106515282, "grad_norm": 0.3984375, "learning_rate": 0.001976345888847638, "loss": 0.3072, "step": 7950 }, { "epoch": 0.014099553271825096, "grad_norm": 0.8671875, "learning_rate": 0.0019763323310538687, "loss": 0.2565, "step": 7952 }, { "epoch": 0.01410309943713491, "grad_norm": 0.6875, "learning_rate": 0.0019763187694275307, "loss": 0.2388, "step": 7954 }, { "epoch": 0.014106645602444727, "grad_norm": 0.271484375, "learning_rate": 0.0019763052039686833, "loss": 0.2174, "step": 7956 }, { "epoch": 0.014110191767754542, "grad_norm": 0.60546875, "learning_rate": 0.001976291634677386, "loss": 0.2882, "step": 7958 }, { "epoch": 0.014113737933064356, "grad_norm": 1.0859375, "learning_rate": 0.001976278061553698, "loss": 0.2814, "step": 7960 }, { "epoch": 0.014117284098374172, "grad_norm": 0.43359375, "learning_rate": 0.0019762644845976794, "loss": 0.2552, "step": 7962 }, { "epoch": 0.014120830263683987, "grad_norm": 0.52734375, "learning_rate": 0.0019762509038093886, "loss": 0.282, "step": 7964 }, { "epoch": 0.014124376428993802, "grad_norm": 3.125, "learning_rate": 0.001976237319188885, "loss": 0.4814, "step": 7966 }, { "epoch": 0.014127922594303618, "grad_norm": 0.275390625, "learning_rate": 0.001976223730736229, "loss": 0.2168, "step": 7968 }, { "epoch": 0.014131468759613432, "grad_norm": 0.396484375, "learning_rate": 0.001976210138451479, "loss": 0.238, "step": 7970 }, { "epoch": 0.014135014924923247, "grad_norm": 0.482421875, "learning_rate": 0.0019761965423346945, "loss": 0.2386, "step": 7972 }, { "epoch": 0.014138561090233063, "grad_norm": 0.73046875, "learning_rate": 0.0019761829423859357, "loss": 0.4044, "step": 7974 }, { "epoch": 0.014142107255542878, "grad_norm": 0.94921875, "learning_rate": 0.0019761693386052613, "loss": 0.2591, "step": 7976 }, { "epoch": 0.014145653420852692, "grad_norm": 0.263671875, "learning_rate": 0.001976155730992731, "loss": 0.3086, "step": 7978 }, { "epoch": 0.014149199586162509, "grad_norm": 1.0859375, "learning_rate": 0.001976142119548405, "loss": 0.3166, "step": 7980 }, { "epoch": 0.014152745751472323, "grad_norm": 0.318359375, "learning_rate": 0.0019761285042723424, "loss": 0.2635, "step": 7982 }, { "epoch": 0.01415629191678214, "grad_norm": 0.3984375, "learning_rate": 0.0019761148851646024, "loss": 0.2934, "step": 7984 }, { "epoch": 0.014159838082091954, "grad_norm": 0.5546875, "learning_rate": 0.0019761012622252446, "loss": 0.2311, "step": 7986 }, { "epoch": 0.014163384247401769, "grad_norm": 0.369140625, "learning_rate": 0.001976087635454329, "loss": 0.2768, "step": 7988 }, { "epoch": 0.014166930412711585, "grad_norm": 1.1328125, "learning_rate": 0.001976074004851915, "loss": 0.3145, "step": 7990 }, { "epoch": 0.0141704765780214, "grad_norm": 0.953125, "learning_rate": 0.001976060370418062, "loss": 0.2376, "step": 7992 }, { "epoch": 0.014174022743331214, "grad_norm": 0.41015625, "learning_rate": 0.00197604673215283, "loss": 0.2532, "step": 7994 }, { "epoch": 0.01417756890864103, "grad_norm": 0.330078125, "learning_rate": 0.0019760330900562783, "loss": 0.2099, "step": 7996 }, { "epoch": 0.014181115073950845, "grad_norm": 0.64453125, "learning_rate": 0.001976019444128467, "loss": 0.3335, "step": 7998 }, { "epoch": 0.01418466123926066, "grad_norm": 0.3359375, "learning_rate": 0.001976005794369455, "loss": 0.2508, "step": 8000 }, { "epoch": 0.014188207404570476, "grad_norm": 0.59375, "learning_rate": 0.001975992140779303, "loss": 0.2161, "step": 8002 }, { "epoch": 0.01419175356988029, "grad_norm": 0.62890625, "learning_rate": 0.00197597848335807, "loss": 0.2582, "step": 8004 }, { "epoch": 0.014195299735190105, "grad_norm": 0.296875, "learning_rate": 0.001975964822105816, "loss": 0.2452, "step": 8006 }, { "epoch": 0.014198845900499921, "grad_norm": 0.32421875, "learning_rate": 0.0019759511570226007, "loss": 0.2437, "step": 8008 }, { "epoch": 0.014202392065809736, "grad_norm": 0.353515625, "learning_rate": 0.001975937488108484, "loss": 0.2512, "step": 8010 }, { "epoch": 0.01420593823111955, "grad_norm": 0.93359375, "learning_rate": 0.001975923815363525, "loss": 0.2971, "step": 8012 }, { "epoch": 0.014209484396429366, "grad_norm": 0.322265625, "learning_rate": 0.0019759101387877846, "loss": 0.2463, "step": 8014 }, { "epoch": 0.014213030561739181, "grad_norm": 3.09375, "learning_rate": 0.0019758964583813216, "loss": 0.3887, "step": 8016 }, { "epoch": 0.014216576727048997, "grad_norm": 0.58984375, "learning_rate": 0.0019758827741441966, "loss": 0.2735, "step": 8018 }, { "epoch": 0.014220122892358812, "grad_norm": 0.392578125, "learning_rate": 0.001975869086076469, "loss": 0.219, "step": 8020 }, { "epoch": 0.014223669057668626, "grad_norm": 1.75, "learning_rate": 0.0019758553941781986, "loss": 0.4032, "step": 8022 }, { "epoch": 0.014227215222978443, "grad_norm": 2.296875, "learning_rate": 0.0019758416984494457, "loss": 0.4561, "step": 8024 }, { "epoch": 0.014230761388288257, "grad_norm": 0.177734375, "learning_rate": 0.0019758279988902703, "loss": 0.2875, "step": 8026 }, { "epoch": 0.014234307553598072, "grad_norm": 0.625, "learning_rate": 0.0019758142955007313, "loss": 0.2761, "step": 8028 }, { "epoch": 0.014237853718907888, "grad_norm": 0.419921875, "learning_rate": 0.0019758005882808895, "loss": 0.3136, "step": 8030 }, { "epoch": 0.014241399884217703, "grad_norm": 0.50390625, "learning_rate": 0.001975786877230805, "loss": 0.2373, "step": 8032 }, { "epoch": 0.014244946049527517, "grad_norm": 0.294921875, "learning_rate": 0.001975773162350537, "loss": 0.2776, "step": 8034 }, { "epoch": 0.014248492214837333, "grad_norm": 2.4375, "learning_rate": 0.001975759443640146, "loss": 0.4026, "step": 8036 }, { "epoch": 0.014252038380147148, "grad_norm": 0.5546875, "learning_rate": 0.0019757457210996918, "loss": 0.2715, "step": 8038 }, { "epoch": 0.014255584545456963, "grad_norm": 0.96875, "learning_rate": 0.001975731994729235, "loss": 0.2807, "step": 8040 }, { "epoch": 0.014259130710766779, "grad_norm": 0.431640625, "learning_rate": 0.0019757182645288346, "loss": 0.2532, "step": 8042 }, { "epoch": 0.014262676876076593, "grad_norm": 0.337890625, "learning_rate": 0.0019757045304985513, "loss": 0.3072, "step": 8044 }, { "epoch": 0.014266223041386408, "grad_norm": 0.43359375, "learning_rate": 0.001975690792638445, "loss": 0.2501, "step": 8046 }, { "epoch": 0.014269769206696224, "grad_norm": 0.47265625, "learning_rate": 0.0019756770509485764, "loss": 0.3048, "step": 8048 }, { "epoch": 0.014273315372006039, "grad_norm": 0.765625, "learning_rate": 0.0019756633054290045, "loss": 0.3172, "step": 8050 }, { "epoch": 0.014276861537315855, "grad_norm": 0.291015625, "learning_rate": 0.00197564955607979, "loss": 0.2048, "step": 8052 }, { "epoch": 0.01428040770262567, "grad_norm": 2.3125, "learning_rate": 0.001975635802900993, "loss": 0.39, "step": 8054 }, { "epoch": 0.014283953867935484, "grad_norm": 0.50390625, "learning_rate": 0.0019756220458926736, "loss": 0.2126, "step": 8056 }, { "epoch": 0.0142875000332453, "grad_norm": 1.140625, "learning_rate": 0.0019756082850548922, "loss": 0.325, "step": 8058 }, { "epoch": 0.014291046198555115, "grad_norm": 0.53515625, "learning_rate": 0.0019755945203877084, "loss": 0.2655, "step": 8060 }, { "epoch": 0.01429459236386493, "grad_norm": 0.359375, "learning_rate": 0.001975580751891183, "loss": 0.2543, "step": 8062 }, { "epoch": 0.014298138529174746, "grad_norm": 0.470703125, "learning_rate": 0.0019755669795653765, "loss": 0.2692, "step": 8064 }, { "epoch": 0.01430168469448456, "grad_norm": 0.51953125, "learning_rate": 0.0019755532034103477, "loss": 0.2488, "step": 8066 }, { "epoch": 0.014305230859794375, "grad_norm": 0.40234375, "learning_rate": 0.001975539423426158, "loss": 0.2301, "step": 8068 }, { "epoch": 0.014308777025104191, "grad_norm": 0.33203125, "learning_rate": 0.001975525639612868, "loss": 0.2632, "step": 8070 }, { "epoch": 0.014312323190414006, "grad_norm": 0.220703125, "learning_rate": 0.001975511851970537, "loss": 0.2281, "step": 8072 }, { "epoch": 0.01431586935572382, "grad_norm": 0.421875, "learning_rate": 0.001975498060499226, "loss": 0.2813, "step": 8074 }, { "epoch": 0.014319415521033637, "grad_norm": 0.265625, "learning_rate": 0.0019754842651989943, "loss": 0.2185, "step": 8076 }, { "epoch": 0.014322961686343451, "grad_norm": 3.625, "learning_rate": 0.0019754704660699036, "loss": 0.3642, "step": 8078 }, { "epoch": 0.014326507851653266, "grad_norm": 0.380859375, "learning_rate": 0.0019754566631120136, "loss": 0.2144, "step": 8080 }, { "epoch": 0.014330054016963082, "grad_norm": 0.302734375, "learning_rate": 0.0019754428563253843, "loss": 0.2512, "step": 8082 }, { "epoch": 0.014333600182272897, "grad_norm": 0.384765625, "learning_rate": 0.001975429045710077, "loss": 0.2246, "step": 8084 }, { "epoch": 0.014337146347582713, "grad_norm": 0.458984375, "learning_rate": 0.001975415231266151, "loss": 0.2908, "step": 8086 }, { "epoch": 0.014340692512892527, "grad_norm": 1.4375, "learning_rate": 0.001975401412993668, "loss": 0.4582, "step": 8088 }, { "epoch": 0.014344238678202342, "grad_norm": 1.0, "learning_rate": 0.001975387590892687, "loss": 0.2912, "step": 8090 }, { "epoch": 0.014347784843512158, "grad_norm": 0.51171875, "learning_rate": 0.0019753737649632697, "loss": 0.2078, "step": 8092 }, { "epoch": 0.014351331008821973, "grad_norm": 0.3828125, "learning_rate": 0.0019753599352054754, "loss": 0.2609, "step": 8094 }, { "epoch": 0.014354877174131787, "grad_norm": 1.5703125, "learning_rate": 0.0019753461016193655, "loss": 0.3123, "step": 8096 }, { "epoch": 0.014358423339441604, "grad_norm": 0.69921875, "learning_rate": 0.0019753322642050005, "loss": 0.2319, "step": 8098 }, { "epoch": 0.014361969504751418, "grad_norm": 0.427734375, "learning_rate": 0.0019753184229624405, "loss": 0.214, "step": 8100 }, { "epoch": 0.014365515670061233, "grad_norm": 0.83203125, "learning_rate": 0.0019753045778917464, "loss": 0.2201, "step": 8102 }, { "epoch": 0.014369061835371049, "grad_norm": 0.30078125, "learning_rate": 0.0019752907289929777, "loss": 0.2869, "step": 8104 }, { "epoch": 0.014372608000680864, "grad_norm": 2.890625, "learning_rate": 0.0019752768762661965, "loss": 0.3829, "step": 8106 }, { "epoch": 0.014376154165990678, "grad_norm": 0.318359375, "learning_rate": 0.001975263019711462, "loss": 0.233, "step": 8108 }, { "epoch": 0.014379700331300494, "grad_norm": 0.6015625, "learning_rate": 0.0019752491593288363, "loss": 0.3708, "step": 8110 }, { "epoch": 0.014383246496610309, "grad_norm": 1.3203125, "learning_rate": 0.0019752352951183786, "loss": 0.4101, "step": 8112 }, { "epoch": 0.014386792661920124, "grad_norm": 2.765625, "learning_rate": 0.0019752214270801504, "loss": 0.2547, "step": 8114 }, { "epoch": 0.01439033882722994, "grad_norm": 0.65625, "learning_rate": 0.001975207555214212, "loss": 0.2971, "step": 8116 }, { "epoch": 0.014393884992539754, "grad_norm": 0.5078125, "learning_rate": 0.0019751936795206243, "loss": 0.2023, "step": 8118 }, { "epoch": 0.01439743115784957, "grad_norm": 0.392578125, "learning_rate": 0.001975179799999448, "loss": 0.2655, "step": 8120 }, { "epoch": 0.014400977323159385, "grad_norm": 0.341796875, "learning_rate": 0.001975165916650743, "loss": 0.4227, "step": 8122 }, { "epoch": 0.0144045234884692, "grad_norm": 0.423828125, "learning_rate": 0.0019751520294745708, "loss": 0.2429, "step": 8124 }, { "epoch": 0.014408069653779016, "grad_norm": 0.5, "learning_rate": 0.001975138138470992, "loss": 0.2966, "step": 8126 }, { "epoch": 0.01441161581908883, "grad_norm": 0.44140625, "learning_rate": 0.0019751242436400673, "loss": 0.3511, "step": 8128 }, { "epoch": 0.014415161984398645, "grad_norm": 0.2578125, "learning_rate": 0.0019751103449818573, "loss": 0.2727, "step": 8130 }, { "epoch": 0.014418708149708461, "grad_norm": 0.365234375, "learning_rate": 0.0019750964424964236, "loss": 0.2762, "step": 8132 }, { "epoch": 0.014422254315018276, "grad_norm": 0.298828125, "learning_rate": 0.001975082536183826, "loss": 0.2352, "step": 8134 }, { "epoch": 0.01442580048032809, "grad_norm": 0.28125, "learning_rate": 0.0019750686260441254, "loss": 0.1915, "step": 8136 }, { "epoch": 0.014429346645637907, "grad_norm": 1.9765625, "learning_rate": 0.0019750547120773836, "loss": 0.312, "step": 8138 }, { "epoch": 0.014432892810947721, "grad_norm": 1.4609375, "learning_rate": 0.0019750407942836605, "loss": 0.5173, "step": 8140 }, { "epoch": 0.014436438976257536, "grad_norm": 0.2890625, "learning_rate": 0.001975026872663017, "loss": 0.2093, "step": 8142 }, { "epoch": 0.014439985141567352, "grad_norm": 0.421875, "learning_rate": 0.0019750129472155143, "loss": 0.2657, "step": 8144 }, { "epoch": 0.014443531306877167, "grad_norm": 0.181640625, "learning_rate": 0.0019749990179412135, "loss": 0.2081, "step": 8146 }, { "epoch": 0.014447077472186981, "grad_norm": 0.6015625, "learning_rate": 0.001974985084840175, "loss": 0.1942, "step": 8148 }, { "epoch": 0.014450623637496798, "grad_norm": 0.50390625, "learning_rate": 0.0019749711479124603, "loss": 0.2726, "step": 8150 }, { "epoch": 0.014454169802806612, "grad_norm": 0.9375, "learning_rate": 0.0019749572071581295, "loss": 0.246, "step": 8152 }, { "epoch": 0.014457715968116428, "grad_norm": 0.39453125, "learning_rate": 0.001974943262577245, "loss": 0.2227, "step": 8154 }, { "epoch": 0.014461262133426243, "grad_norm": 0.35546875, "learning_rate": 0.0019749293141698657, "loss": 0.2184, "step": 8156 }, { "epoch": 0.014464808298736058, "grad_norm": 0.373046875, "learning_rate": 0.0019749153619360547, "loss": 0.2237, "step": 8158 }, { "epoch": 0.014468354464045874, "grad_norm": 1.9375, "learning_rate": 0.0019749014058758722, "loss": 0.3866, "step": 8160 }, { "epoch": 0.014471900629355688, "grad_norm": 0.142578125, "learning_rate": 0.0019748874459893785, "loss": 0.278, "step": 8162 }, { "epoch": 0.014475446794665503, "grad_norm": 0.24609375, "learning_rate": 0.0019748734822766355, "loss": 0.1951, "step": 8164 }, { "epoch": 0.01447899295997532, "grad_norm": 1.671875, "learning_rate": 0.0019748595147377045, "loss": 0.3066, "step": 8166 }, { "epoch": 0.014482539125285134, "grad_norm": 0.75390625, "learning_rate": 0.0019748455433726457, "loss": 0.2686, "step": 8168 }, { "epoch": 0.014486085290594948, "grad_norm": 0.51171875, "learning_rate": 0.0019748315681815207, "loss": 0.312, "step": 8170 }, { "epoch": 0.014489631455904765, "grad_norm": 0.244140625, "learning_rate": 0.001974817589164391, "loss": 0.2182, "step": 8172 }, { "epoch": 0.01449317762121458, "grad_norm": 0.56640625, "learning_rate": 0.001974803606321317, "loss": 0.2493, "step": 8174 }, { "epoch": 0.014496723786524394, "grad_norm": 0.58984375, "learning_rate": 0.0019747896196523605, "loss": 0.4058, "step": 8176 }, { "epoch": 0.01450026995183421, "grad_norm": 0.263671875, "learning_rate": 0.0019747756291575817, "loss": 0.3673, "step": 8178 }, { "epoch": 0.014503816117144025, "grad_norm": 0.8125, "learning_rate": 0.0019747616348370425, "loss": 0.1943, "step": 8180 }, { "epoch": 0.01450736228245384, "grad_norm": 1.7265625, "learning_rate": 0.0019747476366908045, "loss": 0.3337, "step": 8182 }, { "epoch": 0.014510908447763655, "grad_norm": 0.2451171875, "learning_rate": 0.0019747336347189282, "loss": 0.2392, "step": 8184 }, { "epoch": 0.01451445461307347, "grad_norm": 0.67578125, "learning_rate": 0.0019747196289214754, "loss": 0.2143, "step": 8186 }, { "epoch": 0.014518000778383286, "grad_norm": 0.30078125, "learning_rate": 0.001974705619298507, "loss": 0.2118, "step": 8188 }, { "epoch": 0.0145215469436931, "grad_norm": 0.40625, "learning_rate": 0.001974691605850084, "loss": 0.2607, "step": 8190 }, { "epoch": 0.014525093109002915, "grad_norm": 0.44140625, "learning_rate": 0.001974677588576268, "loss": 0.2566, "step": 8192 }, { "epoch": 0.014528639274312732, "grad_norm": 0.41796875, "learning_rate": 0.00197466356747712, "loss": 0.2181, "step": 8194 }, { "epoch": 0.014532185439622546, "grad_norm": 0.234375, "learning_rate": 0.001974649542552702, "loss": 0.2491, "step": 8196 }, { "epoch": 0.01453573160493236, "grad_norm": 1.8984375, "learning_rate": 0.0019746355138030754, "loss": 0.2448, "step": 8198 }, { "epoch": 0.014539277770242177, "grad_norm": 0.984375, "learning_rate": 0.0019746214812283005, "loss": 0.2635, "step": 8200 }, { "epoch": 0.014542823935551992, "grad_norm": 0.345703125, "learning_rate": 0.00197460744482844, "loss": 0.2502, "step": 8202 }, { "epoch": 0.014546370100861806, "grad_norm": 0.421875, "learning_rate": 0.0019745934046035535, "loss": 0.2543, "step": 8204 }, { "epoch": 0.014549916266171622, "grad_norm": 0.7109375, "learning_rate": 0.0019745793605537043, "loss": 0.299, "step": 8206 }, { "epoch": 0.014553462431481437, "grad_norm": 0.43359375, "learning_rate": 0.0019745653126789523, "loss": 0.3053, "step": 8208 }, { "epoch": 0.014557008596791252, "grad_norm": 0.43359375, "learning_rate": 0.0019745512609793603, "loss": 0.4376, "step": 8210 }, { "epoch": 0.014560554762101068, "grad_norm": 0.291015625, "learning_rate": 0.0019745372054549887, "loss": 0.2934, "step": 8212 }, { "epoch": 0.014564100927410882, "grad_norm": 0.267578125, "learning_rate": 0.0019745231461058997, "loss": 0.3122, "step": 8214 }, { "epoch": 0.014567647092720697, "grad_norm": 0.53125, "learning_rate": 0.0019745090829321544, "loss": 0.3878, "step": 8216 }, { "epoch": 0.014571193258030513, "grad_norm": 0.859375, "learning_rate": 0.001974495015933814, "loss": 0.2351, "step": 8218 }, { "epoch": 0.014574739423340328, "grad_norm": 0.32421875, "learning_rate": 0.0019744809451109403, "loss": 0.3699, "step": 8220 }, { "epoch": 0.014578285588650144, "grad_norm": 0.314453125, "learning_rate": 0.001974466870463595, "loss": 0.2613, "step": 8222 }, { "epoch": 0.014581831753959959, "grad_norm": 1.34375, "learning_rate": 0.00197445279199184, "loss": 0.2594, "step": 8224 }, { "epoch": 0.014585377919269773, "grad_norm": 0.6640625, "learning_rate": 0.001974438709695736, "loss": 0.3091, "step": 8226 }, { "epoch": 0.01458892408457959, "grad_norm": 0.330078125, "learning_rate": 0.001974424623575345, "loss": 0.1648, "step": 8228 }, { "epoch": 0.014592470249889404, "grad_norm": 0.306640625, "learning_rate": 0.001974410533630729, "loss": 0.3051, "step": 8230 }, { "epoch": 0.014596016415199219, "grad_norm": 0.3359375, "learning_rate": 0.0019743964398619487, "loss": 0.2379, "step": 8232 }, { "epoch": 0.014599562580509035, "grad_norm": 0.4609375, "learning_rate": 0.0019743823422690666, "loss": 0.2683, "step": 8234 }, { "epoch": 0.01460310874581885, "grad_norm": 0.55859375, "learning_rate": 0.001974368240852144, "loss": 0.2994, "step": 8236 }, { "epoch": 0.014606654911128664, "grad_norm": 0.365234375, "learning_rate": 0.0019743541356112425, "loss": 0.2508, "step": 8238 }, { "epoch": 0.01461020107643848, "grad_norm": 0.3984375, "learning_rate": 0.001974340026546424, "loss": 0.5068, "step": 8240 }, { "epoch": 0.014613747241748295, "grad_norm": 1.8515625, "learning_rate": 0.0019743259136577504, "loss": 0.2735, "step": 8242 }, { "epoch": 0.01461729340705811, "grad_norm": 0.388671875, "learning_rate": 0.001974311796945283, "loss": 0.2804, "step": 8244 }, { "epoch": 0.014620839572367926, "grad_norm": 0.4140625, "learning_rate": 0.0019742976764090835, "loss": 0.2208, "step": 8246 }, { "epoch": 0.01462438573767774, "grad_norm": 0.921875, "learning_rate": 0.001974283552049214, "loss": 0.28, "step": 8248 }, { "epoch": 0.014627931902987555, "grad_norm": 0.29296875, "learning_rate": 0.0019742694238657358, "loss": 0.2355, "step": 8250 }, { "epoch": 0.014631478068297371, "grad_norm": 0.419921875, "learning_rate": 0.001974255291858711, "loss": 0.2687, "step": 8252 }, { "epoch": 0.014635024233607186, "grad_norm": 0.6953125, "learning_rate": 0.0019742411560282015, "loss": 0.2353, "step": 8254 }, { "epoch": 0.014638570398917002, "grad_norm": 0.6953125, "learning_rate": 0.001974227016374269, "loss": 0.2276, "step": 8256 }, { "epoch": 0.014642116564226816, "grad_norm": 1.984375, "learning_rate": 0.001974212872896975, "loss": 0.4946, "step": 8258 }, { "epoch": 0.014645662729536631, "grad_norm": 0.6484375, "learning_rate": 0.0019741987255963824, "loss": 0.2401, "step": 8260 }, { "epoch": 0.014649208894846447, "grad_norm": 0.5546875, "learning_rate": 0.001974184574472552, "loss": 0.2202, "step": 8262 }, { "epoch": 0.014652755060156262, "grad_norm": 1.671875, "learning_rate": 0.0019741704195255457, "loss": 0.4242, "step": 8264 }, { "epoch": 0.014656301225466076, "grad_norm": 0.53125, "learning_rate": 0.0019741562607554263, "loss": 0.2719, "step": 8266 }, { "epoch": 0.014659847390775893, "grad_norm": 1.03125, "learning_rate": 0.0019741420981622547, "loss": 0.2843, "step": 8268 }, { "epoch": 0.014663393556085707, "grad_norm": 0.6171875, "learning_rate": 0.001974127931746094, "loss": 0.21, "step": 8270 }, { "epoch": 0.014666939721395522, "grad_norm": 0.78515625, "learning_rate": 0.001974113761507005, "loss": 0.239, "step": 8272 }, { "epoch": 0.014670485886705338, "grad_norm": 0.3515625, "learning_rate": 0.00197409958744505, "loss": 0.212, "step": 8274 }, { "epoch": 0.014674032052015153, "grad_norm": 0.455078125, "learning_rate": 0.0019740854095602915, "loss": 0.2199, "step": 8276 }, { "epoch": 0.014677578217324967, "grad_norm": 0.9140625, "learning_rate": 0.001974071227852791, "loss": 0.5372, "step": 8278 }, { "epoch": 0.014681124382634783, "grad_norm": 0.58984375, "learning_rate": 0.001974057042322611, "loss": 0.1938, "step": 8280 }, { "epoch": 0.014684670547944598, "grad_norm": 0.62890625, "learning_rate": 0.001974042852969813, "loss": 0.216, "step": 8282 }, { "epoch": 0.014688216713254413, "grad_norm": 0.34375, "learning_rate": 0.001974028659794459, "loss": 0.2577, "step": 8284 }, { "epoch": 0.014691762878564229, "grad_norm": 0.75390625, "learning_rate": 0.0019740144627966114, "loss": 0.2617, "step": 8286 }, { "epoch": 0.014695309043874043, "grad_norm": 0.66015625, "learning_rate": 0.0019740002619763326, "loss": 0.2305, "step": 8288 }, { "epoch": 0.01469885520918386, "grad_norm": 0.359375, "learning_rate": 0.0019739860573336843, "loss": 0.2246, "step": 8290 }, { "epoch": 0.014702401374493674, "grad_norm": 1.4921875, "learning_rate": 0.001973971848868728, "loss": 0.2925, "step": 8292 }, { "epoch": 0.014705947539803489, "grad_norm": 2.09375, "learning_rate": 0.0019739576365815272, "loss": 0.2573, "step": 8294 }, { "epoch": 0.014709493705113305, "grad_norm": 0.69921875, "learning_rate": 0.001973943420472143, "loss": 0.1889, "step": 8296 }, { "epoch": 0.01471303987042312, "grad_norm": 1.2109375, "learning_rate": 0.001973929200540638, "loss": 0.358, "step": 8298 }, { "epoch": 0.014716586035732934, "grad_norm": 0.5078125, "learning_rate": 0.001973914976787074, "loss": 0.2469, "step": 8300 }, { "epoch": 0.01472013220104275, "grad_norm": 0.294921875, "learning_rate": 0.0019739007492115142, "loss": 0.2914, "step": 8302 }, { "epoch": 0.014723678366352565, "grad_norm": 1.046875, "learning_rate": 0.0019738865178140197, "loss": 0.2945, "step": 8304 }, { "epoch": 0.01472722453166238, "grad_norm": 0.2236328125, "learning_rate": 0.001973872282594653, "loss": 0.1866, "step": 8306 }, { "epoch": 0.014730770696972196, "grad_norm": 0.56640625, "learning_rate": 0.001973858043553477, "loss": 0.2336, "step": 8308 }, { "epoch": 0.01473431686228201, "grad_norm": 0.30859375, "learning_rate": 0.0019738438006905537, "loss": 0.3078, "step": 8310 }, { "epoch": 0.014737863027591825, "grad_norm": 0.29296875, "learning_rate": 0.0019738295540059447, "loss": 0.2634, "step": 8312 }, { "epoch": 0.014741409192901641, "grad_norm": 0.65234375, "learning_rate": 0.0019738153034997126, "loss": 0.278, "step": 8314 }, { "epoch": 0.014744955358211456, "grad_norm": 0.279296875, "learning_rate": 0.0019738010491719203, "loss": 0.2528, "step": 8316 }, { "epoch": 0.01474850152352127, "grad_norm": 0.4453125, "learning_rate": 0.0019737867910226297, "loss": 0.2905, "step": 8318 }, { "epoch": 0.014752047688831087, "grad_norm": 0.228515625, "learning_rate": 0.0019737725290519034, "loss": 0.2447, "step": 8320 }, { "epoch": 0.014755593854140901, "grad_norm": 0.470703125, "learning_rate": 0.0019737582632598036, "loss": 0.3212, "step": 8322 }, { "epoch": 0.014759140019450718, "grad_norm": 0.451171875, "learning_rate": 0.0019737439936463926, "loss": 0.2487, "step": 8324 }, { "epoch": 0.014762686184760532, "grad_norm": 0.328125, "learning_rate": 0.001973729720211733, "loss": 0.2462, "step": 8326 }, { "epoch": 0.014766232350070347, "grad_norm": 0.421875, "learning_rate": 0.001973715442955887, "loss": 0.2829, "step": 8328 }, { "epoch": 0.014769778515380163, "grad_norm": 0.396484375, "learning_rate": 0.0019737011618789174, "loss": 0.2193, "step": 8330 }, { "epoch": 0.014773324680689977, "grad_norm": 6.5625, "learning_rate": 0.001973686876980886, "loss": 0.4112, "step": 8332 }, { "epoch": 0.014776870845999792, "grad_norm": 0.28125, "learning_rate": 0.001973672588261856, "loss": 0.3945, "step": 8334 }, { "epoch": 0.014780417011309608, "grad_norm": 0.412109375, "learning_rate": 0.0019736582957218898, "loss": 0.2134, "step": 8336 }, { "epoch": 0.014783963176619423, "grad_norm": 0.291015625, "learning_rate": 0.001973643999361049, "loss": 0.2612, "step": 8338 }, { "epoch": 0.014787509341929237, "grad_norm": 0.36328125, "learning_rate": 0.0019736296991793973, "loss": 0.2279, "step": 8340 }, { "epoch": 0.014791055507239054, "grad_norm": 0.69921875, "learning_rate": 0.0019736153951769968, "loss": 0.2541, "step": 8342 }, { "epoch": 0.014794601672548868, "grad_norm": 0.58203125, "learning_rate": 0.00197360108735391, "loss": 0.1865, "step": 8344 }, { "epoch": 0.014798147837858683, "grad_norm": 0.7890625, "learning_rate": 0.0019735867757101995, "loss": 0.5469, "step": 8346 }, { "epoch": 0.014801694003168499, "grad_norm": 0.8125, "learning_rate": 0.0019735724602459276, "loss": 0.208, "step": 8348 }, { "epoch": 0.014805240168478314, "grad_norm": 0.478515625, "learning_rate": 0.001973558140961157, "loss": 0.2023, "step": 8350 }, { "epoch": 0.014808786333788128, "grad_norm": 0.4375, "learning_rate": 0.001973543817855951, "loss": 0.2787, "step": 8352 }, { "epoch": 0.014812332499097944, "grad_norm": 0.421875, "learning_rate": 0.001973529490930372, "loss": 0.276, "step": 8354 }, { "epoch": 0.014815878664407759, "grad_norm": 0.57421875, "learning_rate": 0.0019735151601844815, "loss": 0.2239, "step": 8356 }, { "epoch": 0.014819424829717575, "grad_norm": 0.87109375, "learning_rate": 0.001973500825618344, "loss": 0.2352, "step": 8358 }, { "epoch": 0.01482297099502739, "grad_norm": 0.78515625, "learning_rate": 0.0019734864872320208, "loss": 0.2223, "step": 8360 }, { "epoch": 0.014826517160337204, "grad_norm": 0.3671875, "learning_rate": 0.0019734721450255753, "loss": 0.2678, "step": 8362 }, { "epoch": 0.01483006332564702, "grad_norm": 0.59375, "learning_rate": 0.0019734577989990697, "loss": 0.2802, "step": 8364 }, { "epoch": 0.014833609490956835, "grad_norm": 0.361328125, "learning_rate": 0.0019734434491525676, "loss": 0.1879, "step": 8366 }, { "epoch": 0.01483715565626665, "grad_norm": 0.3125, "learning_rate": 0.001973429095486131, "loss": 0.3326, "step": 8368 }, { "epoch": 0.014840701821576466, "grad_norm": 0.8125, "learning_rate": 0.0019734147379998224, "loss": 0.3008, "step": 8370 }, { "epoch": 0.01484424798688628, "grad_norm": 0.408203125, "learning_rate": 0.0019734003766937055, "loss": 0.168, "step": 8372 }, { "epoch": 0.014847794152196095, "grad_norm": 0.314453125, "learning_rate": 0.0019733860115678428, "loss": 0.2097, "step": 8374 }, { "epoch": 0.014851340317505912, "grad_norm": 1.234375, "learning_rate": 0.0019733716426222963, "loss": 0.2889, "step": 8376 }, { "epoch": 0.014854886482815726, "grad_norm": 0.60546875, "learning_rate": 0.0019733572698571304, "loss": 0.2187, "step": 8378 }, { "epoch": 0.01485843264812554, "grad_norm": 0.380859375, "learning_rate": 0.001973342893272407, "loss": 0.2115, "step": 8380 }, { "epoch": 0.014861978813435357, "grad_norm": 0.55078125, "learning_rate": 0.0019733285128681888, "loss": 0.2622, "step": 8382 }, { "epoch": 0.014865524978745171, "grad_norm": 1.109375, "learning_rate": 0.001973314128644539, "loss": 0.3458, "step": 8384 }, { "epoch": 0.014869071144054986, "grad_norm": 0.7109375, "learning_rate": 0.00197329974060152, "loss": 0.1967, "step": 8386 }, { "epoch": 0.014872617309364802, "grad_norm": 1.3828125, "learning_rate": 0.001973285348739196, "loss": 0.2537, "step": 8388 }, { "epoch": 0.014876163474674617, "grad_norm": 1.203125, "learning_rate": 0.001973270953057629, "loss": 0.4077, "step": 8390 }, { "epoch": 0.014879709639984433, "grad_norm": 0.5703125, "learning_rate": 0.0019732565535568822, "loss": 0.2731, "step": 8392 }, { "epoch": 0.014883255805294248, "grad_norm": 0.2734375, "learning_rate": 0.0019732421502370178, "loss": 0.2402, "step": 8394 }, { "epoch": 0.014886801970604062, "grad_norm": 0.25390625, "learning_rate": 0.0019732277430981, "loss": 0.2126, "step": 8396 }, { "epoch": 0.014890348135913879, "grad_norm": 0.5703125, "learning_rate": 0.0019732133321401914, "loss": 0.2764, "step": 8398 }, { "epoch": 0.014893894301223693, "grad_norm": 0.265625, "learning_rate": 0.0019731989173633548, "loss": 0.1994, "step": 8400 }, { "epoch": 0.014897440466533508, "grad_norm": 0.7421875, "learning_rate": 0.0019731844987676533, "loss": 0.2315, "step": 8402 }, { "epoch": 0.014900986631843324, "grad_norm": 0.31640625, "learning_rate": 0.00197317007635315, "loss": 0.2386, "step": 8404 }, { "epoch": 0.014904532797153138, "grad_norm": 0.34765625, "learning_rate": 0.001973155650119908, "loss": 0.2045, "step": 8406 }, { "epoch": 0.014908078962462953, "grad_norm": 0.3828125, "learning_rate": 0.00197314122006799, "loss": 0.2094, "step": 8408 }, { "epoch": 0.01491162512777277, "grad_norm": 0.75390625, "learning_rate": 0.0019731267861974604, "loss": 0.3569, "step": 8410 }, { "epoch": 0.014915171293082584, "grad_norm": 0.443359375, "learning_rate": 0.0019731123485083805, "loss": 0.237, "step": 8412 }, { "epoch": 0.014918717458392398, "grad_norm": 0.361328125, "learning_rate": 0.0019730979070008148, "loss": 0.3474, "step": 8414 }, { "epoch": 0.014922263623702215, "grad_norm": 0.74609375, "learning_rate": 0.001973083461674826, "loss": 0.3039, "step": 8416 }, { "epoch": 0.01492580978901203, "grad_norm": 1.203125, "learning_rate": 0.0019730690125304767, "loss": 0.2608, "step": 8418 }, { "epoch": 0.014929355954321844, "grad_norm": 0.7265625, "learning_rate": 0.0019730545595678314, "loss": 0.2046, "step": 8420 }, { "epoch": 0.01493290211963166, "grad_norm": 0.400390625, "learning_rate": 0.0019730401027869523, "loss": 0.2329, "step": 8422 }, { "epoch": 0.014936448284941475, "grad_norm": 0.53515625, "learning_rate": 0.001973025642187903, "loss": 0.2868, "step": 8424 }, { "epoch": 0.014939994450251291, "grad_norm": 0.400390625, "learning_rate": 0.001973011177770747, "loss": 0.2596, "step": 8426 }, { "epoch": 0.014943540615561106, "grad_norm": 1.8984375, "learning_rate": 0.0019729967095355465, "loss": 0.461, "step": 8428 }, { "epoch": 0.01494708678087092, "grad_norm": 4.15625, "learning_rate": 0.0019729822374823657, "loss": 0.3654, "step": 8430 }, { "epoch": 0.014950632946180736, "grad_norm": 0.55859375, "learning_rate": 0.001972967761611268, "loss": 0.4703, "step": 8432 }, { "epoch": 0.014954179111490551, "grad_norm": 0.81640625, "learning_rate": 0.001972953281922316, "loss": 0.2539, "step": 8434 }, { "epoch": 0.014957725276800365, "grad_norm": 0.3671875, "learning_rate": 0.0019729387984155737, "loss": 0.2693, "step": 8436 }, { "epoch": 0.014961271442110182, "grad_norm": 0.69140625, "learning_rate": 0.0019729243110911043, "loss": 0.3123, "step": 8438 }, { "epoch": 0.014964817607419996, "grad_norm": 1.0234375, "learning_rate": 0.001972909819948971, "loss": 0.2053, "step": 8440 }, { "epoch": 0.01496836377272981, "grad_norm": 0.5703125, "learning_rate": 0.0019728953249892366, "loss": 0.2043, "step": 8442 }, { "epoch": 0.014971909938039627, "grad_norm": 2.28125, "learning_rate": 0.0019728808262119654, "loss": 0.3579, "step": 8444 }, { "epoch": 0.014975456103349442, "grad_norm": 0.412109375, "learning_rate": 0.001972866323617221, "loss": 0.2023, "step": 8446 }, { "epoch": 0.014979002268659256, "grad_norm": 3.296875, "learning_rate": 0.0019728518172050656, "loss": 0.5548, "step": 8448 }, { "epoch": 0.014982548433969073, "grad_norm": 0.392578125, "learning_rate": 0.0019728373069755637, "loss": 0.2251, "step": 8450 }, { "epoch": 0.014986094599278887, "grad_norm": 0.8515625, "learning_rate": 0.001972822792928778, "loss": 0.2292, "step": 8452 }, { "epoch": 0.014989640764588702, "grad_norm": 0.69140625, "learning_rate": 0.001972808275064773, "loss": 0.2682, "step": 8454 }, { "epoch": 0.014993186929898518, "grad_norm": 0.80859375, "learning_rate": 0.0019727937533836116, "loss": 0.253, "step": 8456 }, { "epoch": 0.014996733095208332, "grad_norm": 0.5625, "learning_rate": 0.001972779227885357, "loss": 0.2328, "step": 8458 }, { "epoch": 0.015000279260518149, "grad_norm": 0.96875, "learning_rate": 0.001972764698570073, "loss": 0.2995, "step": 8460 }, { "epoch": 0.015003825425827963, "grad_norm": 0.349609375, "learning_rate": 0.0019727501654378233, "loss": 0.3771, "step": 8462 }, { "epoch": 0.015007371591137778, "grad_norm": 0.70703125, "learning_rate": 0.0019727356284886715, "loss": 0.2372, "step": 8464 }, { "epoch": 0.015010917756447594, "grad_norm": 0.5546875, "learning_rate": 0.0019727210877226804, "loss": 0.2373, "step": 8466 }, { "epoch": 0.015014463921757409, "grad_norm": 0.26953125, "learning_rate": 0.001972706543139915, "loss": 0.247, "step": 8468 }, { "epoch": 0.015018010087067223, "grad_norm": 1.4609375, "learning_rate": 0.0019726919947404375, "loss": 0.2755, "step": 8470 }, { "epoch": 0.01502155625237704, "grad_norm": 0.458984375, "learning_rate": 0.0019726774425243123, "loss": 0.1957, "step": 8472 }, { "epoch": 0.015025102417686854, "grad_norm": 0.828125, "learning_rate": 0.001972662886491603, "loss": 0.2928, "step": 8474 }, { "epoch": 0.015028648582996669, "grad_norm": 0.5546875, "learning_rate": 0.0019726483266423733, "loss": 0.1797, "step": 8476 }, { "epoch": 0.015032194748306485, "grad_norm": 0.37109375, "learning_rate": 0.001972633762976686, "loss": 0.2013, "step": 8478 }, { "epoch": 0.0150357409136163, "grad_norm": 0.337890625, "learning_rate": 0.0019726191954946063, "loss": 0.2781, "step": 8480 }, { "epoch": 0.015039287078926114, "grad_norm": 1.6640625, "learning_rate": 0.0019726046241961967, "loss": 0.2612, "step": 8482 }, { "epoch": 0.01504283324423593, "grad_norm": 2.296875, "learning_rate": 0.0019725900490815216, "loss": 0.2983, "step": 8484 }, { "epoch": 0.015046379409545745, "grad_norm": 0.35546875, "learning_rate": 0.0019725754701506444, "loss": 0.186, "step": 8486 }, { "epoch": 0.01504992557485556, "grad_norm": 1.796875, "learning_rate": 0.001972560887403629, "loss": 0.2253, "step": 8488 }, { "epoch": 0.015053471740165376, "grad_norm": 0.2490234375, "learning_rate": 0.0019725463008405386, "loss": 0.2858, "step": 8490 }, { "epoch": 0.01505701790547519, "grad_norm": 0.80078125, "learning_rate": 0.001972531710461438, "loss": 0.2459, "step": 8492 }, { "epoch": 0.015060564070785007, "grad_norm": 0.458984375, "learning_rate": 0.00197251711626639, "loss": 0.2563, "step": 8494 }, { "epoch": 0.015064110236094821, "grad_norm": 0.79296875, "learning_rate": 0.0019725025182554595, "loss": 0.3505, "step": 8496 }, { "epoch": 0.015067656401404636, "grad_norm": 1.6953125, "learning_rate": 0.0019724879164287096, "loss": 0.2718, "step": 8498 }, { "epoch": 0.015071202566714452, "grad_norm": 0.4453125, "learning_rate": 0.0019724733107862047, "loss": 0.3746, "step": 8500 }, { "epoch": 0.015074748732024267, "grad_norm": 0.88671875, "learning_rate": 0.001972458701328008, "loss": 0.2796, "step": 8502 }, { "epoch": 0.015078294897334081, "grad_norm": 0.396484375, "learning_rate": 0.001972444088054184, "loss": 0.1974, "step": 8504 }, { "epoch": 0.015081841062643897, "grad_norm": 1.4765625, "learning_rate": 0.001972429470964796, "loss": 0.259, "step": 8506 }, { "epoch": 0.015085387227953712, "grad_norm": 0.73828125, "learning_rate": 0.0019724148500599083, "loss": 0.3854, "step": 8508 }, { "epoch": 0.015088933393263526, "grad_norm": 0.78515625, "learning_rate": 0.001972400225339585, "loss": 0.2912, "step": 8510 }, { "epoch": 0.015092479558573343, "grad_norm": 2.046875, "learning_rate": 0.00197238559680389, "loss": 0.2514, "step": 8512 }, { "epoch": 0.015096025723883157, "grad_norm": 0.5234375, "learning_rate": 0.0019723709644528867, "loss": 0.2807, "step": 8514 }, { "epoch": 0.015099571889192972, "grad_norm": 0.458984375, "learning_rate": 0.00197235632828664, "loss": 0.251, "step": 8516 }, { "epoch": 0.015103118054502788, "grad_norm": 0.478515625, "learning_rate": 0.001972341688305213, "loss": 0.328, "step": 8518 }, { "epoch": 0.015106664219812603, "grad_norm": 0.5078125, "learning_rate": 0.00197232704450867, "loss": 0.2192, "step": 8520 }, { "epoch": 0.015110210385122417, "grad_norm": 1.5078125, "learning_rate": 0.001972312396897076, "loss": 0.2817, "step": 8522 }, { "epoch": 0.015113756550432234, "grad_norm": 0.8984375, "learning_rate": 0.001972297745470494, "loss": 0.221, "step": 8524 }, { "epoch": 0.015117302715742048, "grad_norm": 0.2734375, "learning_rate": 0.001972283090228988, "loss": 0.1854, "step": 8526 }, { "epoch": 0.015120848881051864, "grad_norm": 0.302734375, "learning_rate": 0.0019722684311726225, "loss": 0.2156, "step": 8528 }, { "epoch": 0.015124395046361679, "grad_norm": 0.45703125, "learning_rate": 0.0019722537683014617, "loss": 0.2754, "step": 8530 }, { "epoch": 0.015127941211671493, "grad_norm": 0.236328125, "learning_rate": 0.0019722391016155695, "loss": 0.2803, "step": 8532 }, { "epoch": 0.01513148737698131, "grad_norm": 0.439453125, "learning_rate": 0.0019722244311150103, "loss": 0.3082, "step": 8534 }, { "epoch": 0.015135033542291124, "grad_norm": 0.404296875, "learning_rate": 0.001972209756799848, "loss": 0.2524, "step": 8536 }, { "epoch": 0.015138579707600939, "grad_norm": 0.64453125, "learning_rate": 0.001972195078670147, "loss": 0.2449, "step": 8538 }, { "epoch": 0.015142125872910755, "grad_norm": 0.44140625, "learning_rate": 0.0019721803967259707, "loss": 0.251, "step": 8540 }, { "epoch": 0.01514567203822057, "grad_norm": 0.326171875, "learning_rate": 0.001972165710967385, "loss": 0.2039, "step": 8542 }, { "epoch": 0.015149218203530384, "grad_norm": 1.1171875, "learning_rate": 0.0019721510213944523, "loss": 0.2846, "step": 8544 }, { "epoch": 0.0151527643688402, "grad_norm": 0.333984375, "learning_rate": 0.001972136328007238, "loss": 0.2438, "step": 8546 }, { "epoch": 0.015156310534150015, "grad_norm": 1.1640625, "learning_rate": 0.001972121630805806, "loss": 0.2857, "step": 8548 }, { "epoch": 0.01515985669945983, "grad_norm": 0.326171875, "learning_rate": 0.0019721069297902205, "loss": 0.1888, "step": 8550 }, { "epoch": 0.015163402864769646, "grad_norm": 2.4375, "learning_rate": 0.001972092224960546, "loss": 0.3156, "step": 8552 }, { "epoch": 0.01516694903007946, "grad_norm": 0.431640625, "learning_rate": 0.001972077516316846, "loss": 0.2829, "step": 8554 }, { "epoch": 0.015170495195389275, "grad_norm": 0.55859375, "learning_rate": 0.0019720628038591864, "loss": 0.2099, "step": 8556 }, { "epoch": 0.015174041360699091, "grad_norm": 0.6484375, "learning_rate": 0.0019720480875876304, "loss": 0.287, "step": 8558 }, { "epoch": 0.015177587526008906, "grad_norm": 0.26953125, "learning_rate": 0.0019720333675022424, "loss": 0.2322, "step": 8560 }, { "epoch": 0.015181133691318722, "grad_norm": 0.474609375, "learning_rate": 0.001972018643603087, "loss": 0.2953, "step": 8562 }, { "epoch": 0.015184679856628537, "grad_norm": 0.22265625, "learning_rate": 0.0019720039158902286, "loss": 0.2044, "step": 8564 }, { "epoch": 0.015188226021938351, "grad_norm": 0.51953125, "learning_rate": 0.0019719891843637317, "loss": 0.2271, "step": 8566 }, { "epoch": 0.015191772187248168, "grad_norm": 1.0, "learning_rate": 0.0019719744490236607, "loss": 0.298, "step": 8568 }, { "epoch": 0.015195318352557982, "grad_norm": 0.412109375, "learning_rate": 0.0019719597098700795, "loss": 0.2512, "step": 8570 }, { "epoch": 0.015198864517867797, "grad_norm": 0.451171875, "learning_rate": 0.001971944966903054, "loss": 0.224, "step": 8572 }, { "epoch": 0.015202410683177613, "grad_norm": 2.53125, "learning_rate": 0.0019719302201226464, "loss": 0.3056, "step": 8574 }, { "epoch": 0.015205956848487428, "grad_norm": 0.435546875, "learning_rate": 0.0019719154695289234, "loss": 0.3421, "step": 8576 }, { "epoch": 0.015209503013797242, "grad_norm": 0.458984375, "learning_rate": 0.001971900715121948, "loss": 0.295, "step": 8578 }, { "epoch": 0.015213049179107058, "grad_norm": 0.494140625, "learning_rate": 0.001971885956901786, "loss": 0.2454, "step": 8580 }, { "epoch": 0.015216595344416873, "grad_norm": 0.4296875, "learning_rate": 0.0019718711948685007, "loss": 0.2786, "step": 8582 }, { "epoch": 0.015220141509726687, "grad_norm": 0.392578125, "learning_rate": 0.0019718564290221578, "loss": 0.2791, "step": 8584 }, { "epoch": 0.015223687675036504, "grad_norm": 0.5234375, "learning_rate": 0.001971841659362821, "loss": 0.2626, "step": 8586 }, { "epoch": 0.015227233840346318, "grad_norm": 0.34375, "learning_rate": 0.0019718268858905557, "loss": 0.1955, "step": 8588 }, { "epoch": 0.015230780005656133, "grad_norm": 0.58203125, "learning_rate": 0.001971812108605425, "loss": 0.2952, "step": 8590 }, { "epoch": 0.01523432617096595, "grad_norm": 0.66796875, "learning_rate": 0.001971797327507495, "loss": 0.2773, "step": 8592 }, { "epoch": 0.015237872336275764, "grad_norm": 0.5390625, "learning_rate": 0.0019717825425968304, "loss": 0.2793, "step": 8594 }, { "epoch": 0.01524141850158558, "grad_norm": 0.90625, "learning_rate": 0.001971767753873495, "loss": 0.2537, "step": 8596 }, { "epoch": 0.015244964666895395, "grad_norm": 0.474609375, "learning_rate": 0.0019717529613375536, "loss": 0.2958, "step": 8598 }, { "epoch": 0.015248510832205209, "grad_norm": 1.7578125, "learning_rate": 0.0019717381649890712, "loss": 0.3074, "step": 8600 }, { "epoch": 0.015252056997515025, "grad_norm": 0.2890625, "learning_rate": 0.0019717233648281125, "loss": 0.3694, "step": 8602 }, { "epoch": 0.01525560316282484, "grad_norm": 0.4296875, "learning_rate": 0.0019717085608547424, "loss": 0.2201, "step": 8604 }, { "epoch": 0.015259149328134654, "grad_norm": 0.265625, "learning_rate": 0.001971693753069025, "loss": 0.2749, "step": 8606 }, { "epoch": 0.01526269549344447, "grad_norm": 0.58203125, "learning_rate": 0.001971678941471026, "loss": 0.2294, "step": 8608 }, { "epoch": 0.015266241658754285, "grad_norm": 0.59375, "learning_rate": 0.0019716641260608086, "loss": 0.2263, "step": 8610 }, { "epoch": 0.0152697878240641, "grad_norm": 0.412109375, "learning_rate": 0.0019716493068384394, "loss": 0.2387, "step": 8612 }, { "epoch": 0.015273333989373916, "grad_norm": 0.326171875, "learning_rate": 0.0019716344838039824, "loss": 0.2777, "step": 8614 }, { "epoch": 0.01527688015468373, "grad_norm": 4.28125, "learning_rate": 0.0019716196569575027, "loss": 0.3567, "step": 8616 }, { "epoch": 0.015280426319993545, "grad_norm": 1.0234375, "learning_rate": 0.001971604826299065, "loss": 0.3221, "step": 8618 }, { "epoch": 0.015283972485303362, "grad_norm": 0.4296875, "learning_rate": 0.001971589991828733, "loss": 0.1985, "step": 8620 }, { "epoch": 0.015287518650613176, "grad_norm": 0.86328125, "learning_rate": 0.001971575153546574, "loss": 0.2859, "step": 8622 }, { "epoch": 0.01529106481592299, "grad_norm": 0.490234375, "learning_rate": 0.001971560311452651, "loss": 0.254, "step": 8624 }, { "epoch": 0.015294610981232807, "grad_norm": 1.828125, "learning_rate": 0.001971545465547029, "loss": 0.343, "step": 8626 }, { "epoch": 0.015298157146542622, "grad_norm": 0.49609375, "learning_rate": 0.001971530615829774, "loss": 0.3985, "step": 8628 }, { "epoch": 0.015301703311852438, "grad_norm": 0.3125, "learning_rate": 0.0019715157623009503, "loss": 0.2491, "step": 8630 }, { "epoch": 0.015305249477162252, "grad_norm": 0.353515625, "learning_rate": 0.0019715009049606227, "loss": 0.2225, "step": 8632 }, { "epoch": 0.015308795642472067, "grad_norm": 0.353515625, "learning_rate": 0.001971486043808856, "loss": 0.2721, "step": 8634 }, { "epoch": 0.015312341807781883, "grad_norm": 0.9375, "learning_rate": 0.001971471178845716, "loss": 0.2457, "step": 8636 }, { "epoch": 0.015315887973091698, "grad_norm": 0.369140625, "learning_rate": 0.001971456310071267, "loss": 0.2807, "step": 8638 }, { "epoch": 0.015319434138401512, "grad_norm": 0.439453125, "learning_rate": 0.001971441437485575, "loss": 0.2984, "step": 8640 }, { "epoch": 0.015322980303711329, "grad_norm": 0.296875, "learning_rate": 0.001971426561088704, "loss": 0.246, "step": 8642 }, { "epoch": 0.015326526469021143, "grad_norm": 0.5625, "learning_rate": 0.001971411680880719, "loss": 0.2864, "step": 8644 }, { "epoch": 0.015330072634330958, "grad_norm": 1.734375, "learning_rate": 0.001971396796861686, "loss": 0.2798, "step": 8646 }, { "epoch": 0.015333618799640774, "grad_norm": 0.4765625, "learning_rate": 0.0019713819090316693, "loss": 0.2598, "step": 8648 }, { "epoch": 0.015337164964950589, "grad_norm": 0.326171875, "learning_rate": 0.001971367017390734, "loss": 0.2185, "step": 8650 }, { "epoch": 0.015340711130260403, "grad_norm": 0.2265625, "learning_rate": 0.001971352121938946, "loss": 0.2564, "step": 8652 }, { "epoch": 0.01534425729557022, "grad_norm": 0.26953125, "learning_rate": 0.00197133722267637, "loss": 0.2426, "step": 8654 }, { "epoch": 0.015347803460880034, "grad_norm": 0.384765625, "learning_rate": 0.0019713223196030707, "loss": 0.2428, "step": 8656 }, { "epoch": 0.015351349626189848, "grad_norm": 1.2578125, "learning_rate": 0.001971307412719114, "loss": 0.5143, "step": 8658 }, { "epoch": 0.015354895791499665, "grad_norm": 0.6640625, "learning_rate": 0.0019712925020245646, "loss": 0.2882, "step": 8660 }, { "epoch": 0.01535844195680948, "grad_norm": 0.294921875, "learning_rate": 0.001971277587519488, "loss": 0.255, "step": 8662 }, { "epoch": 0.015361988122119296, "grad_norm": 0.53125, "learning_rate": 0.0019712626692039493, "loss": 0.2761, "step": 8664 }, { "epoch": 0.01536553428742911, "grad_norm": 0.314453125, "learning_rate": 0.001971247747078014, "loss": 0.2327, "step": 8666 }, { "epoch": 0.015369080452738925, "grad_norm": 0.345703125, "learning_rate": 0.001971232821141747, "loss": 0.2087, "step": 8668 }, { "epoch": 0.015372626618048741, "grad_norm": 0.36328125, "learning_rate": 0.0019712178913952137, "loss": 0.2459, "step": 8670 }, { "epoch": 0.015376172783358556, "grad_norm": 0.2236328125, "learning_rate": 0.0019712029578384796, "loss": 0.1898, "step": 8672 }, { "epoch": 0.01537971894866837, "grad_norm": 0.365234375, "learning_rate": 0.0019711880204716092, "loss": 0.2762, "step": 8674 }, { "epoch": 0.015383265113978186, "grad_norm": 0.416015625, "learning_rate": 0.001971173079294669, "loss": 0.3124, "step": 8676 }, { "epoch": 0.015386811279288001, "grad_norm": 0.50390625, "learning_rate": 0.0019711581343077236, "loss": 0.2829, "step": 8678 }, { "epoch": 0.015390357444597816, "grad_norm": 0.67578125, "learning_rate": 0.0019711431855108387, "loss": 0.2668, "step": 8680 }, { "epoch": 0.015393903609907632, "grad_norm": 0.8984375, "learning_rate": 0.001971128232904079, "loss": 0.2707, "step": 8682 }, { "epoch": 0.015397449775217446, "grad_norm": 1.8984375, "learning_rate": 0.001971113276487511, "loss": 0.3612, "step": 8684 }, { "epoch": 0.015400995940527261, "grad_norm": 0.76171875, "learning_rate": 0.001971098316261199, "loss": 0.2707, "step": 8686 }, { "epoch": 0.015404542105837077, "grad_norm": 0.3828125, "learning_rate": 0.0019710833522252097, "loss": 0.2478, "step": 8688 }, { "epoch": 0.015408088271146892, "grad_norm": 0.6953125, "learning_rate": 0.0019710683843796074, "loss": 0.2291, "step": 8690 }, { "epoch": 0.015411634436456706, "grad_norm": 0.703125, "learning_rate": 0.0019710534127244583, "loss": 0.2109, "step": 8692 }, { "epoch": 0.015415180601766523, "grad_norm": 0.47265625, "learning_rate": 0.001971038437259827, "loss": 0.231, "step": 8694 }, { "epoch": 0.015418726767076337, "grad_norm": 1.1640625, "learning_rate": 0.0019710234579857796, "loss": 0.3979, "step": 8696 }, { "epoch": 0.015422272932386153, "grad_norm": 0.640625, "learning_rate": 0.001971008474902382, "loss": 0.2348, "step": 8698 }, { "epoch": 0.015425819097695968, "grad_norm": 0.310546875, "learning_rate": 0.0019709934880096985, "loss": 0.2488, "step": 8700 }, { "epoch": 0.015429365263005783, "grad_norm": 0.26953125, "learning_rate": 0.001970978497307796, "loss": 0.2801, "step": 8702 }, { "epoch": 0.015432911428315599, "grad_norm": 0.462890625, "learning_rate": 0.0019709635027967396, "loss": 0.2071, "step": 8704 }, { "epoch": 0.015436457593625413, "grad_norm": 0.625, "learning_rate": 0.0019709485044765943, "loss": 0.2892, "step": 8706 }, { "epoch": 0.015440003758935228, "grad_norm": 0.28125, "learning_rate": 0.0019709335023474265, "loss": 0.1877, "step": 8708 }, { "epoch": 0.015443549924245044, "grad_norm": 1.875, "learning_rate": 0.0019709184964093016, "loss": 0.4012, "step": 8710 }, { "epoch": 0.015447096089554859, "grad_norm": 0.7421875, "learning_rate": 0.0019709034866622847, "loss": 0.2192, "step": 8712 }, { "epoch": 0.015450642254864673, "grad_norm": 1.3984375, "learning_rate": 0.0019708884731064417, "loss": 0.4603, "step": 8714 }, { "epoch": 0.01545418842017449, "grad_norm": 0.2373046875, "learning_rate": 0.0019708734557418385, "loss": 0.2656, "step": 8716 }, { "epoch": 0.015457734585484304, "grad_norm": 0.5234375, "learning_rate": 0.001970858434568541, "loss": 0.2632, "step": 8718 }, { "epoch": 0.015461280750794119, "grad_norm": 0.59375, "learning_rate": 0.0019708434095866145, "loss": 0.213, "step": 8720 }, { "epoch": 0.015464826916103935, "grad_norm": 1.3203125, "learning_rate": 0.0019708283807961247, "loss": 0.2399, "step": 8722 }, { "epoch": 0.01546837308141375, "grad_norm": 0.453125, "learning_rate": 0.001970813348197137, "loss": 0.6017, "step": 8724 }, { "epoch": 0.015471919246723564, "grad_norm": 0.73828125, "learning_rate": 0.0019707983117897183, "loss": 0.2586, "step": 8726 }, { "epoch": 0.01547546541203338, "grad_norm": 0.8359375, "learning_rate": 0.0019707832715739333, "loss": 0.4973, "step": 8728 }, { "epoch": 0.015479011577343195, "grad_norm": 1.2265625, "learning_rate": 0.001970768227549848, "loss": 0.3071, "step": 8730 }, { "epoch": 0.015482557742653011, "grad_norm": 0.412109375, "learning_rate": 0.001970753179717528, "loss": 0.2667, "step": 8732 }, { "epoch": 0.015486103907962826, "grad_norm": 0.443359375, "learning_rate": 0.0019707381280770395, "loss": 0.2113, "step": 8734 }, { "epoch": 0.01548965007327264, "grad_norm": 0.9453125, "learning_rate": 0.0019707230726284486, "loss": 0.2516, "step": 8736 }, { "epoch": 0.015493196238582457, "grad_norm": 0.287109375, "learning_rate": 0.0019707080133718204, "loss": 0.2966, "step": 8738 }, { "epoch": 0.015496742403892271, "grad_norm": 0.458984375, "learning_rate": 0.0019706929503072214, "loss": 0.2895, "step": 8740 }, { "epoch": 0.015500288569202086, "grad_norm": 0.369140625, "learning_rate": 0.001970677883434717, "loss": 0.2919, "step": 8742 }, { "epoch": 0.015503834734511902, "grad_norm": 1.828125, "learning_rate": 0.001970662812754373, "loss": 0.365, "step": 8744 }, { "epoch": 0.015507380899821717, "grad_norm": 0.6015625, "learning_rate": 0.001970647738266256, "loss": 0.1899, "step": 8746 }, { "epoch": 0.015510927065131531, "grad_norm": 0.7734375, "learning_rate": 0.0019706326599704315, "loss": 0.253, "step": 8748 }, { "epoch": 0.015514473230441347, "grad_norm": 0.33984375, "learning_rate": 0.0019706175778669652, "loss": 0.1949, "step": 8750 }, { "epoch": 0.015518019395751162, "grad_norm": 0.48046875, "learning_rate": 0.0019706024919559236, "loss": 0.2643, "step": 8752 }, { "epoch": 0.015521565561060977, "grad_norm": 0.484375, "learning_rate": 0.001970587402237372, "loss": 0.2585, "step": 8754 }, { "epoch": 0.015525111726370793, "grad_norm": 1.0, "learning_rate": 0.0019705723087113775, "loss": 0.3023, "step": 8756 }, { "epoch": 0.015528657891680607, "grad_norm": 0.37109375, "learning_rate": 0.001970557211378005, "loss": 0.2703, "step": 8758 }, { "epoch": 0.015532204056990422, "grad_norm": 0.359375, "learning_rate": 0.001970542110237321, "loss": 0.3066, "step": 8760 }, { "epoch": 0.015535750222300238, "grad_norm": 2.828125, "learning_rate": 0.0019705270052893914, "loss": 0.3463, "step": 8762 }, { "epoch": 0.015539296387610053, "grad_norm": 1.2734375, "learning_rate": 0.0019705118965342825, "loss": 0.3042, "step": 8764 }, { "epoch": 0.015542842552919869, "grad_norm": 2.359375, "learning_rate": 0.00197049678397206, "loss": 0.3152, "step": 8766 }, { "epoch": 0.015546388718229684, "grad_norm": 1.6640625, "learning_rate": 0.0019704816676027904, "loss": 0.4442, "step": 8768 }, { "epoch": 0.015549934883539498, "grad_norm": 3.171875, "learning_rate": 0.001970466547426539, "loss": 0.45, "step": 8770 }, { "epoch": 0.015553481048849314, "grad_norm": 1.703125, "learning_rate": 0.0019704514234433735, "loss": 0.2297, "step": 8772 }, { "epoch": 0.015557027214159129, "grad_norm": 1.28125, "learning_rate": 0.001970436295653359, "loss": 0.2423, "step": 8774 }, { "epoch": 0.015560573379468944, "grad_norm": 1.734375, "learning_rate": 0.0019704211640565613, "loss": 0.2454, "step": 8776 }, { "epoch": 0.01556411954477876, "grad_norm": 1.0234375, "learning_rate": 0.001970406028653047, "loss": 0.2548, "step": 8778 }, { "epoch": 0.015567665710088574, "grad_norm": 0.2490234375, "learning_rate": 0.0019703908894428824, "loss": 0.3037, "step": 8780 }, { "epoch": 0.015571211875398389, "grad_norm": 1.15625, "learning_rate": 0.001970375746426134, "loss": 0.2951, "step": 8782 }, { "epoch": 0.015574758040708205, "grad_norm": 0.40234375, "learning_rate": 0.001970360599602867, "loss": 0.2847, "step": 8784 }, { "epoch": 0.01557830420601802, "grad_norm": 0.431640625, "learning_rate": 0.001970345448973149, "loss": 0.1947, "step": 8786 }, { "epoch": 0.015581850371327834, "grad_norm": 0.42578125, "learning_rate": 0.001970330294537045, "loss": 0.2164, "step": 8788 }, { "epoch": 0.01558539653663765, "grad_norm": 0.6875, "learning_rate": 0.001970315136294622, "loss": 0.4597, "step": 8790 }, { "epoch": 0.015588942701947465, "grad_norm": 0.47265625, "learning_rate": 0.0019702999742459465, "loss": 0.2569, "step": 8792 }, { "epoch": 0.01559248886725728, "grad_norm": 1.8125, "learning_rate": 0.001970284808391084, "loss": 0.3938, "step": 8794 }, { "epoch": 0.015596035032567096, "grad_norm": 0.287109375, "learning_rate": 0.0019702696387301015, "loss": 0.1909, "step": 8796 }, { "epoch": 0.01559958119787691, "grad_norm": 1.015625, "learning_rate": 0.0019702544652630653, "loss": 0.2431, "step": 8798 }, { "epoch": 0.015603127363186727, "grad_norm": 0.40234375, "learning_rate": 0.001970239287990041, "loss": 0.2457, "step": 8800 }, { "epoch": 0.015606673528496541, "grad_norm": 1.4140625, "learning_rate": 0.001970224106911096, "loss": 0.2552, "step": 8802 }, { "epoch": 0.015610219693806356, "grad_norm": 0.4765625, "learning_rate": 0.001970208922026296, "loss": 0.2023, "step": 8804 }, { "epoch": 0.015613765859116172, "grad_norm": 0.52734375, "learning_rate": 0.001970193733335708, "loss": 0.2378, "step": 8806 }, { "epoch": 0.015617312024425987, "grad_norm": 0.55859375, "learning_rate": 0.001970178540839398, "loss": 0.3996, "step": 8808 }, { "epoch": 0.015620858189735801, "grad_norm": 0.50390625, "learning_rate": 0.0019701633445374325, "loss": 0.1683, "step": 8810 }, { "epoch": 0.015624404355045618, "grad_norm": 1.1875, "learning_rate": 0.0019701481444298775, "loss": 0.3738, "step": 8812 }, { "epoch": 0.01562795052035543, "grad_norm": 0.73828125, "learning_rate": 0.0019701329405168, "loss": 0.521, "step": 8814 }, { "epoch": 0.01563149668566525, "grad_norm": 0.7265625, "learning_rate": 0.001970117732798267, "loss": 0.4277, "step": 8816 }, { "epoch": 0.015635042850975063, "grad_norm": 0.314453125, "learning_rate": 0.001970102521274344, "loss": 0.2433, "step": 8818 }, { "epoch": 0.015638589016284878, "grad_norm": 0.322265625, "learning_rate": 0.0019700873059450984, "loss": 0.2382, "step": 8820 }, { "epoch": 0.015642135181594692, "grad_norm": 0.2431640625, "learning_rate": 0.0019700720868105963, "loss": 0.2892, "step": 8822 }, { "epoch": 0.015645681346904507, "grad_norm": 0.392578125, "learning_rate": 0.001970056863870904, "loss": 0.2265, "step": 8824 }, { "epoch": 0.015649227512214325, "grad_norm": 0.357421875, "learning_rate": 0.0019700416371260885, "loss": 0.2546, "step": 8826 }, { "epoch": 0.01565277367752414, "grad_norm": 2.359375, "learning_rate": 0.001970026406576216, "loss": 0.3298, "step": 8828 }, { "epoch": 0.015656319842833954, "grad_norm": 0.42578125, "learning_rate": 0.0019700111722213537, "loss": 0.304, "step": 8830 }, { "epoch": 0.01565986600814377, "grad_norm": 0.62890625, "learning_rate": 0.001969995934061568, "loss": 0.2647, "step": 8832 }, { "epoch": 0.015663412173453583, "grad_norm": 0.291015625, "learning_rate": 0.0019699806920969254, "loss": 0.2051, "step": 8834 }, { "epoch": 0.015666958338763397, "grad_norm": 1.1171875, "learning_rate": 0.0019699654463274925, "loss": 0.3798, "step": 8836 }, { "epoch": 0.015670504504073215, "grad_norm": 3.078125, "learning_rate": 0.0019699501967533357, "loss": 0.2796, "step": 8838 }, { "epoch": 0.01567405066938303, "grad_norm": 3.953125, "learning_rate": 0.0019699349433745226, "loss": 0.3843, "step": 8840 }, { "epoch": 0.015677596834692845, "grad_norm": 0.447265625, "learning_rate": 0.001969919686191119, "loss": 0.3353, "step": 8842 }, { "epoch": 0.01568114300000266, "grad_norm": 0.58984375, "learning_rate": 0.0019699044252031923, "loss": 0.193, "step": 8844 }, { "epoch": 0.015684689165312474, "grad_norm": 0.77734375, "learning_rate": 0.001969889160410809, "loss": 0.4398, "step": 8846 }, { "epoch": 0.015688235330622288, "grad_norm": 0.546875, "learning_rate": 0.0019698738918140354, "loss": 0.2342, "step": 8848 }, { "epoch": 0.015691781495932106, "grad_norm": 0.84765625, "learning_rate": 0.001969858619412939, "loss": 0.3275, "step": 8850 }, { "epoch": 0.01569532766124192, "grad_norm": 0.3828125, "learning_rate": 0.001969843343207586, "loss": 0.2707, "step": 8852 }, { "epoch": 0.015698873826551735, "grad_norm": 0.75390625, "learning_rate": 0.001969828063198044, "loss": 0.2012, "step": 8854 }, { "epoch": 0.01570241999186155, "grad_norm": 0.54296875, "learning_rate": 0.0019698127793843787, "loss": 0.2342, "step": 8856 }, { "epoch": 0.015705966157171364, "grad_norm": 0.6875, "learning_rate": 0.001969797491766658, "loss": 0.2103, "step": 8858 }, { "epoch": 0.015709512322481183, "grad_norm": 0.5546875, "learning_rate": 0.001969782200344948, "loss": 0.2587, "step": 8860 }, { "epoch": 0.015713058487790997, "grad_norm": 0.421875, "learning_rate": 0.0019697669051193163, "loss": 0.1839, "step": 8862 }, { "epoch": 0.01571660465310081, "grad_norm": 0.419921875, "learning_rate": 0.001969751606089829, "loss": 0.2522, "step": 8864 }, { "epoch": 0.015720150818410626, "grad_norm": 0.60546875, "learning_rate": 0.0019697363032565533, "loss": 0.3043, "step": 8866 }, { "epoch": 0.01572369698372044, "grad_norm": 1.171875, "learning_rate": 0.0019697209966195563, "loss": 0.3253, "step": 8868 }, { "epoch": 0.015727243149030255, "grad_norm": 0.376953125, "learning_rate": 0.001969705686178905, "loss": 0.3684, "step": 8870 }, { "epoch": 0.015730789314340073, "grad_norm": 0.2197265625, "learning_rate": 0.001969690371934666, "loss": 0.2541, "step": 8872 }, { "epoch": 0.015734335479649888, "grad_norm": 0.365234375, "learning_rate": 0.001969675053886907, "loss": 0.2577, "step": 8874 }, { "epoch": 0.015737881644959702, "grad_norm": 0.70703125, "learning_rate": 0.001969659732035694, "loss": 0.3146, "step": 8876 }, { "epoch": 0.015741427810269517, "grad_norm": 0.427734375, "learning_rate": 0.0019696444063810946, "loss": 0.2843, "step": 8878 }, { "epoch": 0.01574497397557933, "grad_norm": 0.365234375, "learning_rate": 0.0019696290769231754, "loss": 0.1831, "step": 8880 }, { "epoch": 0.015748520140889146, "grad_norm": 1.5234375, "learning_rate": 0.001969613743662004, "loss": 0.2378, "step": 8882 }, { "epoch": 0.015752066306198964, "grad_norm": 1.84375, "learning_rate": 0.0019695984065976474, "loss": 0.4394, "step": 8884 }, { "epoch": 0.01575561247150878, "grad_norm": 0.6015625, "learning_rate": 0.0019695830657301726, "loss": 0.2568, "step": 8886 }, { "epoch": 0.015759158636818593, "grad_norm": 0.55859375, "learning_rate": 0.0019695677210596466, "loss": 0.226, "step": 8888 }, { "epoch": 0.015762704802128408, "grad_norm": 0.2373046875, "learning_rate": 0.0019695523725861363, "loss": 0.1982, "step": 8890 }, { "epoch": 0.015766250967438222, "grad_norm": 0.35546875, "learning_rate": 0.0019695370203097087, "loss": 0.431, "step": 8892 }, { "epoch": 0.01576979713274804, "grad_norm": 0.37890625, "learning_rate": 0.0019695216642304316, "loss": 0.2, "step": 8894 }, { "epoch": 0.015773343298057855, "grad_norm": 0.83984375, "learning_rate": 0.001969506304348372, "loss": 0.2671, "step": 8896 }, { "epoch": 0.01577688946336767, "grad_norm": 0.404296875, "learning_rate": 0.0019694909406635965, "loss": 0.3697, "step": 8898 }, { "epoch": 0.015780435628677484, "grad_norm": 0.318359375, "learning_rate": 0.001969475573176173, "loss": 0.2599, "step": 8900 }, { "epoch": 0.0157839817939873, "grad_norm": 0.447265625, "learning_rate": 0.001969460201886168, "loss": 0.2766, "step": 8902 }, { "epoch": 0.015787527959297113, "grad_norm": 0.486328125, "learning_rate": 0.0019694448267936495, "loss": 0.2613, "step": 8904 }, { "epoch": 0.01579107412460693, "grad_norm": 0.37890625, "learning_rate": 0.0019694294478986843, "loss": 0.2224, "step": 8906 }, { "epoch": 0.015794620289916746, "grad_norm": 0.70703125, "learning_rate": 0.0019694140652013396, "loss": 0.2584, "step": 8908 }, { "epoch": 0.01579816645522656, "grad_norm": 0.66796875, "learning_rate": 0.0019693986787016828, "loss": 0.2002, "step": 8910 }, { "epoch": 0.015801712620536375, "grad_norm": 0.318359375, "learning_rate": 0.0019693832883997814, "loss": 0.1634, "step": 8912 }, { "epoch": 0.01580525878584619, "grad_norm": 0.76171875, "learning_rate": 0.0019693678942957022, "loss": 0.2095, "step": 8914 }, { "epoch": 0.015808804951156004, "grad_norm": 0.56640625, "learning_rate": 0.001969352496389513, "loss": 0.2213, "step": 8916 }, { "epoch": 0.015812351116465822, "grad_norm": 0.28125, "learning_rate": 0.001969337094681281, "loss": 0.183, "step": 8918 }, { "epoch": 0.015815897281775636, "grad_norm": 0.416015625, "learning_rate": 0.001969321689171073, "loss": 0.2282, "step": 8920 }, { "epoch": 0.01581944344708545, "grad_norm": 0.73046875, "learning_rate": 0.0019693062798589577, "loss": 0.3324, "step": 8922 }, { "epoch": 0.015822989612395266, "grad_norm": 0.5078125, "learning_rate": 0.0019692908667450016, "loss": 0.1827, "step": 8924 }, { "epoch": 0.01582653577770508, "grad_norm": 0.494140625, "learning_rate": 0.0019692754498292714, "loss": 0.2467, "step": 8926 }, { "epoch": 0.015830081943014898, "grad_norm": 0.671875, "learning_rate": 0.001969260029111836, "loss": 0.2548, "step": 8928 }, { "epoch": 0.015833628108324713, "grad_norm": 0.890625, "learning_rate": 0.001969244604592762, "loss": 0.2927, "step": 8930 }, { "epoch": 0.015837174273634527, "grad_norm": 0.23046875, "learning_rate": 0.001969229176272117, "loss": 0.2603, "step": 8932 }, { "epoch": 0.015840720438944342, "grad_norm": 0.2119140625, "learning_rate": 0.0019692137441499682, "loss": 0.2697, "step": 8934 }, { "epoch": 0.015844266604254156, "grad_norm": 0.419921875, "learning_rate": 0.0019691983082263838, "loss": 0.2379, "step": 8936 }, { "epoch": 0.01584781276956397, "grad_norm": 0.8203125, "learning_rate": 0.001969182868501431, "loss": 0.2241, "step": 8938 }, { "epoch": 0.01585135893487379, "grad_norm": 0.376953125, "learning_rate": 0.0019691674249751765, "loss": 0.2037, "step": 8940 }, { "epoch": 0.015854905100183603, "grad_norm": 0.63671875, "learning_rate": 0.0019691519776476895, "loss": 0.2639, "step": 8942 }, { "epoch": 0.015858451265493418, "grad_norm": 0.33984375, "learning_rate": 0.0019691365265190356, "loss": 0.191, "step": 8944 }, { "epoch": 0.015861997430803233, "grad_norm": 0.421875, "learning_rate": 0.001969121071589284, "loss": 0.2542, "step": 8946 }, { "epoch": 0.015865543596113047, "grad_norm": 0.73046875, "learning_rate": 0.001969105612858502, "loss": 0.2814, "step": 8948 }, { "epoch": 0.01586908976142286, "grad_norm": 0.765625, "learning_rate": 0.0019690901503267564, "loss": 0.2825, "step": 8950 }, { "epoch": 0.01587263592673268, "grad_norm": 0.76953125, "learning_rate": 0.001969074683994115, "loss": 0.3389, "step": 8952 }, { "epoch": 0.015876182092042494, "grad_norm": 0.40234375, "learning_rate": 0.0019690592138606466, "loss": 0.2813, "step": 8954 }, { "epoch": 0.01587972825735231, "grad_norm": 0.33203125, "learning_rate": 0.001969043739926417, "loss": 0.2227, "step": 8956 }, { "epoch": 0.015883274422662123, "grad_norm": 0.3203125, "learning_rate": 0.0019690282621914958, "loss": 0.3896, "step": 8958 }, { "epoch": 0.015886820587971938, "grad_norm": 2.5, "learning_rate": 0.0019690127806559496, "loss": 0.3129, "step": 8960 }, { "epoch": 0.015890366753281756, "grad_norm": 0.458984375, "learning_rate": 0.001968997295319846, "loss": 0.2696, "step": 8962 }, { "epoch": 0.01589391291859157, "grad_norm": 0.65625, "learning_rate": 0.0019689818061832533, "loss": 0.2344, "step": 8964 }, { "epoch": 0.015897459083901385, "grad_norm": 0.40234375, "learning_rate": 0.0019689663132462384, "loss": 0.2639, "step": 8966 }, { "epoch": 0.0159010052492112, "grad_norm": 0.98046875, "learning_rate": 0.00196895081650887, "loss": 0.259, "step": 8968 }, { "epoch": 0.015904551414521014, "grad_norm": 0.6640625, "learning_rate": 0.0019689353159712156, "loss": 0.1795, "step": 8970 }, { "epoch": 0.01590809757983083, "grad_norm": 0.671875, "learning_rate": 0.0019689198116333425, "loss": 0.2184, "step": 8972 }, { "epoch": 0.015911643745140647, "grad_norm": 1.03125, "learning_rate": 0.001968904303495319, "loss": 0.2089, "step": 8974 }, { "epoch": 0.01591518991045046, "grad_norm": 0.73046875, "learning_rate": 0.0019688887915572132, "loss": 0.2451, "step": 8976 }, { "epoch": 0.015918736075760276, "grad_norm": 0.4375, "learning_rate": 0.001968873275819092, "loss": 0.2544, "step": 8978 }, { "epoch": 0.01592228224107009, "grad_norm": 6.65625, "learning_rate": 0.001968857756281024, "loss": 0.4497, "step": 8980 }, { "epoch": 0.015925828406379905, "grad_norm": 0.546875, "learning_rate": 0.001968842232943077, "loss": 0.1765, "step": 8982 }, { "epoch": 0.01592937457168972, "grad_norm": 0.359375, "learning_rate": 0.0019688267058053186, "loss": 0.2274, "step": 8984 }, { "epoch": 0.015932920736999538, "grad_norm": 0.9296875, "learning_rate": 0.0019688111748678164, "loss": 0.2432, "step": 8986 }, { "epoch": 0.015936466902309352, "grad_norm": 4.28125, "learning_rate": 0.0019687956401306396, "loss": 0.2341, "step": 8988 }, { "epoch": 0.015940013067619167, "grad_norm": 4.1875, "learning_rate": 0.0019687801015938547, "loss": 0.3296, "step": 8990 }, { "epoch": 0.01594355923292898, "grad_norm": 0.470703125, "learning_rate": 0.001968764559257531, "loss": 0.213, "step": 8992 }, { "epoch": 0.015947105398238796, "grad_norm": 0.87890625, "learning_rate": 0.001968749013121735, "loss": 0.2537, "step": 8994 }, { "epoch": 0.015950651563548614, "grad_norm": 0.41796875, "learning_rate": 0.001968733463186536, "loss": 0.21, "step": 8996 }, { "epoch": 0.01595419772885843, "grad_norm": 0.400390625, "learning_rate": 0.001968717909452001, "loss": 0.2433, "step": 8998 }, { "epoch": 0.015957743894168243, "grad_norm": 0.515625, "learning_rate": 0.0019687023519181987, "loss": 0.3763, "step": 9000 }, { "epoch": 0.015961290059478057, "grad_norm": 0.6015625, "learning_rate": 0.001968686790585197, "loss": 0.2646, "step": 9002 }, { "epoch": 0.015964836224787872, "grad_norm": 0.59765625, "learning_rate": 0.0019686712254530637, "loss": 0.2571, "step": 9004 }, { "epoch": 0.015968382390097687, "grad_norm": 2.140625, "learning_rate": 0.001968655656521867, "loss": 0.4143, "step": 9006 }, { "epoch": 0.015971928555407505, "grad_norm": 0.28515625, "learning_rate": 0.0019686400837916752, "loss": 0.2215, "step": 9008 }, { "epoch": 0.01597547472071732, "grad_norm": 0.41015625, "learning_rate": 0.001968624507262556, "loss": 0.2064, "step": 9010 }, { "epoch": 0.015979020886027134, "grad_norm": 0.51953125, "learning_rate": 0.0019686089269345787, "loss": 0.2513, "step": 9012 }, { "epoch": 0.015982567051336948, "grad_norm": 0.498046875, "learning_rate": 0.0019685933428078097, "loss": 0.276, "step": 9014 }, { "epoch": 0.015986113216646763, "grad_norm": 0.37109375, "learning_rate": 0.0019685777548823177, "loss": 0.1765, "step": 9016 }, { "epoch": 0.015989659381956577, "grad_norm": 0.30859375, "learning_rate": 0.0019685621631581715, "loss": 0.1829, "step": 9018 }, { "epoch": 0.015993205547266395, "grad_norm": 2.296875, "learning_rate": 0.001968546567635439, "loss": 0.3764, "step": 9020 }, { "epoch": 0.01599675171257621, "grad_norm": 0.65234375, "learning_rate": 0.001968530968314188, "loss": 0.2513, "step": 9022 }, { "epoch": 0.016000297877886024, "grad_norm": 0.5, "learning_rate": 0.001968515365194487, "loss": 0.3134, "step": 9024 }, { "epoch": 0.01600384404319584, "grad_norm": 0.291015625, "learning_rate": 0.0019684997582764046, "loss": 0.1954, "step": 9026 }, { "epoch": 0.016007390208505654, "grad_norm": 0.29296875, "learning_rate": 0.001968484147560009, "loss": 0.1838, "step": 9028 }, { "epoch": 0.01601093637381547, "grad_norm": 0.498046875, "learning_rate": 0.001968468533045368, "loss": 0.347, "step": 9030 }, { "epoch": 0.016014482539125286, "grad_norm": 0.390625, "learning_rate": 0.0019684529147325496, "loss": 0.1951, "step": 9032 }, { "epoch": 0.0160180287044351, "grad_norm": 0.375, "learning_rate": 0.001968437292621623, "loss": 0.419, "step": 9034 }, { "epoch": 0.016021574869744915, "grad_norm": 2.125, "learning_rate": 0.0019684216667126566, "loss": 0.2516, "step": 9036 }, { "epoch": 0.01602512103505473, "grad_norm": 0.2373046875, "learning_rate": 0.0019684060370057177, "loss": 0.2799, "step": 9038 }, { "epoch": 0.016028667200364544, "grad_norm": 0.419921875, "learning_rate": 0.001968390403500875, "loss": 0.2165, "step": 9040 }, { "epoch": 0.016032213365674362, "grad_norm": 0.3046875, "learning_rate": 0.0019683747661981975, "loss": 0.2137, "step": 9042 }, { "epoch": 0.016035759530984177, "grad_norm": 0.7578125, "learning_rate": 0.001968359125097753, "loss": 0.3507, "step": 9044 }, { "epoch": 0.01603930569629399, "grad_norm": 0.462890625, "learning_rate": 0.00196834348019961, "loss": 0.2477, "step": 9046 }, { "epoch": 0.016042851861603806, "grad_norm": 0.7734375, "learning_rate": 0.0019683278315038378, "loss": 0.2222, "step": 9048 }, { "epoch": 0.01604639802691362, "grad_norm": 0.3671875, "learning_rate": 0.0019683121790105033, "loss": 0.2, "step": 9050 }, { "epoch": 0.016049944192223435, "grad_norm": 0.75390625, "learning_rate": 0.0019682965227196757, "loss": 0.2866, "step": 9052 }, { "epoch": 0.016053490357533253, "grad_norm": 0.2353515625, "learning_rate": 0.0019682808626314235, "loss": 0.2183, "step": 9054 }, { "epoch": 0.016057036522843068, "grad_norm": 1.75, "learning_rate": 0.0019682651987458157, "loss": 0.2317, "step": 9056 }, { "epoch": 0.016060582688152882, "grad_norm": 0.953125, "learning_rate": 0.00196824953106292, "loss": 0.2602, "step": 9058 }, { "epoch": 0.016064128853462697, "grad_norm": 0.4765625, "learning_rate": 0.0019682338595828045, "loss": 0.2026, "step": 9060 }, { "epoch": 0.01606767501877251, "grad_norm": 0.306640625, "learning_rate": 0.0019682181843055395, "loss": 0.2367, "step": 9062 }, { "epoch": 0.01607122118408233, "grad_norm": 1.203125, "learning_rate": 0.0019682025052311916, "loss": 0.3423, "step": 9064 }, { "epoch": 0.016074767349392144, "grad_norm": 0.365234375, "learning_rate": 0.001968186822359831, "loss": 0.2734, "step": 9066 }, { "epoch": 0.01607831351470196, "grad_norm": 0.55859375, "learning_rate": 0.001968171135691525, "loss": 0.1992, "step": 9068 }, { "epoch": 0.016081859680011773, "grad_norm": 0.9140625, "learning_rate": 0.001968155445226343, "loss": 0.3029, "step": 9070 }, { "epoch": 0.016085405845321588, "grad_norm": 0.578125, "learning_rate": 0.001968139750964353, "loss": 0.2207, "step": 9072 }, { "epoch": 0.016088952010631402, "grad_norm": 0.6640625, "learning_rate": 0.0019681240529056247, "loss": 0.3986, "step": 9074 }, { "epoch": 0.01609249817594122, "grad_norm": 0.267578125, "learning_rate": 0.0019681083510502254, "loss": 0.5211, "step": 9076 }, { "epoch": 0.016096044341251035, "grad_norm": 0.65625, "learning_rate": 0.0019680926453982243, "loss": 0.2527, "step": 9078 }, { "epoch": 0.01609959050656085, "grad_norm": 0.2734375, "learning_rate": 0.001968076935949691, "loss": 0.204, "step": 9080 }, { "epoch": 0.016103136671870664, "grad_norm": 1.6484375, "learning_rate": 0.001968061222704693, "loss": 0.3984, "step": 9082 }, { "epoch": 0.01610668283718048, "grad_norm": 0.52734375, "learning_rate": 0.0019680455056632993, "loss": 0.2289, "step": 9084 }, { "epoch": 0.016110229002490293, "grad_norm": 2.1875, "learning_rate": 0.001968029784825579, "loss": 0.3513, "step": 9086 }, { "epoch": 0.01611377516780011, "grad_norm": 1.2890625, "learning_rate": 0.0019680140601916005, "loss": 0.2703, "step": 9088 }, { "epoch": 0.016117321333109925, "grad_norm": 0.37890625, "learning_rate": 0.001967998331761433, "loss": 0.229, "step": 9090 }, { "epoch": 0.01612086749841974, "grad_norm": 0.6484375, "learning_rate": 0.0019679825995351447, "loss": 0.3204, "step": 9092 }, { "epoch": 0.016124413663729555, "grad_norm": 0.36328125, "learning_rate": 0.001967966863512805, "loss": 0.224, "step": 9094 }, { "epoch": 0.01612795982903937, "grad_norm": 0.57421875, "learning_rate": 0.0019679511236944816, "loss": 0.2061, "step": 9096 }, { "epoch": 0.016131505994349187, "grad_norm": 0.48828125, "learning_rate": 0.001967935380080245, "loss": 0.2472, "step": 9098 }, { "epoch": 0.016135052159659, "grad_norm": 0.466796875, "learning_rate": 0.0019679196326701626, "loss": 0.2555, "step": 9100 }, { "epoch": 0.016138598324968816, "grad_norm": 0.474609375, "learning_rate": 0.0019679038814643043, "loss": 0.1926, "step": 9102 }, { "epoch": 0.01614214449027863, "grad_norm": 0.38671875, "learning_rate": 0.001967888126462739, "loss": 0.2364, "step": 9104 }, { "epoch": 0.016145690655588445, "grad_norm": 0.35546875, "learning_rate": 0.001967872367665534, "loss": 0.2426, "step": 9106 }, { "epoch": 0.01614923682089826, "grad_norm": 0.90625, "learning_rate": 0.0019678566050727606, "loss": 0.2446, "step": 9108 }, { "epoch": 0.016152782986208078, "grad_norm": 0.455078125, "learning_rate": 0.001967840838684486, "loss": 0.2825, "step": 9110 }, { "epoch": 0.016156329151517893, "grad_norm": 0.515625, "learning_rate": 0.001967825068500779, "loss": 0.2445, "step": 9112 }, { "epoch": 0.016159875316827707, "grad_norm": 1.3046875, "learning_rate": 0.00196780929452171, "loss": 0.2415, "step": 9114 }, { "epoch": 0.01616342148213752, "grad_norm": 0.58203125, "learning_rate": 0.001967793516747347, "loss": 0.3694, "step": 9116 }, { "epoch": 0.016166967647447336, "grad_norm": 0.359375, "learning_rate": 0.0019677777351777594, "loss": 0.1842, "step": 9118 }, { "epoch": 0.01617051381275715, "grad_norm": 0.419921875, "learning_rate": 0.001967761949813016, "loss": 0.3075, "step": 9120 }, { "epoch": 0.01617405997806697, "grad_norm": 0.7734375, "learning_rate": 0.001967746160653186, "loss": 0.2139, "step": 9122 }, { "epoch": 0.016177606143376783, "grad_norm": 0.62890625, "learning_rate": 0.001967730367698338, "loss": 0.281, "step": 9124 }, { "epoch": 0.016181152308686598, "grad_norm": 0.78515625, "learning_rate": 0.0019677145709485417, "loss": 0.2777, "step": 9126 }, { "epoch": 0.016184698473996412, "grad_norm": 0.376953125, "learning_rate": 0.0019676987704038656, "loss": 0.2851, "step": 9128 }, { "epoch": 0.016188244639306227, "grad_norm": 0.61328125, "learning_rate": 0.001967682966064379, "loss": 0.2513, "step": 9130 }, { "epoch": 0.016191790804616045, "grad_norm": 0.50390625, "learning_rate": 0.001967667157930152, "loss": 0.2483, "step": 9132 }, { "epoch": 0.01619533696992586, "grad_norm": 0.88671875, "learning_rate": 0.001967651346001252, "loss": 0.2562, "step": 9134 }, { "epoch": 0.016198883135235674, "grad_norm": 0.85546875, "learning_rate": 0.0019676355302777494, "loss": 0.4384, "step": 9136 }, { "epoch": 0.01620242930054549, "grad_norm": 0.4140625, "learning_rate": 0.001967619710759713, "loss": 0.26, "step": 9138 }, { "epoch": 0.016205975465855303, "grad_norm": 0.365234375, "learning_rate": 0.0019676038874472117, "loss": 0.264, "step": 9140 }, { "epoch": 0.016209521631165118, "grad_norm": 0.6796875, "learning_rate": 0.001967588060340315, "loss": 0.3224, "step": 9142 }, { "epoch": 0.016213067796474936, "grad_norm": 0.248046875, "learning_rate": 0.0019675722294390918, "loss": 0.2915, "step": 9144 }, { "epoch": 0.01621661396178475, "grad_norm": 0.59765625, "learning_rate": 0.0019675563947436124, "loss": 0.2036, "step": 9146 }, { "epoch": 0.016220160127094565, "grad_norm": 1.3125, "learning_rate": 0.0019675405562539446, "loss": 0.2886, "step": 9148 }, { "epoch": 0.01622370629240438, "grad_norm": 0.8671875, "learning_rate": 0.0019675247139701586, "loss": 0.2406, "step": 9150 }, { "epoch": 0.016227252457714194, "grad_norm": 0.58984375, "learning_rate": 0.0019675088678923233, "loss": 0.2157, "step": 9152 }, { "epoch": 0.01623079862302401, "grad_norm": 1.4296875, "learning_rate": 0.001967493018020508, "loss": 0.2927, "step": 9154 }, { "epoch": 0.016234344788333827, "grad_norm": 0.64453125, "learning_rate": 0.001967477164354783, "loss": 0.2413, "step": 9156 }, { "epoch": 0.01623789095364364, "grad_norm": 0.77734375, "learning_rate": 0.0019674613068952156, "loss": 0.2759, "step": 9158 }, { "epoch": 0.016241437118953456, "grad_norm": 0.451171875, "learning_rate": 0.001967445445641877, "loss": 0.2807, "step": 9160 }, { "epoch": 0.01624498328426327, "grad_norm": 0.3984375, "learning_rate": 0.0019674295805948354, "loss": 0.2622, "step": 9162 }, { "epoch": 0.016248529449573085, "grad_norm": 2.921875, "learning_rate": 0.001967413711754161, "loss": 0.2902, "step": 9164 }, { "epoch": 0.016252075614882903, "grad_norm": 0.45703125, "learning_rate": 0.001967397839119923, "loss": 0.2736, "step": 9166 }, { "epoch": 0.016255621780192717, "grad_norm": 1.7265625, "learning_rate": 0.0019673819626921905, "loss": 0.3685, "step": 9168 }, { "epoch": 0.016259167945502532, "grad_norm": 0.57421875, "learning_rate": 0.0019673660824710334, "loss": 0.1939, "step": 9170 }, { "epoch": 0.016262714110812346, "grad_norm": 1.7265625, "learning_rate": 0.0019673501984565204, "loss": 0.3006, "step": 9172 }, { "epoch": 0.01626626027612216, "grad_norm": 1.0625, "learning_rate": 0.001967334310648722, "loss": 0.2636, "step": 9174 }, { "epoch": 0.016269806441431976, "grad_norm": 0.33203125, "learning_rate": 0.0019673184190477067, "loss": 0.3767, "step": 9176 }, { "epoch": 0.016273352606741794, "grad_norm": 1.171875, "learning_rate": 0.0019673025236535448, "loss": 0.2422, "step": 9178 }, { "epoch": 0.016276898772051608, "grad_norm": 0.45703125, "learning_rate": 0.0019672866244663054, "loss": 0.1636, "step": 9180 }, { "epoch": 0.016280444937361423, "grad_norm": 0.32421875, "learning_rate": 0.0019672707214860577, "loss": 0.2745, "step": 9182 }, { "epoch": 0.016283991102671237, "grad_norm": 1.2734375, "learning_rate": 0.001967254814712872, "loss": 0.2408, "step": 9184 }, { "epoch": 0.016287537267981052, "grad_norm": 0.412109375, "learning_rate": 0.0019672389041468174, "loss": 0.2722, "step": 9186 }, { "epoch": 0.016291083433290866, "grad_norm": 0.83984375, "learning_rate": 0.0019672229897879636, "loss": 0.2187, "step": 9188 }, { "epoch": 0.016294629598600684, "grad_norm": 0.40234375, "learning_rate": 0.00196720707163638, "loss": 0.3051, "step": 9190 }, { "epoch": 0.0162981757639105, "grad_norm": 0.3671875, "learning_rate": 0.0019671911496921364, "loss": 0.2302, "step": 9192 }, { "epoch": 0.016301721929220313, "grad_norm": 0.46484375, "learning_rate": 0.0019671752239553025, "loss": 0.3003, "step": 9194 }, { "epoch": 0.016305268094530128, "grad_norm": 1.2265625, "learning_rate": 0.001967159294425948, "loss": 0.3314, "step": 9196 }, { "epoch": 0.016308814259839943, "grad_norm": 1.03125, "learning_rate": 0.0019671433611041424, "loss": 0.2185, "step": 9198 }, { "epoch": 0.01631236042514976, "grad_norm": 0.494140625, "learning_rate": 0.0019671274239899555, "loss": 0.2651, "step": 9200 }, { "epoch": 0.016315906590459575, "grad_norm": 0.52734375, "learning_rate": 0.001967111483083457, "loss": 0.2236, "step": 9202 }, { "epoch": 0.01631945275576939, "grad_norm": 0.87890625, "learning_rate": 0.001967095538384716, "loss": 0.2779, "step": 9204 }, { "epoch": 0.016322998921079204, "grad_norm": 0.39453125, "learning_rate": 0.0019670795898938036, "loss": 0.3884, "step": 9206 }, { "epoch": 0.01632654508638902, "grad_norm": 2.5, "learning_rate": 0.0019670636376107886, "loss": 0.2762, "step": 9208 }, { "epoch": 0.016330091251698833, "grad_norm": 0.53125, "learning_rate": 0.0019670476815357406, "loss": 0.2428, "step": 9210 }, { "epoch": 0.01633363741700865, "grad_norm": 0.38671875, "learning_rate": 0.0019670317216687297, "loss": 0.2458, "step": 9212 }, { "epoch": 0.016337183582318466, "grad_norm": 0.384765625, "learning_rate": 0.0019670157580098254, "loss": 0.2737, "step": 9214 }, { "epoch": 0.01634072974762828, "grad_norm": 2.3125, "learning_rate": 0.0019669997905590983, "loss": 0.2491, "step": 9216 }, { "epoch": 0.016344275912938095, "grad_norm": 1.2421875, "learning_rate": 0.0019669838193166174, "loss": 0.2095, "step": 9218 }, { "epoch": 0.01634782207824791, "grad_norm": 0.369140625, "learning_rate": 0.0019669678442824534, "loss": 0.2204, "step": 9220 }, { "epoch": 0.016351368243557724, "grad_norm": 0.3046875, "learning_rate": 0.0019669518654566753, "loss": 0.258, "step": 9222 }, { "epoch": 0.016354914408867542, "grad_norm": 0.357421875, "learning_rate": 0.0019669358828393532, "loss": 0.1901, "step": 9224 }, { "epoch": 0.016358460574177357, "grad_norm": 0.302734375, "learning_rate": 0.001966919896430557, "loss": 0.2416, "step": 9226 }, { "epoch": 0.01636200673948717, "grad_norm": 2.5625, "learning_rate": 0.001966903906230357, "loss": 0.3124, "step": 9228 }, { "epoch": 0.016365552904796986, "grad_norm": 0.29296875, "learning_rate": 0.001966887912238823, "loss": 0.5054, "step": 9230 }, { "epoch": 0.0163690990701068, "grad_norm": 0.443359375, "learning_rate": 0.0019668719144560246, "loss": 0.2961, "step": 9232 }, { "epoch": 0.01637264523541662, "grad_norm": 0.703125, "learning_rate": 0.0019668559128820317, "loss": 0.2606, "step": 9234 }, { "epoch": 0.016376191400726433, "grad_norm": 0.80078125, "learning_rate": 0.001966839907516915, "loss": 0.2821, "step": 9236 }, { "epoch": 0.016379737566036248, "grad_norm": 0.62109375, "learning_rate": 0.0019668238983607442, "loss": 0.2284, "step": 9238 }, { "epoch": 0.016383283731346062, "grad_norm": 3.140625, "learning_rate": 0.001966807885413589, "loss": 0.2976, "step": 9240 }, { "epoch": 0.016386829896655877, "grad_norm": 1.546875, "learning_rate": 0.0019667918686755194, "loss": 0.3194, "step": 9242 }, { "epoch": 0.01639037606196569, "grad_norm": 0.92578125, "learning_rate": 0.001966775848146606, "loss": 0.1863, "step": 9244 }, { "epoch": 0.01639392222727551, "grad_norm": 0.279296875, "learning_rate": 0.0019667598238269184, "loss": 0.1665, "step": 9246 }, { "epoch": 0.016397468392585324, "grad_norm": 0.490234375, "learning_rate": 0.0019667437957165265, "loss": 0.2047, "step": 9248 }, { "epoch": 0.01640101455789514, "grad_norm": 0.62109375, "learning_rate": 0.001966727763815501, "loss": 0.3, "step": 9250 }, { "epoch": 0.016404560723204953, "grad_norm": 1.6953125, "learning_rate": 0.0019667117281239118, "loss": 0.3029, "step": 9252 }, { "epoch": 0.016408106888514767, "grad_norm": 0.84375, "learning_rate": 0.001966695688641829, "loss": 0.3038, "step": 9254 }, { "epoch": 0.016411653053824582, "grad_norm": 0.75390625, "learning_rate": 0.0019666796453693225, "loss": 0.2118, "step": 9256 }, { "epoch": 0.0164151992191344, "grad_norm": 0.8046875, "learning_rate": 0.0019666635983064625, "loss": 0.2733, "step": 9258 }, { "epoch": 0.016418745384444215, "grad_norm": 0.443359375, "learning_rate": 0.0019666475474533194, "loss": 0.1609, "step": 9260 }, { "epoch": 0.01642229154975403, "grad_norm": 0.353515625, "learning_rate": 0.0019666314928099638, "loss": 0.2794, "step": 9262 }, { "epoch": 0.016425837715063844, "grad_norm": 0.94140625, "learning_rate": 0.001966615434376465, "loss": 0.2715, "step": 9264 }, { "epoch": 0.016429383880373658, "grad_norm": 0.45703125, "learning_rate": 0.0019665993721528934, "loss": 0.2159, "step": 9266 }, { "epoch": 0.016432930045683476, "grad_norm": 0.8359375, "learning_rate": 0.00196658330613932, "loss": 0.2003, "step": 9268 }, { "epoch": 0.01643647621099329, "grad_norm": 0.353515625, "learning_rate": 0.0019665672363358144, "loss": 0.1921, "step": 9270 }, { "epoch": 0.016440022376303105, "grad_norm": 0.55859375, "learning_rate": 0.0019665511627424475, "loss": 0.2469, "step": 9272 }, { "epoch": 0.01644356854161292, "grad_norm": 0.3203125, "learning_rate": 0.001966535085359289, "loss": 0.2532, "step": 9274 }, { "epoch": 0.016447114706922734, "grad_norm": 0.65234375, "learning_rate": 0.001966519004186409, "loss": 0.2338, "step": 9276 }, { "epoch": 0.01645066087223255, "grad_norm": 0.396484375, "learning_rate": 0.001966502919223878, "loss": 0.2488, "step": 9278 }, { "epoch": 0.016454207037542367, "grad_norm": 0.53515625, "learning_rate": 0.0019664868304717672, "loss": 0.1875, "step": 9280 }, { "epoch": 0.01645775320285218, "grad_norm": 0.79296875, "learning_rate": 0.001966470737930146, "loss": 0.2337, "step": 9282 }, { "epoch": 0.016461299368161996, "grad_norm": 1.4453125, "learning_rate": 0.0019664546415990854, "loss": 0.3458, "step": 9284 }, { "epoch": 0.01646484553347181, "grad_norm": 0.56640625, "learning_rate": 0.0019664385414786554, "loss": 0.2514, "step": 9286 }, { "epoch": 0.016468391698781625, "grad_norm": 0.369140625, "learning_rate": 0.001966422437568926, "loss": 0.2683, "step": 9288 }, { "epoch": 0.01647193786409144, "grad_norm": 0.421875, "learning_rate": 0.001966406329869969, "loss": 0.2173, "step": 9290 }, { "epoch": 0.016475484029401258, "grad_norm": 0.447265625, "learning_rate": 0.001966390218381853, "loss": 0.2834, "step": 9292 }, { "epoch": 0.016479030194711072, "grad_norm": 1.390625, "learning_rate": 0.00196637410310465, "loss": 0.2814, "step": 9294 }, { "epoch": 0.016482576360020887, "grad_norm": 0.8125, "learning_rate": 0.0019663579840384303, "loss": 0.2642, "step": 9296 }, { "epoch": 0.0164861225253307, "grad_norm": 0.53125, "learning_rate": 0.0019663418611832635, "loss": 0.2865, "step": 9298 }, { "epoch": 0.016489668690640516, "grad_norm": 0.419921875, "learning_rate": 0.001966325734539221, "loss": 0.212, "step": 9300 }, { "epoch": 0.016493214855950334, "grad_norm": 0.6953125, "learning_rate": 0.001966309604106372, "loss": 0.3237, "step": 9302 }, { "epoch": 0.01649676102126015, "grad_norm": 0.36328125, "learning_rate": 0.001966293469884789, "loss": 0.2307, "step": 9304 }, { "epoch": 0.016500307186569963, "grad_norm": 0.42578125, "learning_rate": 0.001966277331874541, "loss": 0.2543, "step": 9306 }, { "epoch": 0.016503853351879778, "grad_norm": 0.82421875, "learning_rate": 0.0019662611900756995, "loss": 0.2334, "step": 9308 }, { "epoch": 0.016507399517189592, "grad_norm": 0.9609375, "learning_rate": 0.001966245044488335, "loss": 0.2651, "step": 9310 }, { "epoch": 0.016510945682499407, "grad_norm": 0.412109375, "learning_rate": 0.0019662288951125175, "loss": 0.1734, "step": 9312 }, { "epoch": 0.016514491847809225, "grad_norm": 0.359375, "learning_rate": 0.0019662127419483176, "loss": 0.2603, "step": 9314 }, { "epoch": 0.01651803801311904, "grad_norm": 0.59375, "learning_rate": 0.001966196584995807, "loss": 0.2612, "step": 9316 }, { "epoch": 0.016521584178428854, "grad_norm": 0.375, "learning_rate": 0.0019661804242550552, "loss": 0.2759, "step": 9318 }, { "epoch": 0.01652513034373867, "grad_norm": 0.6484375, "learning_rate": 0.0019661642597261337, "loss": 0.2949, "step": 9320 }, { "epoch": 0.016528676509048483, "grad_norm": 1.2265625, "learning_rate": 0.0019661480914091125, "loss": 0.2587, "step": 9322 }, { "epoch": 0.016532222674358298, "grad_norm": 0.6875, "learning_rate": 0.001966131919304063, "loss": 0.282, "step": 9324 }, { "epoch": 0.016535768839668116, "grad_norm": 0.3828125, "learning_rate": 0.0019661157434110554, "loss": 0.2139, "step": 9326 }, { "epoch": 0.01653931500497793, "grad_norm": 0.259765625, "learning_rate": 0.0019660995637301604, "loss": 0.3594, "step": 9328 }, { "epoch": 0.016542861170287745, "grad_norm": 0.328125, "learning_rate": 0.0019660833802614495, "loss": 0.2163, "step": 9330 }, { "epoch": 0.01654640733559756, "grad_norm": 0.373046875, "learning_rate": 0.001966067193004993, "loss": 0.2012, "step": 9332 }, { "epoch": 0.016549953500907374, "grad_norm": 0.423828125, "learning_rate": 0.001966051001960861, "loss": 0.2219, "step": 9334 }, { "epoch": 0.016553499666217192, "grad_norm": 0.6875, "learning_rate": 0.0019660348071291254, "loss": 0.2263, "step": 9336 }, { "epoch": 0.016557045831527006, "grad_norm": 0.28515625, "learning_rate": 0.001966018608509857, "loss": 0.2248, "step": 9338 }, { "epoch": 0.01656059199683682, "grad_norm": 0.609375, "learning_rate": 0.001966002406103126, "loss": 0.2741, "step": 9340 }, { "epoch": 0.016564138162146635, "grad_norm": 0.255859375, "learning_rate": 0.0019659861999090033, "loss": 0.2063, "step": 9342 }, { "epoch": 0.01656768432745645, "grad_norm": 0.40234375, "learning_rate": 0.00196596998992756, "loss": 0.2669, "step": 9344 }, { "epoch": 0.016571230492766265, "grad_norm": 0.75, "learning_rate": 0.001965953776158867, "loss": 0.2563, "step": 9346 }, { "epoch": 0.016574776658076083, "grad_norm": 0.404296875, "learning_rate": 0.001965937558602995, "loss": 0.4689, "step": 9348 }, { "epoch": 0.016578322823385897, "grad_norm": 0.380859375, "learning_rate": 0.0019659213372600157, "loss": 0.2028, "step": 9350 }, { "epoch": 0.01658186898869571, "grad_norm": 0.435546875, "learning_rate": 0.001965905112129999, "loss": 0.2586, "step": 9352 }, { "epoch": 0.016585415154005526, "grad_norm": 0.3671875, "learning_rate": 0.0019658888832130164, "loss": 0.2505, "step": 9354 }, { "epoch": 0.01658896131931534, "grad_norm": 0.30859375, "learning_rate": 0.0019658726505091388, "loss": 0.2558, "step": 9356 }, { "epoch": 0.016592507484625155, "grad_norm": 3.703125, "learning_rate": 0.0019658564140184374, "loss": 0.2322, "step": 9358 }, { "epoch": 0.016596053649934973, "grad_norm": 0.34765625, "learning_rate": 0.0019658401737409825, "loss": 0.2673, "step": 9360 }, { "epoch": 0.016599599815244788, "grad_norm": 0.84375, "learning_rate": 0.001965823929676846, "loss": 0.2613, "step": 9362 }, { "epoch": 0.016603145980554603, "grad_norm": 0.365234375, "learning_rate": 0.0019658076818260986, "loss": 0.2405, "step": 9364 }, { "epoch": 0.016606692145864417, "grad_norm": 0.80078125, "learning_rate": 0.001965791430188811, "loss": 0.2341, "step": 9366 }, { "epoch": 0.01661023831117423, "grad_norm": 0.2333984375, "learning_rate": 0.0019657751747650548, "loss": 0.2252, "step": 9368 }, { "epoch": 0.01661378447648405, "grad_norm": 0.376953125, "learning_rate": 0.0019657589155549007, "loss": 0.2062, "step": 9370 }, { "epoch": 0.016617330641793864, "grad_norm": 0.6796875, "learning_rate": 0.0019657426525584204, "loss": 0.3197, "step": 9372 }, { "epoch": 0.01662087680710368, "grad_norm": 0.314453125, "learning_rate": 0.0019657263857756845, "loss": 0.2255, "step": 9374 }, { "epoch": 0.016624422972413493, "grad_norm": 0.3359375, "learning_rate": 0.001965710115206764, "loss": 0.2125, "step": 9376 }, { "epoch": 0.016627969137723308, "grad_norm": 0.52734375, "learning_rate": 0.001965693840851731, "loss": 0.2745, "step": 9378 }, { "epoch": 0.016631515303033122, "grad_norm": 0.337890625, "learning_rate": 0.0019656775627106553, "loss": 0.2305, "step": 9380 }, { "epoch": 0.01663506146834294, "grad_norm": 0.40625, "learning_rate": 0.001965661280783609, "loss": 0.2013, "step": 9382 }, { "epoch": 0.016638607633652755, "grad_norm": 0.50390625, "learning_rate": 0.001965644995070663, "loss": 0.2655, "step": 9384 }, { "epoch": 0.01664215379896257, "grad_norm": 0.396484375, "learning_rate": 0.0019656287055718883, "loss": 0.338, "step": 9386 }, { "epoch": 0.016645699964272384, "grad_norm": 0.46484375, "learning_rate": 0.0019656124122873573, "loss": 0.1839, "step": 9388 }, { "epoch": 0.0166492461295822, "grad_norm": 0.470703125, "learning_rate": 0.0019655961152171397, "loss": 0.2716, "step": 9390 }, { "epoch": 0.016652792294892013, "grad_norm": 0.462890625, "learning_rate": 0.001965579814361308, "loss": 0.2017, "step": 9392 }, { "epoch": 0.01665633846020183, "grad_norm": 1.9375, "learning_rate": 0.0019655635097199325, "loss": 0.3478, "step": 9394 }, { "epoch": 0.016659884625511646, "grad_norm": 0.25390625, "learning_rate": 0.0019655472012930853, "loss": 0.2572, "step": 9396 }, { "epoch": 0.01666343079082146, "grad_norm": 0.8828125, "learning_rate": 0.0019655308890808372, "loss": 0.2865, "step": 9398 }, { "epoch": 0.016666976956131275, "grad_norm": 0.41796875, "learning_rate": 0.00196551457308326, "loss": 0.2431, "step": 9400 }, { "epoch": 0.01667052312144109, "grad_norm": 0.298828125, "learning_rate": 0.0019654982533004245, "loss": 0.4089, "step": 9402 }, { "epoch": 0.016674069286750907, "grad_norm": 0.89453125, "learning_rate": 0.0019654819297324024, "loss": 0.2949, "step": 9404 }, { "epoch": 0.016677615452060722, "grad_norm": 1.125, "learning_rate": 0.0019654656023792654, "loss": 0.4328, "step": 9406 }, { "epoch": 0.016681161617370537, "grad_norm": 0.373046875, "learning_rate": 0.0019654492712410845, "loss": 0.2589, "step": 9408 }, { "epoch": 0.01668470778268035, "grad_norm": 0.451171875, "learning_rate": 0.001965432936317931, "loss": 0.3978, "step": 9410 }, { "epoch": 0.016688253947990166, "grad_norm": 0.70703125, "learning_rate": 0.0019654165976098767, "loss": 0.2142, "step": 9412 }, { "epoch": 0.01669180011329998, "grad_norm": 0.408203125, "learning_rate": 0.0019654002551169926, "loss": 0.208, "step": 9414 }, { "epoch": 0.016695346278609798, "grad_norm": 0.59375, "learning_rate": 0.001965383908839351, "loss": 0.2061, "step": 9416 }, { "epoch": 0.016698892443919613, "grad_norm": 0.8828125, "learning_rate": 0.0019653675587770225, "loss": 0.4743, "step": 9418 }, { "epoch": 0.016702438609229427, "grad_norm": 1.0625, "learning_rate": 0.001965351204930079, "loss": 0.2348, "step": 9420 }, { "epoch": 0.016705984774539242, "grad_norm": 0.7734375, "learning_rate": 0.001965334847298592, "loss": 0.2371, "step": 9422 }, { "epoch": 0.016709530939849056, "grad_norm": 2.40625, "learning_rate": 0.0019653184858826334, "loss": 0.2486, "step": 9424 }, { "epoch": 0.01671307710515887, "grad_norm": 0.3828125, "learning_rate": 0.0019653021206822736, "loss": 0.2321, "step": 9426 }, { "epoch": 0.01671662327046869, "grad_norm": 0.404296875, "learning_rate": 0.0019652857516975855, "loss": 0.1857, "step": 9428 }, { "epoch": 0.016720169435778504, "grad_norm": 0.5234375, "learning_rate": 0.00196526937892864, "loss": 0.2349, "step": 9430 }, { "epoch": 0.016723715601088318, "grad_norm": 0.3984375, "learning_rate": 0.0019652530023755088, "loss": 0.1952, "step": 9432 }, { "epoch": 0.016727261766398133, "grad_norm": 0.453125, "learning_rate": 0.0019652366220382638, "loss": 0.2145, "step": 9434 }, { "epoch": 0.016730807931707947, "grad_norm": 0.54296875, "learning_rate": 0.001965220237916976, "loss": 0.1916, "step": 9436 }, { "epoch": 0.016734354097017765, "grad_norm": 0.63671875, "learning_rate": 0.0019652038500117177, "loss": 0.2237, "step": 9438 }, { "epoch": 0.01673790026232758, "grad_norm": 0.6484375, "learning_rate": 0.00196518745832256, "loss": 0.2424, "step": 9440 }, { "epoch": 0.016741446427637394, "grad_norm": 0.71484375, "learning_rate": 0.0019651710628495757, "loss": 0.276, "step": 9442 }, { "epoch": 0.01674499259294721, "grad_norm": 0.62109375, "learning_rate": 0.0019651546635928354, "loss": 0.3992, "step": 9444 }, { "epoch": 0.016748538758257023, "grad_norm": 0.3046875, "learning_rate": 0.0019651382605524108, "loss": 0.1902, "step": 9446 }, { "epoch": 0.016752084923566838, "grad_norm": 0.84375, "learning_rate": 0.0019651218537283742, "loss": 0.232, "step": 9448 }, { "epoch": 0.016755631088876656, "grad_norm": 1.234375, "learning_rate": 0.0019651054431207974, "loss": 0.1949, "step": 9450 }, { "epoch": 0.01675917725418647, "grad_norm": 1.0078125, "learning_rate": 0.0019650890287297514, "loss": 0.3047, "step": 9452 }, { "epoch": 0.016762723419496285, "grad_norm": 0.48046875, "learning_rate": 0.0019650726105553086, "loss": 0.2615, "step": 9454 }, { "epoch": 0.0167662695848061, "grad_norm": 0.61328125, "learning_rate": 0.001965056188597541, "loss": 0.2362, "step": 9456 }, { "epoch": 0.016769815750115914, "grad_norm": 0.302734375, "learning_rate": 0.00196503976285652, "loss": 0.2349, "step": 9458 }, { "epoch": 0.01677336191542573, "grad_norm": 1.03125, "learning_rate": 0.0019650233333323172, "loss": 0.2014, "step": 9460 }, { "epoch": 0.016776908080735547, "grad_norm": 0.337890625, "learning_rate": 0.001965006900025005, "loss": 0.2017, "step": 9462 }, { "epoch": 0.01678045424604536, "grad_norm": 0.5, "learning_rate": 0.0019649904629346557, "loss": 0.2151, "step": 9464 }, { "epoch": 0.016784000411355176, "grad_norm": 0.40234375, "learning_rate": 0.0019649740220613393, "loss": 0.2648, "step": 9466 }, { "epoch": 0.01678754657666499, "grad_norm": 0.55859375, "learning_rate": 0.00196495757740513, "loss": 0.2362, "step": 9468 }, { "epoch": 0.016791092741974805, "grad_norm": 1.71875, "learning_rate": 0.001964941128966098, "loss": 0.3285, "step": 9470 }, { "epoch": 0.016794638907284623, "grad_norm": 1.1484375, "learning_rate": 0.0019649246767443167, "loss": 0.3969, "step": 9472 }, { "epoch": 0.016798185072594438, "grad_norm": 0.388671875, "learning_rate": 0.001964908220739857, "loss": 0.2377, "step": 9474 }, { "epoch": 0.016801731237904252, "grad_norm": 0.58984375, "learning_rate": 0.001964891760952791, "loss": 0.4679, "step": 9476 }, { "epoch": 0.016805277403214067, "grad_norm": 0.369140625, "learning_rate": 0.0019648752973831906, "loss": 0.1752, "step": 9478 }, { "epoch": 0.01680882356852388, "grad_norm": 0.5625, "learning_rate": 0.0019648588300311283, "loss": 0.2766, "step": 9480 }, { "epoch": 0.016812369733833696, "grad_norm": 0.2412109375, "learning_rate": 0.001964842358896676, "loss": 0.2533, "step": 9482 }, { "epoch": 0.016815915899143514, "grad_norm": 1.0546875, "learning_rate": 0.0019648258839799052, "loss": 0.2178, "step": 9484 }, { "epoch": 0.01681946206445333, "grad_norm": 0.5625, "learning_rate": 0.0019648094052808884, "loss": 0.2029, "step": 9486 }, { "epoch": 0.016823008229763143, "grad_norm": 1.140625, "learning_rate": 0.0019647929227996977, "loss": 0.2834, "step": 9488 }, { "epoch": 0.016826554395072957, "grad_norm": 0.384765625, "learning_rate": 0.0019647764365364052, "loss": 0.4014, "step": 9490 }, { "epoch": 0.016830100560382772, "grad_norm": 0.3125, "learning_rate": 0.001964759946491083, "loss": 0.2392, "step": 9492 }, { "epoch": 0.016833646725692587, "grad_norm": 0.263671875, "learning_rate": 0.001964743452663803, "loss": 0.2035, "step": 9494 }, { "epoch": 0.016837192891002405, "grad_norm": 1.21875, "learning_rate": 0.0019647269550546373, "loss": 0.2188, "step": 9496 }, { "epoch": 0.01684073905631222, "grad_norm": 0.5, "learning_rate": 0.001964710453663658, "loss": 0.2276, "step": 9498 }, { "epoch": 0.016844285221622034, "grad_norm": 0.51953125, "learning_rate": 0.0019646939484909377, "loss": 0.2155, "step": 9500 }, { "epoch": 0.01684783138693185, "grad_norm": 0.8828125, "learning_rate": 0.0019646774395365484, "loss": 0.2257, "step": 9502 }, { "epoch": 0.016851377552241663, "grad_norm": 0.3671875, "learning_rate": 0.001964660926800562, "loss": 0.3609, "step": 9504 }, { "epoch": 0.01685492371755148, "grad_norm": 0.87890625, "learning_rate": 0.0019646444102830512, "loss": 0.2938, "step": 9506 }, { "epoch": 0.016858469882861295, "grad_norm": 2.21875, "learning_rate": 0.001964627889984088, "loss": 0.4673, "step": 9508 }, { "epoch": 0.01686201604817111, "grad_norm": 0.63671875, "learning_rate": 0.0019646113659037446, "loss": 0.2517, "step": 9510 }, { "epoch": 0.016865562213480925, "grad_norm": 0.68359375, "learning_rate": 0.0019645948380420937, "loss": 0.268, "step": 9512 }, { "epoch": 0.01686910837879074, "grad_norm": 0.341796875, "learning_rate": 0.0019645783063992066, "loss": 0.2256, "step": 9514 }, { "epoch": 0.016872654544100554, "grad_norm": 1.8828125, "learning_rate": 0.001964561770975156, "loss": 0.3528, "step": 9516 }, { "epoch": 0.01687620070941037, "grad_norm": 2.6875, "learning_rate": 0.0019645452317700153, "loss": 0.3423, "step": 9518 }, { "epoch": 0.016879746874720186, "grad_norm": 0.2470703125, "learning_rate": 0.001964528688783855, "loss": 0.1844, "step": 9520 }, { "epoch": 0.01688329304003, "grad_norm": 0.58984375, "learning_rate": 0.001964512142016749, "loss": 0.1722, "step": 9522 }, { "epoch": 0.016886839205339815, "grad_norm": 0.63671875, "learning_rate": 0.0019644955914687686, "loss": 0.2366, "step": 9524 }, { "epoch": 0.01689038537064963, "grad_norm": 0.671875, "learning_rate": 0.001964479037139987, "loss": 0.1995, "step": 9526 }, { "epoch": 0.016893931535959444, "grad_norm": 0.3203125, "learning_rate": 0.0019644624790304765, "loss": 0.2399, "step": 9528 }, { "epoch": 0.016897477701269262, "grad_norm": 0.7265625, "learning_rate": 0.001964445917140309, "loss": 0.2286, "step": 9530 }, { "epoch": 0.016901023866579077, "grad_norm": 0.66796875, "learning_rate": 0.001964429351469557, "loss": 0.3129, "step": 9532 }, { "epoch": 0.01690457003188889, "grad_norm": 0.5390625, "learning_rate": 0.0019644127820182935, "loss": 0.2849, "step": 9534 }, { "epoch": 0.016908116197198706, "grad_norm": 1.7265625, "learning_rate": 0.00196439620878659, "loss": 0.2212, "step": 9536 }, { "epoch": 0.01691166236250852, "grad_norm": 0.76171875, "learning_rate": 0.00196437963177452, "loss": 0.319, "step": 9538 }, { "epoch": 0.01691520852781834, "grad_norm": 0.5625, "learning_rate": 0.001964363050982156, "loss": 0.2335, "step": 9540 }, { "epoch": 0.016918754693128153, "grad_norm": 0.2490234375, "learning_rate": 0.001964346466409569, "loss": 0.2173, "step": 9542 }, { "epoch": 0.016922300858437968, "grad_norm": 0.53515625, "learning_rate": 0.0019643298780568333, "loss": 0.2494, "step": 9544 }, { "epoch": 0.016925847023747782, "grad_norm": 1.0859375, "learning_rate": 0.001964313285924021, "loss": 0.2045, "step": 9546 }, { "epoch": 0.016929393189057597, "grad_norm": 0.41796875, "learning_rate": 0.001964296690011204, "loss": 0.1743, "step": 9548 }, { "epoch": 0.01693293935436741, "grad_norm": 2.140625, "learning_rate": 0.0019642800903184555, "loss": 0.4388, "step": 9550 }, { "epoch": 0.01693648551967723, "grad_norm": 0.330078125, "learning_rate": 0.001964263486845848, "loss": 0.2515, "step": 9552 }, { "epoch": 0.016940031684987044, "grad_norm": 1.5703125, "learning_rate": 0.0019642468795934534, "loss": 0.3165, "step": 9554 }, { "epoch": 0.01694357785029686, "grad_norm": 0.326171875, "learning_rate": 0.001964230268561346, "loss": 0.2904, "step": 9556 }, { "epoch": 0.016947124015606673, "grad_norm": 0.490234375, "learning_rate": 0.001964213653749597, "loss": 0.2365, "step": 9558 }, { "epoch": 0.016950670180916488, "grad_norm": 0.53125, "learning_rate": 0.001964197035158279, "loss": 0.2333, "step": 9560 }, { "epoch": 0.016954216346226302, "grad_norm": 0.38671875, "learning_rate": 0.001964180412787466, "loss": 0.2503, "step": 9562 }, { "epoch": 0.01695776251153612, "grad_norm": 0.337890625, "learning_rate": 0.0019641637866372294, "loss": 0.32, "step": 9564 }, { "epoch": 0.016961308676845935, "grad_norm": 0.51953125, "learning_rate": 0.0019641471567076424, "loss": 0.3051, "step": 9566 }, { "epoch": 0.01696485484215575, "grad_norm": 0.55859375, "learning_rate": 0.0019641305229987772, "loss": 0.2374, "step": 9568 }, { "epoch": 0.016968401007465564, "grad_norm": 0.42578125, "learning_rate": 0.0019641138855107073, "loss": 0.2053, "step": 9570 }, { "epoch": 0.01697194717277538, "grad_norm": 1.21875, "learning_rate": 0.0019640972442435058, "loss": 0.3455, "step": 9572 }, { "epoch": 0.016975493338085196, "grad_norm": 0.4765625, "learning_rate": 0.0019640805991972443, "loss": 0.237, "step": 9574 }, { "epoch": 0.01697903950339501, "grad_norm": 0.341796875, "learning_rate": 0.001964063950371996, "loss": 0.1935, "step": 9576 }, { "epoch": 0.016982585668704826, "grad_norm": 1.1640625, "learning_rate": 0.0019640472977678347, "loss": 0.2765, "step": 9578 }, { "epoch": 0.01698613183401464, "grad_norm": 0.40625, "learning_rate": 0.001964030641384832, "loss": 0.1984, "step": 9580 }, { "epoch": 0.016989677999324455, "grad_norm": 0.8671875, "learning_rate": 0.001964013981223061, "loss": 0.2682, "step": 9582 }, { "epoch": 0.01699322416463427, "grad_norm": 1.671875, "learning_rate": 0.001963997317282595, "loss": 0.4339, "step": 9584 }, { "epoch": 0.016996770329944087, "grad_norm": 2.640625, "learning_rate": 0.001963980649563506, "loss": 0.285, "step": 9586 }, { "epoch": 0.017000316495253902, "grad_norm": 0.71484375, "learning_rate": 0.0019639639780658683, "loss": 0.2807, "step": 9588 }, { "epoch": 0.017003862660563716, "grad_norm": 0.44921875, "learning_rate": 0.0019639473027897535, "loss": 0.1932, "step": 9590 }, { "epoch": 0.01700740882587353, "grad_norm": 0.62890625, "learning_rate": 0.0019639306237352354, "loss": 0.2416, "step": 9592 }, { "epoch": 0.017010954991183345, "grad_norm": 0.6171875, "learning_rate": 0.0019639139409023862, "loss": 0.2396, "step": 9594 }, { "epoch": 0.01701450115649316, "grad_norm": 0.875, "learning_rate": 0.0019638972542912795, "loss": 0.2727, "step": 9596 }, { "epoch": 0.017018047321802978, "grad_norm": 0.408203125, "learning_rate": 0.001963880563901988, "loss": 0.2144, "step": 9598 }, { "epoch": 0.017021593487112793, "grad_norm": 1.0625, "learning_rate": 0.001963863869734585, "loss": 0.2307, "step": 9600 }, { "epoch": 0.017025139652422607, "grad_norm": 0.423828125, "learning_rate": 0.0019638471717891423, "loss": 0.2664, "step": 9602 }, { "epoch": 0.01702868581773242, "grad_norm": 0.5546875, "learning_rate": 0.0019638304700657343, "loss": 0.1799, "step": 9604 }, { "epoch": 0.017032231983042236, "grad_norm": 1.0859375, "learning_rate": 0.001963813764564434, "loss": 0.2072, "step": 9606 }, { "epoch": 0.017035778148352054, "grad_norm": 1.1640625, "learning_rate": 0.001963797055285314, "loss": 0.2371, "step": 9608 }, { "epoch": 0.01703932431366187, "grad_norm": 1.25, "learning_rate": 0.001963780342228447, "loss": 0.2137, "step": 9610 }, { "epoch": 0.017042870478971683, "grad_norm": 0.63671875, "learning_rate": 0.0019637636253939067, "loss": 0.1847, "step": 9612 }, { "epoch": 0.017046416644281498, "grad_norm": 1.7578125, "learning_rate": 0.001963746904781766, "loss": 0.2864, "step": 9614 }, { "epoch": 0.017049962809591312, "grad_norm": 6.8125, "learning_rate": 0.0019637301803920983, "loss": 0.3559, "step": 9616 }, { "epoch": 0.017053508974901127, "grad_norm": 0.8046875, "learning_rate": 0.0019637134522249764, "loss": 0.2836, "step": 9618 }, { "epoch": 0.017057055140210945, "grad_norm": 0.35546875, "learning_rate": 0.0019636967202804733, "loss": 0.4623, "step": 9620 }, { "epoch": 0.01706060130552076, "grad_norm": 0.369140625, "learning_rate": 0.0019636799845586628, "loss": 0.2132, "step": 9622 }, { "epoch": 0.017064147470830574, "grad_norm": 1.03125, "learning_rate": 0.0019636632450596176, "loss": 0.2257, "step": 9624 }, { "epoch": 0.01706769363614039, "grad_norm": 0.40234375, "learning_rate": 0.0019636465017834107, "loss": 0.2445, "step": 9626 }, { "epoch": 0.017071239801450203, "grad_norm": 0.83203125, "learning_rate": 0.0019636297547301162, "loss": 0.2282, "step": 9628 }, { "epoch": 0.017074785966760018, "grad_norm": 0.498046875, "learning_rate": 0.0019636130038998066, "loss": 0.2248, "step": 9630 }, { "epoch": 0.017078332132069836, "grad_norm": 0.3984375, "learning_rate": 0.0019635962492925555, "loss": 0.2048, "step": 9632 }, { "epoch": 0.01708187829737965, "grad_norm": 0.427734375, "learning_rate": 0.001963579490908436, "loss": 0.2065, "step": 9634 }, { "epoch": 0.017085424462689465, "grad_norm": 0.421875, "learning_rate": 0.001963562728747521, "loss": 0.2628, "step": 9636 }, { "epoch": 0.01708897062799928, "grad_norm": 0.703125, "learning_rate": 0.001963545962809885, "loss": 0.1987, "step": 9638 }, { "epoch": 0.017092516793309094, "grad_norm": 0.54296875, "learning_rate": 0.0019635291930955997, "loss": 0.2323, "step": 9640 }, { "epoch": 0.017096062958618912, "grad_norm": 0.5703125, "learning_rate": 0.00196351241960474, "loss": 0.2546, "step": 9642 }, { "epoch": 0.017099609123928727, "grad_norm": 0.341796875, "learning_rate": 0.001963495642337378, "loss": 0.212, "step": 9644 }, { "epoch": 0.01710315528923854, "grad_norm": 0.6484375, "learning_rate": 0.0019634788612935884, "loss": 0.2484, "step": 9646 }, { "epoch": 0.017106701454548356, "grad_norm": 0.6484375, "learning_rate": 0.001963462076473443, "loss": 0.2394, "step": 9648 }, { "epoch": 0.01711024761985817, "grad_norm": 0.443359375, "learning_rate": 0.001963445287877017, "loss": 0.2231, "step": 9650 }, { "epoch": 0.017113793785167985, "grad_norm": 0.56640625, "learning_rate": 0.0019634284955043816, "loss": 0.2258, "step": 9652 }, { "epoch": 0.017117339950477803, "grad_norm": 0.8671875, "learning_rate": 0.0019634116993556125, "loss": 0.2194, "step": 9654 }, { "epoch": 0.017120886115787617, "grad_norm": 0.45703125, "learning_rate": 0.001963394899430782, "loss": 0.2386, "step": 9656 }, { "epoch": 0.017124432281097432, "grad_norm": 0.9453125, "learning_rate": 0.0019633780957299635, "loss": 0.2901, "step": 9658 }, { "epoch": 0.017127978446407247, "grad_norm": 0.416015625, "learning_rate": 0.0019633612882532306, "loss": 0.2492, "step": 9660 }, { "epoch": 0.01713152461171706, "grad_norm": 0.4609375, "learning_rate": 0.001963344477000657, "loss": 0.2727, "step": 9662 }, { "epoch": 0.017135070777026876, "grad_norm": 0.255859375, "learning_rate": 0.0019633276619723163, "loss": 0.2561, "step": 9664 }, { "epoch": 0.017138616942336694, "grad_norm": 0.875, "learning_rate": 0.001963310843168282, "loss": 0.2272, "step": 9666 }, { "epoch": 0.017142163107646508, "grad_norm": 0.330078125, "learning_rate": 0.0019632940205886276, "loss": 0.2567, "step": 9668 }, { "epoch": 0.017145709272956323, "grad_norm": 0.890625, "learning_rate": 0.001963277194233426, "loss": 0.3432, "step": 9670 }, { "epoch": 0.017149255438266137, "grad_norm": 2.359375, "learning_rate": 0.001963260364102752, "loss": 0.3546, "step": 9672 }, { "epoch": 0.017152801603575952, "grad_norm": 0.46484375, "learning_rate": 0.0019632435301966786, "loss": 0.2367, "step": 9674 }, { "epoch": 0.01715634776888577, "grad_norm": 0.7109375, "learning_rate": 0.0019632266925152793, "loss": 0.2208, "step": 9676 }, { "epoch": 0.017159893934195584, "grad_norm": 0.2490234375, "learning_rate": 0.0019632098510586277, "loss": 0.1687, "step": 9678 }, { "epoch": 0.0171634400995054, "grad_norm": 0.279296875, "learning_rate": 0.0019631930058267976, "loss": 0.2545, "step": 9680 }, { "epoch": 0.017166986264815214, "grad_norm": 0.427734375, "learning_rate": 0.001963176156819863, "loss": 0.2024, "step": 9682 }, { "epoch": 0.017170532430125028, "grad_norm": 0.349609375, "learning_rate": 0.001963159304037897, "loss": 0.2608, "step": 9684 }, { "epoch": 0.017174078595434843, "grad_norm": 0.52734375, "learning_rate": 0.0019631424474809735, "loss": 0.2846, "step": 9686 }, { "epoch": 0.01717762476074466, "grad_norm": 5.6875, "learning_rate": 0.0019631255871491666, "loss": 0.385, "step": 9688 }, { "epoch": 0.017181170926054475, "grad_norm": 0.7890625, "learning_rate": 0.0019631087230425493, "loss": 0.1695, "step": 9690 }, { "epoch": 0.01718471709136429, "grad_norm": 0.4609375, "learning_rate": 0.0019630918551611963, "loss": 0.2207, "step": 9692 }, { "epoch": 0.017188263256674104, "grad_norm": 1.0078125, "learning_rate": 0.001963074983505181, "loss": 0.2661, "step": 9694 }, { "epoch": 0.01719180942198392, "grad_norm": 0.43359375, "learning_rate": 0.001963058108074577, "loss": 0.3198, "step": 9696 }, { "epoch": 0.017195355587293733, "grad_norm": 0.240234375, "learning_rate": 0.0019630412288694577, "loss": 0.19, "step": 9698 }, { "epoch": 0.01719890175260355, "grad_norm": 0.287109375, "learning_rate": 0.0019630243458898977, "loss": 0.1994, "step": 9700 }, { "epoch": 0.017202447917913366, "grad_norm": 0.419921875, "learning_rate": 0.0019630074591359702, "loss": 0.2371, "step": 9702 }, { "epoch": 0.01720599408322318, "grad_norm": 0.65625, "learning_rate": 0.00196299056860775, "loss": 0.2823, "step": 9704 }, { "epoch": 0.017209540248532995, "grad_norm": 0.71484375, "learning_rate": 0.0019629736743053097, "loss": 0.2987, "step": 9706 }, { "epoch": 0.01721308641384281, "grad_norm": 0.41796875, "learning_rate": 0.001962956776228724, "loss": 0.2761, "step": 9708 }, { "epoch": 0.017216632579152628, "grad_norm": 0.404296875, "learning_rate": 0.001962939874378067, "loss": 0.2157, "step": 9710 }, { "epoch": 0.017220178744462442, "grad_norm": 0.6875, "learning_rate": 0.0019629229687534114, "loss": 0.2217, "step": 9712 }, { "epoch": 0.017223724909772257, "grad_norm": 0.298828125, "learning_rate": 0.0019629060593548326, "loss": 0.276, "step": 9714 }, { "epoch": 0.01722727107508207, "grad_norm": 0.314453125, "learning_rate": 0.001962889146182404, "loss": 0.2515, "step": 9716 }, { "epoch": 0.017230817240391886, "grad_norm": 0.404296875, "learning_rate": 0.0019628722292361995, "loss": 0.246, "step": 9718 }, { "epoch": 0.0172343634057017, "grad_norm": 0.431640625, "learning_rate": 0.0019628553085162927, "loss": 0.2115, "step": 9720 }, { "epoch": 0.01723790957101152, "grad_norm": 0.515625, "learning_rate": 0.001962838384022758, "loss": 0.1892, "step": 9722 }, { "epoch": 0.017241455736321333, "grad_norm": 0.40625, "learning_rate": 0.0019628214557556698, "loss": 0.2051, "step": 9724 }, { "epoch": 0.017245001901631148, "grad_norm": 0.9140625, "learning_rate": 0.0019628045237151015, "loss": 0.1695, "step": 9726 }, { "epoch": 0.017248548066940962, "grad_norm": 0.9765625, "learning_rate": 0.0019627875879011276, "loss": 0.3314, "step": 9728 }, { "epoch": 0.017252094232250777, "grad_norm": 6.09375, "learning_rate": 0.001962770648313822, "loss": 0.4471, "step": 9730 }, { "epoch": 0.01725564039756059, "grad_norm": 0.435546875, "learning_rate": 0.0019627537049532583, "loss": 0.2151, "step": 9732 }, { "epoch": 0.01725918656287041, "grad_norm": 0.61328125, "learning_rate": 0.0019627367578195112, "loss": 0.2691, "step": 9734 }, { "epoch": 0.017262732728180224, "grad_norm": 0.28515625, "learning_rate": 0.0019627198069126547, "loss": 0.3818, "step": 9736 }, { "epoch": 0.01726627889349004, "grad_norm": 0.44921875, "learning_rate": 0.001962702852232763, "loss": 0.3112, "step": 9738 }, { "epoch": 0.017269825058799853, "grad_norm": 0.419921875, "learning_rate": 0.0019626858937799104, "loss": 0.2058, "step": 9740 }, { "epoch": 0.017273371224109667, "grad_norm": 0.353515625, "learning_rate": 0.0019626689315541705, "loss": 0.2172, "step": 9742 }, { "epoch": 0.017276917389419486, "grad_norm": 0.88671875, "learning_rate": 0.0019626519655556174, "loss": 0.3053, "step": 9744 }, { "epoch": 0.0172804635547293, "grad_norm": 0.2890625, "learning_rate": 0.001962634995784326, "loss": 0.2005, "step": 9746 }, { "epoch": 0.017284009720039115, "grad_norm": 0.330078125, "learning_rate": 0.0019626180222403703, "loss": 0.2145, "step": 9748 }, { "epoch": 0.01728755588534893, "grad_norm": 0.703125, "learning_rate": 0.0019626010449238247, "loss": 0.3239, "step": 9750 }, { "epoch": 0.017291102050658744, "grad_norm": 0.62890625, "learning_rate": 0.001962584063834763, "loss": 0.2424, "step": 9752 }, { "epoch": 0.01729464821596856, "grad_norm": 0.3515625, "learning_rate": 0.001962567078973259, "loss": 0.2768, "step": 9754 }, { "epoch": 0.017298194381278376, "grad_norm": 2.96875, "learning_rate": 0.0019625500903393883, "loss": 0.2033, "step": 9756 }, { "epoch": 0.01730174054658819, "grad_norm": 0.6953125, "learning_rate": 0.0019625330979332247, "loss": 0.2733, "step": 9758 }, { "epoch": 0.017305286711898005, "grad_norm": 0.64453125, "learning_rate": 0.001962516101754842, "loss": 0.346, "step": 9760 }, { "epoch": 0.01730883287720782, "grad_norm": 0.353515625, "learning_rate": 0.001962499101804315, "loss": 0.2294, "step": 9762 }, { "epoch": 0.017312379042517635, "grad_norm": 1.0859375, "learning_rate": 0.001962482098081718, "loss": 0.2946, "step": 9764 }, { "epoch": 0.01731592520782745, "grad_norm": 0.322265625, "learning_rate": 0.0019624650905871246, "loss": 0.2107, "step": 9766 }, { "epoch": 0.017319471373137267, "grad_norm": 1.1171875, "learning_rate": 0.001962448079320611, "loss": 0.2931, "step": 9768 }, { "epoch": 0.01732301753844708, "grad_norm": 0.435546875, "learning_rate": 0.0019624310642822494, "loss": 0.3014, "step": 9770 }, { "epoch": 0.017326563703756896, "grad_norm": 4.71875, "learning_rate": 0.001962414045472116, "loss": 0.3846, "step": 9772 }, { "epoch": 0.01733010986906671, "grad_norm": 0.9765625, "learning_rate": 0.001962397022890284, "loss": 0.2053, "step": 9774 }, { "epoch": 0.017333656034376525, "grad_norm": 0.478515625, "learning_rate": 0.0019623799965368294, "loss": 0.2185, "step": 9776 }, { "epoch": 0.017337202199686343, "grad_norm": 0.92578125, "learning_rate": 0.0019623629664118247, "loss": 0.3544, "step": 9778 }, { "epoch": 0.017340748364996158, "grad_norm": 0.42578125, "learning_rate": 0.0019623459325153456, "loss": 0.2414, "step": 9780 }, { "epoch": 0.017344294530305972, "grad_norm": 5.65625, "learning_rate": 0.001962328894847466, "loss": 0.3282, "step": 9782 }, { "epoch": 0.017347840695615787, "grad_norm": 0.93359375, "learning_rate": 0.0019623118534082607, "loss": 0.3129, "step": 9784 }, { "epoch": 0.0173513868609256, "grad_norm": 0.703125, "learning_rate": 0.0019622948081978045, "loss": 0.2256, "step": 9786 }, { "epoch": 0.017354933026235416, "grad_norm": 0.380859375, "learning_rate": 0.001962277759216172, "loss": 0.2163, "step": 9788 }, { "epoch": 0.017358479191545234, "grad_norm": 0.353515625, "learning_rate": 0.001962260706463437, "loss": 0.2617, "step": 9790 }, { "epoch": 0.01736202535685505, "grad_norm": 0.423828125, "learning_rate": 0.0019622436499396744, "loss": 0.2128, "step": 9792 }, { "epoch": 0.017365571522164863, "grad_norm": 0.3125, "learning_rate": 0.0019622265896449592, "loss": 0.2186, "step": 9794 }, { "epoch": 0.017369117687474678, "grad_norm": 0.8984375, "learning_rate": 0.001962209525579366, "loss": 0.28, "step": 9796 }, { "epoch": 0.017372663852784492, "grad_norm": 0.78125, "learning_rate": 0.0019621924577429687, "loss": 0.2019, "step": 9798 }, { "epoch": 0.017376210018094307, "grad_norm": 0.67578125, "learning_rate": 0.0019621753861358425, "loss": 0.2743, "step": 9800 }, { "epoch": 0.017379756183404125, "grad_norm": 0.61328125, "learning_rate": 0.0019621583107580624, "loss": 0.2726, "step": 9802 }, { "epoch": 0.01738330234871394, "grad_norm": 2.09375, "learning_rate": 0.001962141231609702, "loss": 0.3481, "step": 9804 }, { "epoch": 0.017386848514023754, "grad_norm": 0.2890625, "learning_rate": 0.001962124148690837, "loss": 0.1917, "step": 9806 }, { "epoch": 0.01739039467933357, "grad_norm": 0.63671875, "learning_rate": 0.001962107062001542, "loss": 0.1999, "step": 9808 }, { "epoch": 0.017393940844643383, "grad_norm": 0.51171875, "learning_rate": 0.0019620899715418905, "loss": 0.2412, "step": 9810 }, { "epoch": 0.0173974870099532, "grad_norm": 0.91796875, "learning_rate": 0.0019620728773119592, "loss": 0.2498, "step": 9812 }, { "epoch": 0.017401033175263016, "grad_norm": 0.41796875, "learning_rate": 0.0019620557793118215, "loss": 0.247, "step": 9814 }, { "epoch": 0.01740457934057283, "grad_norm": 0.48046875, "learning_rate": 0.001962038677541553, "loss": 0.2054, "step": 9816 }, { "epoch": 0.017408125505882645, "grad_norm": 0.93359375, "learning_rate": 0.0019620215720012276, "loss": 0.329, "step": 9818 }, { "epoch": 0.01741167167119246, "grad_norm": 0.55078125, "learning_rate": 0.0019620044626909206, "loss": 0.2947, "step": 9820 }, { "epoch": 0.017415217836502274, "grad_norm": 0.52734375, "learning_rate": 0.001961987349610707, "loss": 0.1959, "step": 9822 }, { "epoch": 0.017418764001812092, "grad_norm": 10.0, "learning_rate": 0.0019619702327606614, "loss": 0.335, "step": 9824 }, { "epoch": 0.017422310167121906, "grad_norm": 0.318359375, "learning_rate": 0.0019619531121408585, "loss": 0.2384, "step": 9826 }, { "epoch": 0.01742585633243172, "grad_norm": 0.84375, "learning_rate": 0.0019619359877513735, "loss": 0.3579, "step": 9828 }, { "epoch": 0.017429402497741536, "grad_norm": 0.314453125, "learning_rate": 0.001961918859592281, "loss": 0.2646, "step": 9830 }, { "epoch": 0.01743294866305135, "grad_norm": 0.388671875, "learning_rate": 0.001961901727663656, "loss": 0.216, "step": 9832 }, { "epoch": 0.017436494828361165, "grad_norm": 0.60546875, "learning_rate": 0.001961884591965574, "loss": 0.2314, "step": 9834 }, { "epoch": 0.017440040993670983, "grad_norm": 0.251953125, "learning_rate": 0.0019618674524981092, "loss": 0.253, "step": 9836 }, { "epoch": 0.017443587158980797, "grad_norm": 0.53125, "learning_rate": 0.0019618503092613364, "loss": 0.3197, "step": 9838 }, { "epoch": 0.017447133324290612, "grad_norm": 2.8125, "learning_rate": 0.001961833162255331, "loss": 0.3736, "step": 9840 }, { "epoch": 0.017450679489600426, "grad_norm": 0.4609375, "learning_rate": 0.0019618160114801687, "loss": 0.2202, "step": 9842 }, { "epoch": 0.01745422565491024, "grad_norm": 0.455078125, "learning_rate": 0.001961798856935923, "loss": 0.2819, "step": 9844 }, { "epoch": 0.01745777182022006, "grad_norm": 0.5625, "learning_rate": 0.0019617816986226702, "loss": 0.2464, "step": 9846 }, { "epoch": 0.017461317985529873, "grad_norm": 0.56640625, "learning_rate": 0.0019617645365404847, "loss": 0.2757, "step": 9848 }, { "epoch": 0.017464864150839688, "grad_norm": 0.83203125, "learning_rate": 0.0019617473706894416, "loss": 0.2038, "step": 9850 }, { "epoch": 0.017468410316149503, "grad_norm": 1.15625, "learning_rate": 0.0019617302010696163, "loss": 0.4924, "step": 9852 }, { "epoch": 0.017471956481459317, "grad_norm": 0.384765625, "learning_rate": 0.0019617130276810835, "loss": 0.242, "step": 9854 }, { "epoch": 0.01747550264676913, "grad_norm": 1.1953125, "learning_rate": 0.001961695850523918, "loss": 0.3336, "step": 9856 }, { "epoch": 0.01747904881207895, "grad_norm": 0.376953125, "learning_rate": 0.001961678669598196, "loss": 0.1924, "step": 9858 }, { "epoch": 0.017482594977388764, "grad_norm": 0.310546875, "learning_rate": 0.001961661484903992, "loss": 0.2198, "step": 9860 }, { "epoch": 0.01748614114269858, "grad_norm": 0.400390625, "learning_rate": 0.001961644296441381, "loss": 0.3027, "step": 9862 }, { "epoch": 0.017489687308008393, "grad_norm": 0.359375, "learning_rate": 0.0019616271042104385, "loss": 0.2166, "step": 9864 }, { "epoch": 0.017493233473318208, "grad_norm": 0.515625, "learning_rate": 0.0019616099082112393, "loss": 0.2122, "step": 9866 }, { "epoch": 0.017496779638628022, "grad_norm": 0.4375, "learning_rate": 0.0019615927084438585, "loss": 0.3945, "step": 9868 }, { "epoch": 0.01750032580393784, "grad_norm": 16.0, "learning_rate": 0.001961575504908372, "loss": 0.3347, "step": 9870 }, { "epoch": 0.017503871969247655, "grad_norm": 1.59375, "learning_rate": 0.0019615582976048552, "loss": 0.3852, "step": 9872 }, { "epoch": 0.01750741813455747, "grad_norm": 1.015625, "learning_rate": 0.0019615410865333823, "loss": 0.35, "step": 9874 }, { "epoch": 0.017510964299867284, "grad_norm": 0.416015625, "learning_rate": 0.0019615238716940296, "loss": 0.2599, "step": 9876 }, { "epoch": 0.0175145104651771, "grad_norm": 1.328125, "learning_rate": 0.0019615066530868715, "loss": 0.3203, "step": 9878 }, { "epoch": 0.017518056630486917, "grad_norm": 0.5703125, "learning_rate": 0.0019614894307119837, "loss": 0.3224, "step": 9880 }, { "epoch": 0.01752160279579673, "grad_norm": 0.83203125, "learning_rate": 0.0019614722045694414, "loss": 0.2866, "step": 9882 }, { "epoch": 0.017525148961106546, "grad_norm": 0.4609375, "learning_rate": 0.0019614549746593208, "loss": 0.2413, "step": 9884 }, { "epoch": 0.01752869512641636, "grad_norm": 0.46484375, "learning_rate": 0.001961437740981696, "loss": 0.2427, "step": 9886 }, { "epoch": 0.017532241291726175, "grad_norm": 0.40625, "learning_rate": 0.001961420503536643, "loss": 0.2393, "step": 9888 }, { "epoch": 0.01753578745703599, "grad_norm": 0.28125, "learning_rate": 0.001961403262324237, "loss": 0.221, "step": 9890 }, { "epoch": 0.017539333622345808, "grad_norm": 0.357421875, "learning_rate": 0.0019613860173445536, "loss": 0.2751, "step": 9892 }, { "epoch": 0.017542879787655622, "grad_norm": 0.302734375, "learning_rate": 0.001961368768597668, "loss": 0.2411, "step": 9894 }, { "epoch": 0.017546425952965437, "grad_norm": 1.9140625, "learning_rate": 0.0019613515160836563, "loss": 0.4987, "step": 9896 }, { "epoch": 0.01754997211827525, "grad_norm": 0.40625, "learning_rate": 0.0019613342598025925, "loss": 0.1701, "step": 9898 }, { "epoch": 0.017553518283585066, "grad_norm": 0.66796875, "learning_rate": 0.0019613169997545533, "loss": 0.2124, "step": 9900 }, { "epoch": 0.01755706444889488, "grad_norm": 0.9140625, "learning_rate": 0.001961299735939614, "loss": 0.2289, "step": 9902 }, { "epoch": 0.0175606106142047, "grad_norm": 0.439453125, "learning_rate": 0.00196128246835785, "loss": 0.2478, "step": 9904 }, { "epoch": 0.017564156779514513, "grad_norm": 0.48828125, "learning_rate": 0.0019612651970093366, "loss": 0.2447, "step": 9906 }, { "epoch": 0.017567702944824327, "grad_norm": 0.314453125, "learning_rate": 0.0019612479218941497, "loss": 0.2238, "step": 9908 }, { "epoch": 0.017571249110134142, "grad_norm": 0.4765625, "learning_rate": 0.0019612306430123647, "loss": 0.2221, "step": 9910 }, { "epoch": 0.017574795275443957, "grad_norm": 0.482421875, "learning_rate": 0.0019612133603640566, "loss": 0.3662, "step": 9912 }, { "epoch": 0.017578341440753775, "grad_norm": 0.37109375, "learning_rate": 0.001961196073949302, "loss": 0.2078, "step": 9914 }, { "epoch": 0.01758188760606359, "grad_norm": 1.4140625, "learning_rate": 0.001961178783768176, "loss": 0.3762, "step": 9916 }, { "epoch": 0.017585433771373404, "grad_norm": 0.625, "learning_rate": 0.001961161489820754, "loss": 0.3405, "step": 9918 }, { "epoch": 0.017588979936683218, "grad_norm": 0.455078125, "learning_rate": 0.001961144192107112, "loss": 0.3045, "step": 9920 }, { "epoch": 0.017592526101993033, "grad_norm": 1.5859375, "learning_rate": 0.001961126890627326, "loss": 0.2774, "step": 9922 }, { "epoch": 0.017596072267302847, "grad_norm": 0.37109375, "learning_rate": 0.0019611095853814702, "loss": 0.2138, "step": 9924 }, { "epoch": 0.017599618432612665, "grad_norm": 0.46484375, "learning_rate": 0.0019610922763696223, "loss": 0.2308, "step": 9926 }, { "epoch": 0.01760316459792248, "grad_norm": 0.87890625, "learning_rate": 0.001961074963591856, "loss": 0.2296, "step": 9928 }, { "epoch": 0.017606710763232294, "grad_norm": 1.671875, "learning_rate": 0.0019610576470482487, "loss": 0.4829, "step": 9930 }, { "epoch": 0.01761025692854211, "grad_norm": 0.5546875, "learning_rate": 0.001961040326738875, "loss": 0.3005, "step": 9932 }, { "epoch": 0.017613803093851924, "grad_norm": 0.2578125, "learning_rate": 0.001961023002663811, "loss": 0.1953, "step": 9934 }, { "epoch": 0.017617349259161738, "grad_norm": 0.95703125, "learning_rate": 0.0019610056748231327, "loss": 0.2426, "step": 9936 }, { "epoch": 0.017620895424471556, "grad_norm": 0.51171875, "learning_rate": 0.001960988343216916, "loss": 0.1865, "step": 9938 }, { "epoch": 0.01762444158978137, "grad_norm": 2.046875, "learning_rate": 0.0019609710078452363, "loss": 0.4257, "step": 9940 }, { "epoch": 0.017627987755091185, "grad_norm": 0.74609375, "learning_rate": 0.0019609536687081692, "loss": 0.223, "step": 9942 }, { "epoch": 0.017631533920401, "grad_norm": 0.20703125, "learning_rate": 0.001960936325805791, "loss": 0.1712, "step": 9944 }, { "epoch": 0.017635080085710814, "grad_norm": 0.56640625, "learning_rate": 0.0019609189791381775, "loss": 0.2821, "step": 9946 }, { "epoch": 0.017638626251020632, "grad_norm": 0.443359375, "learning_rate": 0.0019609016287054043, "loss": 0.2669, "step": 9948 }, { "epoch": 0.017642172416330447, "grad_norm": 2.484375, "learning_rate": 0.0019608842745075477, "loss": 0.2853, "step": 9950 }, { "epoch": 0.01764571858164026, "grad_norm": 1.7109375, "learning_rate": 0.0019608669165446834, "loss": 0.2976, "step": 9952 }, { "epoch": 0.017649264746950076, "grad_norm": 0.73046875, "learning_rate": 0.001960849554816887, "loss": 0.271, "step": 9954 }, { "epoch": 0.01765281091225989, "grad_norm": 0.455078125, "learning_rate": 0.0019608321893242343, "loss": 0.2153, "step": 9956 }, { "epoch": 0.017656357077569705, "grad_norm": 0.458984375, "learning_rate": 0.001960814820066802, "loss": 0.2775, "step": 9958 }, { "epoch": 0.017659903242879523, "grad_norm": 0.46484375, "learning_rate": 0.0019607974470446663, "loss": 0.2958, "step": 9960 }, { "epoch": 0.017663449408189338, "grad_norm": 0.267578125, "learning_rate": 0.001960780070257902, "loss": 0.2829, "step": 9962 }, { "epoch": 0.017666995573499152, "grad_norm": 0.41015625, "learning_rate": 0.0019607626897065857, "loss": 0.2188, "step": 9964 }, { "epoch": 0.017670541738808967, "grad_norm": 0.71484375, "learning_rate": 0.0019607453053907932, "loss": 0.3304, "step": 9966 }, { "epoch": 0.01767408790411878, "grad_norm": 0.337890625, "learning_rate": 0.001960727917310601, "loss": 0.2613, "step": 9968 }, { "epoch": 0.017677634069428596, "grad_norm": 0.28515625, "learning_rate": 0.0019607105254660848, "loss": 0.3164, "step": 9970 }, { "epoch": 0.017681180234738414, "grad_norm": 0.72265625, "learning_rate": 0.0019606931298573205, "loss": 0.2424, "step": 9972 }, { "epoch": 0.01768472640004823, "grad_norm": 0.5390625, "learning_rate": 0.0019606757304843846, "loss": 0.2003, "step": 9974 }, { "epoch": 0.017688272565358043, "grad_norm": 0.314453125, "learning_rate": 0.0019606583273473533, "loss": 0.1778, "step": 9976 }, { "epoch": 0.017691818730667858, "grad_norm": 0.458984375, "learning_rate": 0.001960640920446302, "loss": 0.2982, "step": 9978 }, { "epoch": 0.017695364895977672, "grad_norm": 0.36328125, "learning_rate": 0.0019606235097813073, "loss": 0.2776, "step": 9980 }, { "epoch": 0.01769891106128749, "grad_norm": 0.416015625, "learning_rate": 0.0019606060953524453, "loss": 0.208, "step": 9982 }, { "epoch": 0.017702457226597305, "grad_norm": 0.328125, "learning_rate": 0.0019605886771597923, "loss": 0.2513, "step": 9984 }, { "epoch": 0.01770600339190712, "grad_norm": 0.38671875, "learning_rate": 0.001960571255203424, "loss": 0.2291, "step": 9986 }, { "epoch": 0.017709549557216934, "grad_norm": 0.6171875, "learning_rate": 0.001960553829483417, "loss": 0.2553, "step": 9988 }, { "epoch": 0.01771309572252675, "grad_norm": 0.40234375, "learning_rate": 0.0019605363999998476, "loss": 0.1867, "step": 9990 }, { "epoch": 0.017716641887836563, "grad_norm": 0.244140625, "learning_rate": 0.001960518966752792, "loss": 0.2054, "step": 9992 }, { "epoch": 0.01772018805314638, "grad_norm": 0.46875, "learning_rate": 0.001960501529742326, "loss": 0.3144, "step": 9994 }, { "epoch": 0.017723734218456196, "grad_norm": 0.62109375, "learning_rate": 0.001960484088968526, "loss": 0.2266, "step": 9996 }, { "epoch": 0.01772728038376601, "grad_norm": 0.6953125, "learning_rate": 0.001960466644431469, "loss": 0.2959, "step": 9998 }, { "epoch": 0.017730826549075825, "grad_norm": 3.296875, "learning_rate": 0.00196044919613123, "loss": 0.3539, "step": 10000 }, { "epoch": 0.01773437271438564, "grad_norm": 0.3515625, "learning_rate": 0.001960431744067886, "loss": 0.2411, "step": 10002 }, { "epoch": 0.017737918879695454, "grad_norm": 0.64453125, "learning_rate": 0.0019604142882415137, "loss": 0.2444, "step": 10004 }, { "epoch": 0.017741465045005272, "grad_norm": 0.40625, "learning_rate": 0.001960396828652189, "loss": 0.2599, "step": 10006 }, { "epoch": 0.017745011210315086, "grad_norm": 0.5625, "learning_rate": 0.0019603793652999886, "loss": 0.2996, "step": 10008 }, { "epoch": 0.0177485573756249, "grad_norm": 0.458984375, "learning_rate": 0.0019603618981849885, "loss": 0.413, "step": 10010 }, { "epoch": 0.017752103540934715, "grad_norm": 1.1953125, "learning_rate": 0.0019603444273072652, "loss": 0.3284, "step": 10012 }, { "epoch": 0.01775564970624453, "grad_norm": 0.369140625, "learning_rate": 0.001960326952666895, "loss": 0.2157, "step": 10014 }, { "epoch": 0.017759195871554348, "grad_norm": 1.6171875, "learning_rate": 0.001960309474263955, "loss": 0.2594, "step": 10016 }, { "epoch": 0.017762742036864163, "grad_norm": 0.57421875, "learning_rate": 0.0019602919920985204, "loss": 0.2838, "step": 10018 }, { "epoch": 0.017766288202173977, "grad_norm": 0.7578125, "learning_rate": 0.0019602745061706684, "loss": 0.2199, "step": 10020 }, { "epoch": 0.01776983436748379, "grad_norm": 0.48828125, "learning_rate": 0.001960257016480476, "loss": 0.2057, "step": 10022 }, { "epoch": 0.017773380532793606, "grad_norm": 0.40625, "learning_rate": 0.0019602395230280184, "loss": 0.1974, "step": 10024 }, { "epoch": 0.01777692669810342, "grad_norm": 0.69921875, "learning_rate": 0.0019602220258133733, "loss": 0.2449, "step": 10026 }, { "epoch": 0.01778047286341324, "grad_norm": 0.28125, "learning_rate": 0.001960204524836616, "loss": 0.2083, "step": 10028 }, { "epoch": 0.017784019028723053, "grad_norm": 0.80859375, "learning_rate": 0.001960187020097825, "loss": 0.3281, "step": 10030 }, { "epoch": 0.017787565194032868, "grad_norm": 0.31640625, "learning_rate": 0.0019601695115970745, "loss": 0.2249, "step": 10032 }, { "epoch": 0.017791111359342682, "grad_norm": 0.43359375, "learning_rate": 0.0019601519993344427, "loss": 0.2312, "step": 10034 }, { "epoch": 0.017794657524652497, "grad_norm": 0.318359375, "learning_rate": 0.001960134483310006, "loss": 0.3007, "step": 10036 }, { "epoch": 0.01779820368996231, "grad_norm": 0.392578125, "learning_rate": 0.00196011696352384, "loss": 0.2588, "step": 10038 }, { "epoch": 0.01780174985527213, "grad_norm": 0.36328125, "learning_rate": 0.0019600994399760225, "loss": 0.2709, "step": 10040 }, { "epoch": 0.017805296020581944, "grad_norm": 0.474609375, "learning_rate": 0.0019600819126666296, "loss": 0.2179, "step": 10042 }, { "epoch": 0.01780884218589176, "grad_norm": 0.54296875, "learning_rate": 0.0019600643815957377, "loss": 0.1617, "step": 10044 }, { "epoch": 0.017812388351201573, "grad_norm": 0.341796875, "learning_rate": 0.0019600468467634237, "loss": 0.2976, "step": 10046 }, { "epoch": 0.017815934516511388, "grad_norm": 0.50390625, "learning_rate": 0.0019600293081697647, "loss": 0.2313, "step": 10048 }, { "epoch": 0.017819480681821206, "grad_norm": 0.27734375, "learning_rate": 0.001960011765814837, "loss": 0.2177, "step": 10050 }, { "epoch": 0.01782302684713102, "grad_norm": 0.345703125, "learning_rate": 0.0019599942196987176, "loss": 0.2227, "step": 10052 }, { "epoch": 0.017826573012440835, "grad_norm": 2.71875, "learning_rate": 0.001959976669821483, "loss": 0.4067, "step": 10054 }, { "epoch": 0.01783011917775065, "grad_norm": 1.4921875, "learning_rate": 0.0019599591161832096, "loss": 0.2393, "step": 10056 }, { "epoch": 0.017833665343060464, "grad_norm": 0.61328125, "learning_rate": 0.001959941558783975, "loss": 0.2976, "step": 10058 }, { "epoch": 0.01783721150837028, "grad_norm": 0.291015625, "learning_rate": 0.001959923997623855, "loss": 0.2025, "step": 10060 }, { "epoch": 0.017840757673680097, "grad_norm": 0.82421875, "learning_rate": 0.0019599064327029273, "loss": 0.2785, "step": 10062 }, { "epoch": 0.01784430383898991, "grad_norm": 0.8828125, "learning_rate": 0.0019598888640212685, "loss": 0.22, "step": 10064 }, { "epoch": 0.017847850004299726, "grad_norm": 0.29296875, "learning_rate": 0.001959871291578955, "loss": 0.2273, "step": 10066 }, { "epoch": 0.01785139616960954, "grad_norm": 0.31640625, "learning_rate": 0.001959853715376064, "loss": 0.1614, "step": 10068 }, { "epoch": 0.017854942334919355, "grad_norm": 0.49609375, "learning_rate": 0.001959836135412673, "loss": 0.2047, "step": 10070 }, { "epoch": 0.01785848850022917, "grad_norm": 1.2265625, "learning_rate": 0.001959818551688857, "loss": 0.3237, "step": 10072 }, { "epoch": 0.017862034665538987, "grad_norm": 0.375, "learning_rate": 0.0019598009642046946, "loss": 0.284, "step": 10074 }, { "epoch": 0.017865580830848802, "grad_norm": 0.69140625, "learning_rate": 0.001959783372960262, "loss": 0.2585, "step": 10076 }, { "epoch": 0.017869126996158616, "grad_norm": 0.64453125, "learning_rate": 0.001959765777955637, "loss": 0.2346, "step": 10078 }, { "epoch": 0.01787267316146843, "grad_norm": 0.8046875, "learning_rate": 0.0019597481791908955, "loss": 0.2129, "step": 10080 }, { "epoch": 0.017876219326778246, "grad_norm": 0.40234375, "learning_rate": 0.001959730576666115, "loss": 0.1958, "step": 10082 }, { "epoch": 0.017879765492088064, "grad_norm": 0.71484375, "learning_rate": 0.001959712970381372, "loss": 0.1828, "step": 10084 }, { "epoch": 0.017883311657397878, "grad_norm": 0.375, "learning_rate": 0.0019596953603367444, "loss": 0.2684, "step": 10086 }, { "epoch": 0.017886857822707693, "grad_norm": 1.4296875, "learning_rate": 0.001959677746532308, "loss": 0.2528, "step": 10088 }, { "epoch": 0.017890403988017507, "grad_norm": 0.41796875, "learning_rate": 0.0019596601289681406, "loss": 0.2253, "step": 10090 }, { "epoch": 0.017893950153327322, "grad_norm": 0.59765625, "learning_rate": 0.0019596425076443195, "loss": 0.2649, "step": 10092 }, { "epoch": 0.017897496318637136, "grad_norm": 0.37109375, "learning_rate": 0.0019596248825609216, "loss": 0.2368, "step": 10094 }, { "epoch": 0.017901042483946954, "grad_norm": 0.76953125, "learning_rate": 0.0019596072537180235, "loss": 0.22, "step": 10096 }, { "epoch": 0.01790458864925677, "grad_norm": 6.125, "learning_rate": 0.001959589621115702, "loss": 0.444, "step": 10098 }, { "epoch": 0.017908134814566583, "grad_norm": 1.984375, "learning_rate": 0.0019595719847540355, "loss": 0.3407, "step": 10100 }, { "epoch": 0.017911680979876398, "grad_norm": 0.62109375, "learning_rate": 0.0019595543446331008, "loss": 0.2183, "step": 10102 }, { "epoch": 0.017915227145186213, "grad_norm": 0.88671875, "learning_rate": 0.001959536700752974, "loss": 0.2608, "step": 10104 }, { "epoch": 0.017918773310496027, "grad_norm": 0.6875, "learning_rate": 0.0019595190531137333, "loss": 0.3895, "step": 10106 }, { "epoch": 0.017922319475805845, "grad_norm": 3.53125, "learning_rate": 0.001959501401715455, "loss": 0.3692, "step": 10108 }, { "epoch": 0.01792586564111566, "grad_norm": 0.6328125, "learning_rate": 0.0019594837465582173, "loss": 0.2468, "step": 10110 }, { "epoch": 0.017929411806425474, "grad_norm": 0.453125, "learning_rate": 0.001959466087642097, "loss": 0.2157, "step": 10112 }, { "epoch": 0.01793295797173529, "grad_norm": 1.3359375, "learning_rate": 0.001959448424967171, "loss": 0.2063, "step": 10114 }, { "epoch": 0.017936504137045103, "grad_norm": 0.3515625, "learning_rate": 0.001959430758533517, "loss": 0.2479, "step": 10116 }, { "epoch": 0.01794005030235492, "grad_norm": 0.83203125, "learning_rate": 0.001959413088341212, "loss": 0.2144, "step": 10118 }, { "epoch": 0.017943596467664736, "grad_norm": 0.55859375, "learning_rate": 0.0019593954143903333, "loss": 0.2429, "step": 10120 }, { "epoch": 0.01794714263297455, "grad_norm": 0.5078125, "learning_rate": 0.0019593777366809584, "loss": 0.2281, "step": 10122 }, { "epoch": 0.017950688798284365, "grad_norm": 3.78125, "learning_rate": 0.0019593600552131646, "loss": 0.1981, "step": 10124 }, { "epoch": 0.01795423496359418, "grad_norm": 1.890625, "learning_rate": 0.0019593423699870286, "loss": 0.3672, "step": 10126 }, { "epoch": 0.017957781128903994, "grad_norm": 0.3359375, "learning_rate": 0.0019593246810026286, "loss": 0.2159, "step": 10128 }, { "epoch": 0.017961327294213812, "grad_norm": 0.3046875, "learning_rate": 0.0019593069882600416, "loss": 0.2239, "step": 10130 }, { "epoch": 0.017964873459523627, "grad_norm": 0.3828125, "learning_rate": 0.001959289291759345, "loss": 0.2239, "step": 10132 }, { "epoch": 0.01796841962483344, "grad_norm": 2.21875, "learning_rate": 0.0019592715915006157, "loss": 0.2045, "step": 10134 }, { "epoch": 0.017971965790143256, "grad_norm": 0.375, "learning_rate": 0.0019592538874839325, "loss": 0.2109, "step": 10136 }, { "epoch": 0.01797551195545307, "grad_norm": 0.439453125, "learning_rate": 0.001959236179709371, "loss": 0.2654, "step": 10138 }, { "epoch": 0.017979058120762885, "grad_norm": 0.357421875, "learning_rate": 0.00195921846817701, "loss": 0.1866, "step": 10140 }, { "epoch": 0.017982604286072703, "grad_norm": 0.4765625, "learning_rate": 0.0019592007528869263, "loss": 0.2253, "step": 10142 }, { "epoch": 0.017986150451382518, "grad_norm": 3.5625, "learning_rate": 0.0019591830338391977, "loss": 0.3771, "step": 10144 }, { "epoch": 0.017989696616692332, "grad_norm": 0.431640625, "learning_rate": 0.001959165311033902, "loss": 0.2116, "step": 10146 }, { "epoch": 0.017993242782002147, "grad_norm": 0.341796875, "learning_rate": 0.0019591475844711157, "loss": 0.244, "step": 10148 }, { "epoch": 0.01799678894731196, "grad_norm": 0.208984375, "learning_rate": 0.001959129854150917, "loss": 0.2034, "step": 10150 }, { "epoch": 0.01800033511262178, "grad_norm": 0.447265625, "learning_rate": 0.001959112120073384, "loss": 0.2257, "step": 10152 }, { "epoch": 0.018003881277931594, "grad_norm": 0.482421875, "learning_rate": 0.0019590943822385925, "loss": 0.1996, "step": 10154 }, { "epoch": 0.01800742744324141, "grad_norm": 0.484375, "learning_rate": 0.001959076640646622, "loss": 0.2547, "step": 10156 }, { "epoch": 0.018010973608551223, "grad_norm": 0.34765625, "learning_rate": 0.001959058895297549, "loss": 0.1912, "step": 10158 }, { "epoch": 0.018014519773861037, "grad_norm": 0.431640625, "learning_rate": 0.0019590411461914516, "loss": 0.2222, "step": 10160 }, { "epoch": 0.018018065939170852, "grad_norm": 0.373046875, "learning_rate": 0.001959023393328407, "loss": 0.1483, "step": 10162 }, { "epoch": 0.01802161210448067, "grad_norm": 0.88671875, "learning_rate": 0.001959005636708493, "loss": 0.1871, "step": 10164 }, { "epoch": 0.018025158269790485, "grad_norm": 0.94140625, "learning_rate": 0.0019589878763317872, "loss": 0.3038, "step": 10166 }, { "epoch": 0.0180287044351003, "grad_norm": 0.64453125, "learning_rate": 0.001958970112198368, "loss": 0.2151, "step": 10168 }, { "epoch": 0.018032250600410114, "grad_norm": 0.75, "learning_rate": 0.0019589523443083122, "loss": 0.2968, "step": 10170 }, { "epoch": 0.018035796765719928, "grad_norm": 0.44140625, "learning_rate": 0.0019589345726616974, "loss": 0.2659, "step": 10172 }, { "epoch": 0.018039342931029743, "grad_norm": 1.6953125, "learning_rate": 0.0019589167972586022, "loss": 0.2288, "step": 10174 }, { "epoch": 0.01804288909633956, "grad_norm": 3.46875, "learning_rate": 0.001958899018099104, "loss": 0.2663, "step": 10176 }, { "epoch": 0.018046435261649375, "grad_norm": 0.8359375, "learning_rate": 0.0019588812351832795, "loss": 0.1911, "step": 10178 }, { "epoch": 0.01804998142695919, "grad_norm": 1.1015625, "learning_rate": 0.001958863448511208, "loss": 0.2843, "step": 10180 }, { "epoch": 0.018053527592269004, "grad_norm": 0.9140625, "learning_rate": 0.001958845658082967, "loss": 0.2965, "step": 10182 }, { "epoch": 0.01805707375757882, "grad_norm": 0.87109375, "learning_rate": 0.0019588278638986334, "loss": 0.1668, "step": 10184 }, { "epoch": 0.018060619922888637, "grad_norm": 1.03125, "learning_rate": 0.0019588100659582858, "loss": 0.4837, "step": 10186 }, { "epoch": 0.01806416608819845, "grad_norm": 0.451171875, "learning_rate": 0.0019587922642620016, "loss": 0.1943, "step": 10188 }, { "epoch": 0.018067712253508266, "grad_norm": 0.306640625, "learning_rate": 0.0019587744588098594, "loss": 0.1822, "step": 10190 }, { "epoch": 0.01807125841881808, "grad_norm": 0.3828125, "learning_rate": 0.001958756649601936, "loss": 0.169, "step": 10192 }, { "epoch": 0.018074804584127895, "grad_norm": 0.466796875, "learning_rate": 0.00195873883663831, "loss": 0.201, "step": 10194 }, { "epoch": 0.01807835074943771, "grad_norm": 1.3828125, "learning_rate": 0.0019587210199190595, "loss": 0.2239, "step": 10196 }, { "epoch": 0.018081896914747528, "grad_norm": 0.2275390625, "learning_rate": 0.0019587031994442615, "loss": 0.2852, "step": 10198 }, { "epoch": 0.018085443080057342, "grad_norm": 0.5625, "learning_rate": 0.0019586853752139952, "loss": 0.2208, "step": 10200 }, { "epoch": 0.018088989245367157, "grad_norm": 1.7265625, "learning_rate": 0.001958667547228337, "loss": 0.2622, "step": 10202 }, { "epoch": 0.01809253541067697, "grad_norm": 0.96875, "learning_rate": 0.001958649715487366, "loss": 0.176, "step": 10204 }, { "epoch": 0.018096081575986786, "grad_norm": 0.5859375, "learning_rate": 0.0019586318799911604, "loss": 0.2102, "step": 10206 }, { "epoch": 0.0180996277412966, "grad_norm": 0.80859375, "learning_rate": 0.0019586140407397974, "loss": 0.4113, "step": 10208 }, { "epoch": 0.01810317390660642, "grad_norm": 0.42578125, "learning_rate": 0.0019585961977333552, "loss": 0.245, "step": 10210 }, { "epoch": 0.018106720071916233, "grad_norm": 3.78125, "learning_rate": 0.001958578350971912, "loss": 0.3333, "step": 10212 }, { "epoch": 0.018110266237226048, "grad_norm": 0.6796875, "learning_rate": 0.001958560500455546, "loss": 0.2492, "step": 10214 }, { "epoch": 0.018113812402535862, "grad_norm": 0.453125, "learning_rate": 0.001958542646184335, "loss": 0.2149, "step": 10216 }, { "epoch": 0.018117358567845677, "grad_norm": 0.73828125, "learning_rate": 0.001958524788158357, "loss": 0.2814, "step": 10218 }, { "epoch": 0.018120904733155495, "grad_norm": 0.65234375, "learning_rate": 0.0019585069263776903, "loss": 0.277, "step": 10220 }, { "epoch": 0.01812445089846531, "grad_norm": 0.51171875, "learning_rate": 0.001958489060842413, "loss": 0.3089, "step": 10222 }, { "epoch": 0.018127997063775124, "grad_norm": 1.40625, "learning_rate": 0.0019584711915526035, "loss": 0.2348, "step": 10224 }, { "epoch": 0.01813154322908494, "grad_norm": 0.73828125, "learning_rate": 0.0019584533185083393, "loss": 0.2089, "step": 10226 }, { "epoch": 0.018135089394394753, "grad_norm": 0.63671875, "learning_rate": 0.0019584354417096993, "loss": 0.2093, "step": 10228 }, { "epoch": 0.018138635559704568, "grad_norm": 0.76953125, "learning_rate": 0.001958417561156761, "loss": 0.3352, "step": 10230 }, { "epoch": 0.018142181725014386, "grad_norm": 0.71484375, "learning_rate": 0.0019583996768496033, "loss": 0.2888, "step": 10232 }, { "epoch": 0.0181457278903242, "grad_norm": 1.0, "learning_rate": 0.0019583817887883037, "loss": 0.328, "step": 10234 }, { "epoch": 0.018149274055634015, "grad_norm": 0.255859375, "learning_rate": 0.0019583638969729407, "loss": 0.1834, "step": 10236 }, { "epoch": 0.01815282022094383, "grad_norm": 1.7265625, "learning_rate": 0.0019583460014035927, "loss": 0.2001, "step": 10238 }, { "epoch": 0.018156366386253644, "grad_norm": 0.326171875, "learning_rate": 0.0019583281020803377, "loss": 0.2567, "step": 10240 }, { "epoch": 0.01815991255156346, "grad_norm": 0.73828125, "learning_rate": 0.0019583101990032544, "loss": 0.2188, "step": 10242 }, { "epoch": 0.018163458716873276, "grad_norm": 1.8671875, "learning_rate": 0.0019582922921724207, "loss": 0.3756, "step": 10244 }, { "epoch": 0.01816700488218309, "grad_norm": 1.2265625, "learning_rate": 0.001958274381587915, "loss": 0.2334, "step": 10246 }, { "epoch": 0.018170551047492906, "grad_norm": 2.109375, "learning_rate": 0.0019582564672498163, "loss": 0.4077, "step": 10248 }, { "epoch": 0.01817409721280272, "grad_norm": 0.44140625, "learning_rate": 0.001958238549158202, "loss": 0.1877, "step": 10250 }, { "epoch": 0.018177643378112535, "grad_norm": 0.59375, "learning_rate": 0.0019582206273131507, "loss": 0.2476, "step": 10252 }, { "epoch": 0.018181189543422353, "grad_norm": 0.51171875, "learning_rate": 0.0019582027017147406, "loss": 0.176, "step": 10254 }, { "epoch": 0.018184735708732167, "grad_norm": 0.4375, "learning_rate": 0.001958184772363051, "loss": 0.36, "step": 10256 }, { "epoch": 0.018188281874041982, "grad_norm": 0.6171875, "learning_rate": 0.0019581668392581594, "loss": 0.2001, "step": 10258 }, { "epoch": 0.018191828039351796, "grad_norm": 0.353515625, "learning_rate": 0.0019581489024001445, "loss": 0.215, "step": 10260 }, { "epoch": 0.01819537420466161, "grad_norm": 1.6875, "learning_rate": 0.0019581309617890848, "loss": 0.2457, "step": 10262 }, { "epoch": 0.018198920369971425, "grad_norm": 0.3359375, "learning_rate": 0.001958113017425059, "loss": 0.2749, "step": 10264 }, { "epoch": 0.018202466535281243, "grad_norm": 1.0234375, "learning_rate": 0.001958095069308145, "loss": 0.235, "step": 10266 }, { "epoch": 0.018206012700591058, "grad_norm": 1.4921875, "learning_rate": 0.0019580771174384217, "loss": 0.2405, "step": 10268 }, { "epoch": 0.018209558865900873, "grad_norm": 0.216796875, "learning_rate": 0.001958059161815967, "loss": 0.2308, "step": 10270 }, { "epoch": 0.018213105031210687, "grad_norm": 0.474609375, "learning_rate": 0.0019580412024408607, "loss": 0.4021, "step": 10272 }, { "epoch": 0.0182166511965205, "grad_norm": 0.51171875, "learning_rate": 0.0019580232393131805, "loss": 0.1867, "step": 10274 }, { "epoch": 0.018220197361830316, "grad_norm": 0.28515625, "learning_rate": 0.001958005272433005, "loss": 0.2155, "step": 10276 }, { "epoch": 0.018223743527140134, "grad_norm": 0.21875, "learning_rate": 0.001957987301800413, "loss": 0.2168, "step": 10278 }, { "epoch": 0.01822728969244995, "grad_norm": 0.478515625, "learning_rate": 0.0019579693274154823, "loss": 0.1634, "step": 10280 }, { "epoch": 0.018230835857759763, "grad_norm": 0.54296875, "learning_rate": 0.0019579513492782924, "loss": 0.2428, "step": 10282 }, { "epoch": 0.018234382023069578, "grad_norm": 0.7578125, "learning_rate": 0.0019579333673889216, "loss": 0.2462, "step": 10284 }, { "epoch": 0.018237928188379392, "grad_norm": 0.51953125, "learning_rate": 0.0019579153817474485, "loss": 0.2854, "step": 10286 }, { "epoch": 0.01824147435368921, "grad_norm": 1.4609375, "learning_rate": 0.0019578973923539524, "loss": 0.368, "step": 10288 }, { "epoch": 0.018245020518999025, "grad_norm": 0.2734375, "learning_rate": 0.001957879399208511, "loss": 0.2404, "step": 10290 }, { "epoch": 0.01824856668430884, "grad_norm": 0.3046875, "learning_rate": 0.001957861402311204, "loss": 0.3172, "step": 10292 }, { "epoch": 0.018252112849618654, "grad_norm": 0.453125, "learning_rate": 0.0019578434016621084, "loss": 0.1834, "step": 10294 }, { "epoch": 0.01825565901492847, "grad_norm": 0.70703125, "learning_rate": 0.001957825397261305, "loss": 0.2276, "step": 10296 }, { "epoch": 0.018259205180238283, "grad_norm": 0.70703125, "learning_rate": 0.001957807389108871, "loss": 0.2495, "step": 10298 }, { "epoch": 0.0182627513455481, "grad_norm": 0.31640625, "learning_rate": 0.0019577893772048864, "loss": 0.244, "step": 10300 }, { "epoch": 0.018266297510857916, "grad_norm": 0.46484375, "learning_rate": 0.001957771361549429, "loss": 0.2325, "step": 10302 }, { "epoch": 0.01826984367616773, "grad_norm": 0.5625, "learning_rate": 0.0019577533421425777, "loss": 0.2792, "step": 10304 }, { "epoch": 0.018273389841477545, "grad_norm": 0.6328125, "learning_rate": 0.0019577353189844117, "loss": 0.2202, "step": 10306 }, { "epoch": 0.01827693600678736, "grad_norm": 0.546875, "learning_rate": 0.00195771729207501, "loss": 0.1957, "step": 10308 }, { "epoch": 0.018280482172097174, "grad_norm": 1.046875, "learning_rate": 0.0019576992614144507, "loss": 0.1921, "step": 10310 }, { "epoch": 0.018284028337406992, "grad_norm": 0.314453125, "learning_rate": 0.001957681227002813, "loss": 0.2769, "step": 10312 }, { "epoch": 0.018287574502716807, "grad_norm": 1.0234375, "learning_rate": 0.001957663188840176, "loss": 0.2295, "step": 10314 }, { "epoch": 0.01829112066802662, "grad_norm": 0.3984375, "learning_rate": 0.0019576451469266185, "loss": 0.2137, "step": 10316 }, { "epoch": 0.018294666833336436, "grad_norm": 0.55078125, "learning_rate": 0.001957627101262219, "loss": 0.4195, "step": 10318 }, { "epoch": 0.01829821299864625, "grad_norm": 0.57421875, "learning_rate": 0.001957609051847057, "loss": 0.3452, "step": 10320 }, { "epoch": 0.018301759163956068, "grad_norm": 0.3984375, "learning_rate": 0.001957590998681211, "loss": 0.3118, "step": 10322 }, { "epoch": 0.018305305329265883, "grad_norm": 0.9765625, "learning_rate": 0.0019575729417647602, "loss": 0.2397, "step": 10324 }, { "epoch": 0.018308851494575697, "grad_norm": 0.6015625, "learning_rate": 0.001957554881097783, "loss": 0.2115, "step": 10326 }, { "epoch": 0.018312397659885512, "grad_norm": 0.78125, "learning_rate": 0.0019575368166803594, "loss": 0.2911, "step": 10328 }, { "epoch": 0.018315943825195326, "grad_norm": 1.2734375, "learning_rate": 0.001957518748512568, "loss": 0.251, "step": 10330 }, { "epoch": 0.01831948999050514, "grad_norm": 0.5390625, "learning_rate": 0.0019575006765944875, "loss": 0.2515, "step": 10332 }, { "epoch": 0.01832303615581496, "grad_norm": 0.8828125, "learning_rate": 0.001957482600926197, "loss": 0.2356, "step": 10334 }, { "epoch": 0.018326582321124774, "grad_norm": 1.5859375, "learning_rate": 0.0019574645215077757, "loss": 0.2841, "step": 10336 }, { "epoch": 0.018330128486434588, "grad_norm": 0.373046875, "learning_rate": 0.001957446438339303, "loss": 0.2069, "step": 10338 }, { "epoch": 0.018333674651744403, "grad_norm": 0.349609375, "learning_rate": 0.0019574283514208575, "loss": 0.2113, "step": 10340 }, { "epoch": 0.018337220817054217, "grad_norm": 5.59375, "learning_rate": 0.001957410260752518, "loss": 0.2449, "step": 10342 }, { "epoch": 0.018340766982364032, "grad_norm": 0.515625, "learning_rate": 0.0019573921663343648, "loss": 0.1909, "step": 10344 }, { "epoch": 0.01834431314767385, "grad_norm": 0.63671875, "learning_rate": 0.0019573740681664755, "loss": 0.2399, "step": 10346 }, { "epoch": 0.018347859312983664, "grad_norm": 0.5703125, "learning_rate": 0.0019573559662489303, "loss": 0.2411, "step": 10348 }, { "epoch": 0.01835140547829348, "grad_norm": 2.9375, "learning_rate": 0.0019573378605818085, "loss": 0.4718, "step": 10350 }, { "epoch": 0.018354951643603293, "grad_norm": 0.26953125, "learning_rate": 0.001957319751165189, "loss": 0.2428, "step": 10352 }, { "epoch": 0.018358497808913108, "grad_norm": 2.59375, "learning_rate": 0.0019573016379991503, "loss": 0.215, "step": 10354 }, { "epoch": 0.018362043974222926, "grad_norm": 0.6875, "learning_rate": 0.0019572835210837727, "loss": 0.2647, "step": 10356 }, { "epoch": 0.01836559013953274, "grad_norm": 0.6015625, "learning_rate": 0.0019572654004191346, "loss": 0.2356, "step": 10358 }, { "epoch": 0.018369136304842555, "grad_norm": 0.5859375, "learning_rate": 0.001957247276005316, "loss": 0.1624, "step": 10360 }, { "epoch": 0.01837268247015237, "grad_norm": 0.72265625, "learning_rate": 0.0019572291478423954, "loss": 0.1786, "step": 10362 }, { "epoch": 0.018376228635462184, "grad_norm": 0.671875, "learning_rate": 0.001957211015930452, "loss": 0.2001, "step": 10364 }, { "epoch": 0.018379774800772, "grad_norm": 0.90625, "learning_rate": 0.001957192880269567, "loss": 0.3353, "step": 10366 }, { "epoch": 0.018383320966081817, "grad_norm": 0.5546875, "learning_rate": 0.001957174740859817, "loss": 0.1889, "step": 10368 }, { "epoch": 0.01838686713139163, "grad_norm": 1.1171875, "learning_rate": 0.001957156597701283, "loss": 0.3182, "step": 10370 }, { "epoch": 0.018390413296701446, "grad_norm": 0.53515625, "learning_rate": 0.0019571384507940438, "loss": 0.2011, "step": 10372 }, { "epoch": 0.01839395946201126, "grad_norm": 0.259765625, "learning_rate": 0.0019571203001381788, "loss": 0.215, "step": 10374 }, { "epoch": 0.018397505627321075, "grad_norm": 0.3984375, "learning_rate": 0.001957102145733768, "loss": 0.1834, "step": 10376 }, { "epoch": 0.01840105179263089, "grad_norm": 0.69140625, "learning_rate": 0.0019570839875808895, "loss": 0.5054, "step": 10378 }, { "epoch": 0.018404597957940708, "grad_norm": 0.32421875, "learning_rate": 0.001957065825679624, "loss": 0.1915, "step": 10380 }, { "epoch": 0.018408144123250522, "grad_norm": 0.82421875, "learning_rate": 0.0019570476600300505, "loss": 0.2373, "step": 10382 }, { "epoch": 0.018411690288560337, "grad_norm": 0.6171875, "learning_rate": 0.0019570294906322483, "loss": 0.2582, "step": 10384 }, { "epoch": 0.01841523645387015, "grad_norm": 0.388671875, "learning_rate": 0.0019570113174862966, "loss": 0.2506, "step": 10386 }, { "epoch": 0.018418782619179966, "grad_norm": 1.1640625, "learning_rate": 0.0019569931405922754, "loss": 0.2311, "step": 10388 }, { "epoch": 0.018422328784489784, "grad_norm": 0.7109375, "learning_rate": 0.0019569749599502645, "loss": 0.2086, "step": 10390 }, { "epoch": 0.0184258749497996, "grad_norm": 0.60546875, "learning_rate": 0.0019569567755603422, "loss": 0.2449, "step": 10392 }, { "epoch": 0.018429421115109413, "grad_norm": 0.3515625, "learning_rate": 0.001956938587422589, "loss": 0.1607, "step": 10394 }, { "epoch": 0.018432967280419228, "grad_norm": 0.65625, "learning_rate": 0.0019569203955370844, "loss": 0.2871, "step": 10396 }, { "epoch": 0.018436513445729042, "grad_norm": 1.3515625, "learning_rate": 0.001956902199903907, "loss": 0.2812, "step": 10398 }, { "epoch": 0.018440059611038857, "grad_norm": 0.484375, "learning_rate": 0.0019568840005231383, "loss": 0.223, "step": 10400 }, { "epoch": 0.018443605776348675, "grad_norm": 0.6796875, "learning_rate": 0.001956865797394856, "loss": 0.2565, "step": 10402 }, { "epoch": 0.01844715194165849, "grad_norm": 0.45703125, "learning_rate": 0.0019568475905191404, "loss": 0.2083, "step": 10404 }, { "epoch": 0.018450698106968304, "grad_norm": 0.59375, "learning_rate": 0.0019568293798960714, "loss": 0.2588, "step": 10406 }, { "epoch": 0.01845424427227812, "grad_norm": 0.75390625, "learning_rate": 0.0019568111655257282, "loss": 0.2578, "step": 10408 }, { "epoch": 0.018457790437587933, "grad_norm": 0.7890625, "learning_rate": 0.001956792947408191, "loss": 0.2195, "step": 10410 }, { "epoch": 0.018461336602897747, "grad_norm": 0.21484375, "learning_rate": 0.0019567747255435385, "loss": 0.2523, "step": 10412 }, { "epoch": 0.018464882768207565, "grad_norm": 0.546875, "learning_rate": 0.0019567564999318516, "loss": 0.2143, "step": 10414 }, { "epoch": 0.01846842893351738, "grad_norm": 0.353515625, "learning_rate": 0.001956738270573209, "loss": 0.248, "step": 10416 }, { "epoch": 0.018471975098827195, "grad_norm": 1.0703125, "learning_rate": 0.001956720037467691, "loss": 0.2287, "step": 10418 }, { "epoch": 0.01847552126413701, "grad_norm": 0.94921875, "learning_rate": 0.0019567018006153777, "loss": 0.2996, "step": 10420 }, { "epoch": 0.018479067429446824, "grad_norm": 0.8125, "learning_rate": 0.001956683560016348, "loss": 0.1991, "step": 10422 }, { "epoch": 0.01848261359475664, "grad_norm": 0.271484375, "learning_rate": 0.001956665315670682, "loss": 0.1997, "step": 10424 }, { "epoch": 0.018486159760066456, "grad_norm": 0.435546875, "learning_rate": 0.0019566470675784595, "loss": 0.2157, "step": 10426 }, { "epoch": 0.01848970592537627, "grad_norm": 0.765625, "learning_rate": 0.00195662881573976, "loss": 0.2223, "step": 10428 }, { "epoch": 0.018493252090686085, "grad_norm": 1.109375, "learning_rate": 0.001956610560154664, "loss": 0.1762, "step": 10430 }, { "epoch": 0.0184967982559959, "grad_norm": 0.6328125, "learning_rate": 0.001956592300823251, "loss": 0.2002, "step": 10432 }, { "epoch": 0.018500344421305714, "grad_norm": 0.62109375, "learning_rate": 0.001956574037745601, "loss": 0.2455, "step": 10434 }, { "epoch": 0.018503890586615532, "grad_norm": 0.494140625, "learning_rate": 0.0019565557709217934, "loss": 0.2304, "step": 10436 }, { "epoch": 0.018507436751925347, "grad_norm": 1.5625, "learning_rate": 0.0019565375003519087, "loss": 0.5566, "step": 10438 }, { "epoch": 0.01851098291723516, "grad_norm": 1.1640625, "learning_rate": 0.001956519226036026, "loss": 0.3678, "step": 10440 }, { "epoch": 0.018514529082544976, "grad_norm": 0.55859375, "learning_rate": 0.0019565009479742264, "loss": 0.2132, "step": 10442 }, { "epoch": 0.01851807524785479, "grad_norm": 0.546875, "learning_rate": 0.0019564826661665887, "loss": 0.2474, "step": 10444 }, { "epoch": 0.018521621413164605, "grad_norm": 0.435546875, "learning_rate": 0.0019564643806131935, "loss": 0.2052, "step": 10446 }, { "epoch": 0.018525167578474423, "grad_norm": 1.3046875, "learning_rate": 0.0019564460913141205, "loss": 0.2361, "step": 10448 }, { "epoch": 0.018528713743784238, "grad_norm": 0.3828125, "learning_rate": 0.0019564277982694494, "loss": 0.2359, "step": 10450 }, { "epoch": 0.018532259909094052, "grad_norm": 0.65625, "learning_rate": 0.0019564095014792614, "loss": 0.2016, "step": 10452 }, { "epoch": 0.018535806074403867, "grad_norm": 0.65234375, "learning_rate": 0.0019563912009436355, "loss": 0.3994, "step": 10454 }, { "epoch": 0.01853935223971368, "grad_norm": 0.6015625, "learning_rate": 0.0019563728966626517, "loss": 0.2731, "step": 10456 }, { "epoch": 0.0185428984050235, "grad_norm": 0.2490234375, "learning_rate": 0.00195635458863639, "loss": 0.2361, "step": 10458 }, { "epoch": 0.018546444570333314, "grad_norm": 0.52734375, "learning_rate": 0.0019563362768649315, "loss": 0.2998, "step": 10460 }, { "epoch": 0.01854999073564313, "grad_norm": 0.6015625, "learning_rate": 0.001956317961348355, "loss": 0.2047, "step": 10462 }, { "epoch": 0.018553536900952943, "grad_norm": 2.0625, "learning_rate": 0.0019562996420867413, "loss": 0.3433, "step": 10464 }, { "epoch": 0.018557083066262758, "grad_norm": 0.61328125, "learning_rate": 0.0019562813190801705, "loss": 0.1959, "step": 10466 }, { "epoch": 0.018560629231572572, "grad_norm": 0.478515625, "learning_rate": 0.001956262992328722, "loss": 0.2983, "step": 10468 }, { "epoch": 0.01856417539688239, "grad_norm": 0.2890625, "learning_rate": 0.001956244661832477, "loss": 0.1771, "step": 10470 }, { "epoch": 0.018567721562192205, "grad_norm": 0.365234375, "learning_rate": 0.0019562263275915157, "loss": 0.2537, "step": 10472 }, { "epoch": 0.01857126772750202, "grad_norm": 0.53515625, "learning_rate": 0.001956207989605917, "loss": 0.1784, "step": 10474 }, { "epoch": 0.018574813892811834, "grad_norm": 0.5859375, "learning_rate": 0.0019561896478757623, "loss": 0.1929, "step": 10476 }, { "epoch": 0.01857836005812165, "grad_norm": 1.8515625, "learning_rate": 0.0019561713024011315, "loss": 0.3238, "step": 10478 }, { "epoch": 0.018581906223431463, "grad_norm": 0.3203125, "learning_rate": 0.0019561529531821045, "loss": 0.2495, "step": 10480 }, { "epoch": 0.01858545238874128, "grad_norm": 0.41015625, "learning_rate": 0.001956134600218762, "loss": 0.202, "step": 10482 }, { "epoch": 0.018588998554051096, "grad_norm": 0.7578125, "learning_rate": 0.001956116243511184, "loss": 0.208, "step": 10484 }, { "epoch": 0.01859254471936091, "grad_norm": 0.80078125, "learning_rate": 0.0019560978830594506, "loss": 0.2025, "step": 10486 }, { "epoch": 0.018596090884670725, "grad_norm": 1.53125, "learning_rate": 0.0019560795188636425, "loss": 0.2007, "step": 10488 }, { "epoch": 0.01859963704998054, "grad_norm": 0.44921875, "learning_rate": 0.0019560611509238397, "loss": 0.2096, "step": 10490 }, { "epoch": 0.018603183215290357, "grad_norm": 1.5546875, "learning_rate": 0.001956042779240123, "loss": 0.2777, "step": 10492 }, { "epoch": 0.018606729380600172, "grad_norm": 0.58984375, "learning_rate": 0.001956024403812572, "loss": 0.3095, "step": 10494 }, { "epoch": 0.018610275545909986, "grad_norm": 1.296875, "learning_rate": 0.001956006024641268, "loss": 0.41, "step": 10496 }, { "epoch": 0.0186138217112198, "grad_norm": 0.5859375, "learning_rate": 0.0019559876417262908, "loss": 0.1939, "step": 10498 }, { "epoch": 0.018617367876529616, "grad_norm": 2.125, "learning_rate": 0.0019559692550677205, "loss": 0.2805, "step": 10500 }, { "epoch": 0.01862091404183943, "grad_norm": 0.5859375, "learning_rate": 0.0019559508646656383, "loss": 0.2347, "step": 10502 }, { "epoch": 0.018624460207149248, "grad_norm": 0.83203125, "learning_rate": 0.0019559324705201242, "loss": 0.2676, "step": 10504 }, { "epoch": 0.018628006372459063, "grad_norm": 0.69140625, "learning_rate": 0.001955914072631258, "loss": 0.2571, "step": 10506 }, { "epoch": 0.018631552537768877, "grad_norm": 0.248046875, "learning_rate": 0.0019558956709991217, "loss": 0.1935, "step": 10508 }, { "epoch": 0.01863509870307869, "grad_norm": 0.478515625, "learning_rate": 0.0019558772656237946, "loss": 0.2045, "step": 10510 }, { "epoch": 0.018638644868388506, "grad_norm": 0.3203125, "learning_rate": 0.001955858856505357, "loss": 0.1858, "step": 10512 }, { "epoch": 0.01864219103369832, "grad_norm": 0.44140625, "learning_rate": 0.0019558404436438906, "loss": 0.1759, "step": 10514 }, { "epoch": 0.01864573719900814, "grad_norm": 0.322265625, "learning_rate": 0.0019558220270394747, "loss": 0.2053, "step": 10516 }, { "epoch": 0.018649283364317953, "grad_norm": 0.3125, "learning_rate": 0.0019558036066921907, "loss": 0.2209, "step": 10518 }, { "epoch": 0.018652829529627768, "grad_norm": 0.90234375, "learning_rate": 0.0019557851826021186, "loss": 0.2606, "step": 10520 }, { "epoch": 0.018656375694937583, "grad_norm": 0.40625, "learning_rate": 0.0019557667547693393, "loss": 0.2002, "step": 10522 }, { "epoch": 0.018659921860247397, "grad_norm": 0.3359375, "learning_rate": 0.0019557483231939336, "loss": 0.1602, "step": 10524 }, { "epoch": 0.018663468025557215, "grad_norm": 0.375, "learning_rate": 0.0019557298878759813, "loss": 0.2976, "step": 10526 }, { "epoch": 0.01866701419086703, "grad_norm": 0.68359375, "learning_rate": 0.001955711448815564, "loss": 0.2863, "step": 10528 }, { "epoch": 0.018670560356176844, "grad_norm": 8.375, "learning_rate": 0.001955693006012762, "loss": 0.3923, "step": 10530 }, { "epoch": 0.01867410652148666, "grad_norm": 0.515625, "learning_rate": 0.001955674559467655, "loss": 0.1933, "step": 10532 }, { "epoch": 0.018677652686796473, "grad_norm": 0.77734375, "learning_rate": 0.0019556561091803254, "loss": 0.1943, "step": 10534 }, { "epoch": 0.018681198852106288, "grad_norm": 0.392578125, "learning_rate": 0.0019556376551508525, "loss": 0.1805, "step": 10536 }, { "epoch": 0.018684745017416106, "grad_norm": 0.83984375, "learning_rate": 0.0019556191973793178, "loss": 0.2526, "step": 10538 }, { "epoch": 0.01868829118272592, "grad_norm": 1.328125, "learning_rate": 0.0019556007358658015, "loss": 0.2645, "step": 10540 }, { "epoch": 0.018691837348035735, "grad_norm": 0.3828125, "learning_rate": 0.0019555822706103843, "loss": 0.2595, "step": 10542 }, { "epoch": 0.01869538351334555, "grad_norm": 0.4375, "learning_rate": 0.001955563801613148, "loss": 0.2784, "step": 10544 }, { "epoch": 0.018698929678655364, "grad_norm": 4.9375, "learning_rate": 0.001955545328874172, "loss": 0.388, "step": 10546 }, { "epoch": 0.01870247584396518, "grad_norm": 0.55859375, "learning_rate": 0.001955526852393538, "loss": 0.2246, "step": 10548 }, { "epoch": 0.018706022009274997, "grad_norm": 0.439453125, "learning_rate": 0.001955508372171326, "loss": 0.258, "step": 10550 }, { "epoch": 0.01870956817458481, "grad_norm": 1.265625, "learning_rate": 0.001955489888207618, "loss": 0.4772, "step": 10552 }, { "epoch": 0.018713114339894626, "grad_norm": 1.03125, "learning_rate": 0.001955471400502494, "loss": 0.207, "step": 10554 }, { "epoch": 0.01871666050520444, "grad_norm": 0.84765625, "learning_rate": 0.0019554529090560348, "loss": 0.2586, "step": 10556 }, { "epoch": 0.018720206670514255, "grad_norm": 0.56640625, "learning_rate": 0.0019554344138683214, "loss": 0.2212, "step": 10558 }, { "epoch": 0.018723752835824073, "grad_norm": 1.875, "learning_rate": 0.001955415914939435, "loss": 0.3548, "step": 10560 }, { "epoch": 0.018727299001133887, "grad_norm": 0.435546875, "learning_rate": 0.001955397412269456, "loss": 0.1997, "step": 10562 }, { "epoch": 0.018730845166443702, "grad_norm": 1.125, "learning_rate": 0.0019553789058584657, "loss": 0.2845, "step": 10564 }, { "epoch": 0.018734391331753517, "grad_norm": 0.9453125, "learning_rate": 0.0019553603957065454, "loss": 0.1796, "step": 10566 }, { "epoch": 0.01873793749706333, "grad_norm": 0.2373046875, "learning_rate": 0.001955341881813775, "loss": 0.247, "step": 10568 }, { "epoch": 0.018741483662373146, "grad_norm": 0.349609375, "learning_rate": 0.0019553233641802364, "loss": 0.2784, "step": 10570 }, { "epoch": 0.018745029827682964, "grad_norm": 0.490234375, "learning_rate": 0.00195530484280601, "loss": 0.1938, "step": 10572 }, { "epoch": 0.018748575992992778, "grad_norm": 0.62109375, "learning_rate": 0.001955286317691177, "loss": 0.2325, "step": 10574 }, { "epoch": 0.018752122158302593, "grad_norm": 0.486328125, "learning_rate": 0.0019552677888358184, "loss": 0.277, "step": 10576 }, { "epoch": 0.018755668323612407, "grad_norm": 0.439453125, "learning_rate": 0.001955249256240016, "loss": 0.1885, "step": 10578 }, { "epoch": 0.018759214488922222, "grad_norm": 0.85546875, "learning_rate": 0.0019552307199038493, "loss": 0.2588, "step": 10580 }, { "epoch": 0.018762760654232036, "grad_norm": 0.458984375, "learning_rate": 0.0019552121798274004, "loss": 0.1862, "step": 10582 }, { "epoch": 0.018766306819541854, "grad_norm": 1.671875, "learning_rate": 0.00195519363601075, "loss": 0.2548, "step": 10584 }, { "epoch": 0.01876985298485167, "grad_norm": 0.345703125, "learning_rate": 0.00195517508845398, "loss": 0.2568, "step": 10586 }, { "epoch": 0.018773399150161484, "grad_norm": 0.236328125, "learning_rate": 0.0019551565371571707, "loss": 0.2379, "step": 10588 }, { "epoch": 0.018776945315471298, "grad_norm": 0.455078125, "learning_rate": 0.0019551379821204033, "loss": 0.2563, "step": 10590 }, { "epoch": 0.018780491480781113, "grad_norm": 0.8828125, "learning_rate": 0.001955119423343759, "loss": 0.2551, "step": 10592 }, { "epoch": 0.01878403764609093, "grad_norm": 0.359375, "learning_rate": 0.001955100860827319, "loss": 0.22, "step": 10594 }, { "epoch": 0.018787583811400745, "grad_norm": 0.76953125, "learning_rate": 0.001955082294571165, "loss": 0.2162, "step": 10596 }, { "epoch": 0.01879112997671056, "grad_norm": 0.515625, "learning_rate": 0.0019550637245753775, "loss": 0.3297, "step": 10598 }, { "epoch": 0.018794676142020374, "grad_norm": 1.0390625, "learning_rate": 0.001955045150840038, "loss": 0.2616, "step": 10600 }, { "epoch": 0.01879822230733019, "grad_norm": 0.6796875, "learning_rate": 0.0019550265733652276, "loss": 0.2574, "step": 10602 }, { "epoch": 0.018801768472640003, "grad_norm": 0.83203125, "learning_rate": 0.0019550079921510275, "loss": 0.2556, "step": 10604 }, { "epoch": 0.01880531463794982, "grad_norm": 0.306640625, "learning_rate": 0.001954989407197519, "loss": 0.2032, "step": 10606 }, { "epoch": 0.018808860803259636, "grad_norm": 1.4375, "learning_rate": 0.001954970818504784, "loss": 0.2785, "step": 10608 }, { "epoch": 0.01881240696856945, "grad_norm": 0.92578125, "learning_rate": 0.001954952226072903, "loss": 0.2119, "step": 10610 }, { "epoch": 0.018815953133879265, "grad_norm": 0.376953125, "learning_rate": 0.0019549336299019573, "loss": 0.2636, "step": 10612 }, { "epoch": 0.01881949929918908, "grad_norm": 0.310546875, "learning_rate": 0.0019549150299920286, "loss": 0.3351, "step": 10614 }, { "epoch": 0.018823045464498894, "grad_norm": 0.57421875, "learning_rate": 0.0019548964263431984, "loss": 0.1953, "step": 10616 }, { "epoch": 0.018826591629808712, "grad_norm": 0.41796875, "learning_rate": 0.0019548778189555477, "loss": 0.2252, "step": 10618 }, { "epoch": 0.018830137795118527, "grad_norm": 1.4453125, "learning_rate": 0.001954859207829158, "loss": 0.2535, "step": 10620 }, { "epoch": 0.01883368396042834, "grad_norm": 0.46484375, "learning_rate": 0.0019548405929641108, "loss": 0.2568, "step": 10622 }, { "epoch": 0.018837230125738156, "grad_norm": 0.76171875, "learning_rate": 0.001954821974360487, "loss": 0.2194, "step": 10624 }, { "epoch": 0.01884077629104797, "grad_norm": 3.609375, "learning_rate": 0.0019548033520183686, "loss": 0.3385, "step": 10626 }, { "epoch": 0.01884432245635779, "grad_norm": 0.64453125, "learning_rate": 0.001954784725937837, "loss": 0.1935, "step": 10628 }, { "epoch": 0.018847868621667603, "grad_norm": 0.625, "learning_rate": 0.0019547660961189736, "loss": 0.344, "step": 10630 }, { "epoch": 0.018851414786977418, "grad_norm": 0.31640625, "learning_rate": 0.0019547474625618596, "loss": 0.1781, "step": 10632 }, { "epoch": 0.018854960952287232, "grad_norm": 0.7578125, "learning_rate": 0.0019547288252665766, "loss": 0.2042, "step": 10634 }, { "epoch": 0.018858507117597047, "grad_norm": 0.515625, "learning_rate": 0.0019547101842332065, "loss": 0.2229, "step": 10636 }, { "epoch": 0.01886205328290686, "grad_norm": 0.59375, "learning_rate": 0.0019546915394618304, "loss": 0.2343, "step": 10638 }, { "epoch": 0.01886559944821668, "grad_norm": 1.8046875, "learning_rate": 0.0019546728909525302, "loss": 0.4387, "step": 10640 }, { "epoch": 0.018869145613526494, "grad_norm": 0.365234375, "learning_rate": 0.0019546542387053867, "loss": 0.1795, "step": 10642 }, { "epoch": 0.01887269177883631, "grad_norm": 0.60546875, "learning_rate": 0.0019546355827204827, "loss": 0.2605, "step": 10644 }, { "epoch": 0.018876237944146123, "grad_norm": 1.03125, "learning_rate": 0.0019546169229978983, "loss": 0.3554, "step": 10646 }, { "epoch": 0.018879784109455938, "grad_norm": 0.68359375, "learning_rate": 0.0019545982595377165, "loss": 0.2676, "step": 10648 }, { "epoch": 0.018883330274765752, "grad_norm": 0.72265625, "learning_rate": 0.001954579592340018, "loss": 0.2549, "step": 10650 }, { "epoch": 0.01888687644007557, "grad_norm": 1.9453125, "learning_rate": 0.001954560921404885, "loss": 0.2426, "step": 10652 }, { "epoch": 0.018890422605385385, "grad_norm": 0.96484375, "learning_rate": 0.001954542246732399, "loss": 0.303, "step": 10654 }, { "epoch": 0.0188939687706952, "grad_norm": 0.5078125, "learning_rate": 0.0019545235683226412, "loss": 0.2656, "step": 10656 }, { "epoch": 0.018897514936005014, "grad_norm": 0.3046875, "learning_rate": 0.0019545048861756937, "loss": 0.2267, "step": 10658 }, { "epoch": 0.01890106110131483, "grad_norm": 0.7421875, "learning_rate": 0.0019544862002916384, "loss": 0.3153, "step": 10660 }, { "epoch": 0.018904607266624646, "grad_norm": 0.31640625, "learning_rate": 0.001954467510670557, "loss": 0.1953, "step": 10662 }, { "epoch": 0.01890815343193446, "grad_norm": 0.953125, "learning_rate": 0.0019544488173125307, "loss": 0.291, "step": 10664 }, { "epoch": 0.018911699597244275, "grad_norm": 0.244140625, "learning_rate": 0.001954430120217642, "loss": 0.283, "step": 10666 }, { "epoch": 0.01891524576255409, "grad_norm": 0.65234375, "learning_rate": 0.0019544114193859713, "loss": 0.2134, "step": 10668 }, { "epoch": 0.018918791927863905, "grad_norm": 0.51953125, "learning_rate": 0.0019543927148176024, "loss": 0.2538, "step": 10670 }, { "epoch": 0.01892233809317372, "grad_norm": 0.4609375, "learning_rate": 0.0019543740065126156, "loss": 0.2825, "step": 10672 }, { "epoch": 0.018925884258483537, "grad_norm": 0.51953125, "learning_rate": 0.001954355294471093, "loss": 0.2298, "step": 10674 }, { "epoch": 0.01892943042379335, "grad_norm": 0.8984375, "learning_rate": 0.0019543365786931174, "loss": 0.2322, "step": 10676 }, { "epoch": 0.018932976589103166, "grad_norm": 0.546875, "learning_rate": 0.001954317859178769, "loss": 0.2352, "step": 10678 }, { "epoch": 0.01893652275441298, "grad_norm": 0.8828125, "learning_rate": 0.0019542991359281312, "loss": 0.2686, "step": 10680 }, { "epoch": 0.018940068919722795, "grad_norm": 0.36328125, "learning_rate": 0.0019542804089412846, "loss": 0.2246, "step": 10682 }, { "epoch": 0.01894361508503261, "grad_norm": 0.4609375, "learning_rate": 0.001954261678218312, "loss": 0.2252, "step": 10684 }, { "epoch": 0.018947161250342428, "grad_norm": 0.353515625, "learning_rate": 0.001954242943759295, "loss": 0.2093, "step": 10686 }, { "epoch": 0.018950707415652242, "grad_norm": 0.29296875, "learning_rate": 0.0019542242055643156, "loss": 0.2244, "step": 10688 }, { "epoch": 0.018954253580962057, "grad_norm": 0.75, "learning_rate": 0.001954205463633456, "loss": 0.2106, "step": 10690 }, { "epoch": 0.01895779974627187, "grad_norm": 0.5234375, "learning_rate": 0.0019541867179667972, "loss": 0.24, "step": 10692 }, { "epoch": 0.018961345911581686, "grad_norm": 1.046875, "learning_rate": 0.0019541679685644224, "loss": 0.2106, "step": 10694 }, { "epoch": 0.018964892076891504, "grad_norm": 1.3515625, "learning_rate": 0.001954149215426413, "loss": 0.2169, "step": 10696 }, { "epoch": 0.01896843824220132, "grad_norm": 0.60546875, "learning_rate": 0.0019541304585528507, "loss": 0.2906, "step": 10698 }, { "epoch": 0.018971984407511133, "grad_norm": 1.6953125, "learning_rate": 0.0019541116979438183, "loss": 0.2548, "step": 10700 }, { "epoch": 0.018975530572820948, "grad_norm": 0.48046875, "learning_rate": 0.001954092933599397, "loss": 0.1975, "step": 10702 }, { "epoch": 0.018979076738130762, "grad_norm": 1.265625, "learning_rate": 0.00195407416551967, "loss": 0.2005, "step": 10704 }, { "epoch": 0.018982622903440577, "grad_norm": 0.6015625, "learning_rate": 0.0019540553937047187, "loss": 0.3157, "step": 10706 }, { "epoch": 0.018986169068750395, "grad_norm": 0.482421875, "learning_rate": 0.0019540366181546244, "loss": 0.2572, "step": 10708 }, { "epoch": 0.01898971523406021, "grad_norm": 1.0, "learning_rate": 0.001954017838869471, "loss": 0.2349, "step": 10710 }, { "epoch": 0.018993261399370024, "grad_norm": 1.578125, "learning_rate": 0.001953999055849339, "loss": 0.2347, "step": 10712 }, { "epoch": 0.01899680756467984, "grad_norm": 0.5390625, "learning_rate": 0.001953980269094311, "loss": 0.2762, "step": 10714 }, { "epoch": 0.019000353729989653, "grad_norm": 1.7890625, "learning_rate": 0.00195396147860447, "loss": 0.2047, "step": 10716 }, { "epoch": 0.019003899895299468, "grad_norm": 0.390625, "learning_rate": 0.001953942684379897, "loss": 0.2115, "step": 10718 }, { "epoch": 0.019007446060609286, "grad_norm": 2.1875, "learning_rate": 0.0019539238864206753, "loss": 0.2352, "step": 10720 }, { "epoch": 0.0190109922259191, "grad_norm": 0.45703125, "learning_rate": 0.0019539050847268862, "loss": 0.1856, "step": 10722 }, { "epoch": 0.019014538391228915, "grad_norm": 1.6796875, "learning_rate": 0.001953886279298612, "loss": 0.2969, "step": 10724 }, { "epoch": 0.01901808455653873, "grad_norm": 0.828125, "learning_rate": 0.001953867470135936, "loss": 0.2008, "step": 10726 }, { "epoch": 0.019021630721848544, "grad_norm": 1.5703125, "learning_rate": 0.001953848657238939, "loss": 0.2502, "step": 10728 }, { "epoch": 0.019025176887158362, "grad_norm": 0.47265625, "learning_rate": 0.001953829840607704, "loss": 0.3565, "step": 10730 }, { "epoch": 0.019028723052468176, "grad_norm": 0.52734375, "learning_rate": 0.001953811020242313, "loss": 0.2566, "step": 10732 }, { "epoch": 0.01903226921777799, "grad_norm": 3.265625, "learning_rate": 0.001953792196142849, "loss": 0.3169, "step": 10734 }, { "epoch": 0.019035815383087806, "grad_norm": 0.95703125, "learning_rate": 0.001953773368309394, "loss": 0.2331, "step": 10736 }, { "epoch": 0.01903936154839762, "grad_norm": 0.61328125, "learning_rate": 0.00195375453674203, "loss": 0.1891, "step": 10738 }, { "epoch": 0.019042907713707435, "grad_norm": 3.265625, "learning_rate": 0.0019537357014408393, "loss": 0.2587, "step": 10740 }, { "epoch": 0.019046453879017253, "grad_norm": 0.486328125, "learning_rate": 0.001953716862405905, "loss": 0.2695, "step": 10742 }, { "epoch": 0.019050000044327067, "grad_norm": 0.302734375, "learning_rate": 0.001953698019637309, "loss": 0.3266, "step": 10744 }, { "epoch": 0.019053546209636882, "grad_norm": 1.140625, "learning_rate": 0.0019536791731351337, "loss": 0.2231, "step": 10746 }, { "epoch": 0.019057092374946696, "grad_norm": 0.796875, "learning_rate": 0.0019536603228994614, "loss": 0.296, "step": 10748 }, { "epoch": 0.01906063854025651, "grad_norm": 0.640625, "learning_rate": 0.0019536414689303745, "loss": 0.2146, "step": 10750 }, { "epoch": 0.019064184705566325, "grad_norm": 1.0625, "learning_rate": 0.001953622611227956, "loss": 0.3058, "step": 10752 }, { "epoch": 0.019067730870876144, "grad_norm": 0.30859375, "learning_rate": 0.001953603749792288, "loss": 0.3183, "step": 10754 }, { "epoch": 0.019071277036185958, "grad_norm": 0.310546875, "learning_rate": 0.001953584884623453, "loss": 0.199, "step": 10756 }, { "epoch": 0.019074823201495773, "grad_norm": 0.6640625, "learning_rate": 0.0019535660157215338, "loss": 0.2279, "step": 10758 }, { "epoch": 0.019078369366805587, "grad_norm": 0.359375, "learning_rate": 0.0019535471430866124, "loss": 0.2577, "step": 10760 }, { "epoch": 0.0190819155321154, "grad_norm": 0.3828125, "learning_rate": 0.0019535282667187716, "loss": 0.2329, "step": 10762 }, { "epoch": 0.01908546169742522, "grad_norm": 0.9375, "learning_rate": 0.001953509386618094, "loss": 0.2759, "step": 10764 }, { "epoch": 0.019089007862735034, "grad_norm": 1.2421875, "learning_rate": 0.001953490502784662, "loss": 0.2235, "step": 10766 }, { "epoch": 0.01909255402804485, "grad_norm": 0.490234375, "learning_rate": 0.0019534716152185584, "loss": 0.3044, "step": 10768 }, { "epoch": 0.019096100193354663, "grad_norm": 0.86328125, "learning_rate": 0.0019534527239198655, "loss": 0.3898, "step": 10770 }, { "epoch": 0.019099646358664478, "grad_norm": 0.59375, "learning_rate": 0.0019534338288886666, "loss": 0.2063, "step": 10772 }, { "epoch": 0.019103192523974293, "grad_norm": 1.5859375, "learning_rate": 0.0019534149301250435, "loss": 0.2357, "step": 10774 }, { "epoch": 0.01910673868928411, "grad_norm": 0.45703125, "learning_rate": 0.0019533960276290796, "loss": 0.2266, "step": 10776 }, { "epoch": 0.019110284854593925, "grad_norm": 0.40234375, "learning_rate": 0.0019533771214008564, "loss": 0.1959, "step": 10778 }, { "epoch": 0.01911383101990374, "grad_norm": 0.376953125, "learning_rate": 0.001953358211440458, "loss": 0.2363, "step": 10780 }, { "epoch": 0.019117377185213554, "grad_norm": 0.3203125, "learning_rate": 0.0019533392977479666, "loss": 0.191, "step": 10782 }, { "epoch": 0.01912092335052337, "grad_norm": 0.5546875, "learning_rate": 0.0019533203803234643, "loss": 0.2211, "step": 10784 }, { "epoch": 0.019124469515833183, "grad_norm": 0.6640625, "learning_rate": 0.0019533014591670344, "loss": 0.2336, "step": 10786 }, { "epoch": 0.019128015681143, "grad_norm": 1.125, "learning_rate": 0.0019532825342787603, "loss": 0.2508, "step": 10788 }, { "epoch": 0.019131561846452816, "grad_norm": 0.65625, "learning_rate": 0.001953263605658723, "loss": 0.2136, "step": 10790 }, { "epoch": 0.01913510801176263, "grad_norm": 4.5, "learning_rate": 0.001953244673307007, "loss": 0.301, "step": 10792 }, { "epoch": 0.019138654177072445, "grad_norm": 0.39453125, "learning_rate": 0.0019532257372236943, "loss": 0.2218, "step": 10794 }, { "epoch": 0.01914220034238226, "grad_norm": 0.462890625, "learning_rate": 0.001953206797408868, "loss": 0.2472, "step": 10796 }, { "epoch": 0.019145746507692078, "grad_norm": 0.66796875, "learning_rate": 0.0019531878538626103, "loss": 0.2439, "step": 10798 }, { "epoch": 0.019149292673001892, "grad_norm": 1.828125, "learning_rate": 0.001953168906585005, "loss": 0.2473, "step": 10800 }, { "epoch": 0.019152838838311707, "grad_norm": 0.357421875, "learning_rate": 0.001953149955576134, "loss": 0.1757, "step": 10802 }, { "epoch": 0.01915638500362152, "grad_norm": 0.6015625, "learning_rate": 0.0019531310008360806, "loss": 0.2036, "step": 10804 }, { "epoch": 0.019159931168931336, "grad_norm": 2.375, "learning_rate": 0.0019531120423649283, "loss": 0.2246, "step": 10806 }, { "epoch": 0.01916347733424115, "grad_norm": 0.71484375, "learning_rate": 0.0019530930801627594, "loss": 0.1578, "step": 10808 }, { "epoch": 0.01916702349955097, "grad_norm": 0.44921875, "learning_rate": 0.0019530741142296565, "loss": 0.2797, "step": 10810 }, { "epoch": 0.019170569664860783, "grad_norm": 0.55078125, "learning_rate": 0.0019530551445657031, "loss": 0.2009, "step": 10812 }, { "epoch": 0.019174115830170597, "grad_norm": 0.6640625, "learning_rate": 0.0019530361711709823, "loss": 0.3717, "step": 10814 }, { "epoch": 0.019177661995480412, "grad_norm": 1.90625, "learning_rate": 0.0019530171940455763, "loss": 0.3369, "step": 10816 }, { "epoch": 0.019181208160790227, "grad_norm": 0.671875, "learning_rate": 0.0019529982131895691, "loss": 0.2041, "step": 10818 }, { "epoch": 0.01918475432610004, "grad_norm": 0.318359375, "learning_rate": 0.001952979228603043, "loss": 0.2526, "step": 10820 }, { "epoch": 0.01918830049140986, "grad_norm": 0.453125, "learning_rate": 0.001952960240286081, "loss": 0.2245, "step": 10822 }, { "epoch": 0.019191846656719674, "grad_norm": 1.3359375, "learning_rate": 0.0019529412482387667, "loss": 0.2152, "step": 10824 }, { "epoch": 0.019195392822029488, "grad_norm": 0.64453125, "learning_rate": 0.0019529222524611825, "loss": 0.2138, "step": 10826 }, { "epoch": 0.019198938987339303, "grad_norm": 0.279296875, "learning_rate": 0.0019529032529534123, "loss": 0.2304, "step": 10828 }, { "epoch": 0.019202485152649117, "grad_norm": 0.98828125, "learning_rate": 0.0019528842497155382, "loss": 0.2946, "step": 10830 }, { "epoch": 0.019206031317958935, "grad_norm": 0.4375, "learning_rate": 0.0019528652427476438, "loss": 0.3555, "step": 10832 }, { "epoch": 0.01920957748326875, "grad_norm": 0.314453125, "learning_rate": 0.0019528462320498126, "loss": 0.4899, "step": 10834 }, { "epoch": 0.019213123648578564, "grad_norm": 1.8671875, "learning_rate": 0.0019528272176221272, "loss": 0.3046, "step": 10836 }, { "epoch": 0.01921666981388838, "grad_norm": 2.109375, "learning_rate": 0.001952808199464671, "loss": 0.2421, "step": 10838 }, { "epoch": 0.019220215979198194, "grad_norm": 0.49609375, "learning_rate": 0.0019527891775775268, "loss": 0.4896, "step": 10840 }, { "epoch": 0.019223762144508008, "grad_norm": 0.953125, "learning_rate": 0.0019527701519607783, "loss": 0.3056, "step": 10842 }, { "epoch": 0.019227308309817826, "grad_norm": 0.98046875, "learning_rate": 0.0019527511226145088, "loss": 0.2108, "step": 10844 }, { "epoch": 0.01923085447512764, "grad_norm": 0.578125, "learning_rate": 0.001952732089538801, "loss": 0.3534, "step": 10846 }, { "epoch": 0.019234400640437455, "grad_norm": 0.8046875, "learning_rate": 0.0019527130527337382, "loss": 0.2177, "step": 10848 }, { "epoch": 0.01923794680574727, "grad_norm": 0.51171875, "learning_rate": 0.0019526940121994036, "loss": 0.3468, "step": 10850 }, { "epoch": 0.019241492971057084, "grad_norm": 0.376953125, "learning_rate": 0.0019526749679358813, "loss": 0.3032, "step": 10852 }, { "epoch": 0.0192450391363669, "grad_norm": 0.255859375, "learning_rate": 0.0019526559199432537, "loss": 0.212, "step": 10854 }, { "epoch": 0.019248585301676717, "grad_norm": 0.78125, "learning_rate": 0.0019526368682216044, "loss": 0.2494, "step": 10856 }, { "epoch": 0.01925213146698653, "grad_norm": 0.34375, "learning_rate": 0.0019526178127710165, "loss": 0.1968, "step": 10858 }, { "epoch": 0.019255677632296346, "grad_norm": 0.443359375, "learning_rate": 0.001952598753591574, "loss": 0.2657, "step": 10860 }, { "epoch": 0.01925922379760616, "grad_norm": 0.59375, "learning_rate": 0.0019525796906833598, "loss": 0.2805, "step": 10862 }, { "epoch": 0.019262769962915975, "grad_norm": 0.85546875, "learning_rate": 0.0019525606240464565, "loss": 0.2177, "step": 10864 }, { "epoch": 0.01926631612822579, "grad_norm": 0.26953125, "learning_rate": 0.0019525415536809493, "loss": 0.2162, "step": 10866 }, { "epoch": 0.019269862293535608, "grad_norm": 0.25, "learning_rate": 0.0019525224795869196, "loss": 0.3353, "step": 10868 }, { "epoch": 0.019273408458845422, "grad_norm": 0.400390625, "learning_rate": 0.0019525034017644522, "loss": 0.2781, "step": 10870 }, { "epoch": 0.019276954624155237, "grad_norm": 0.87890625, "learning_rate": 0.0019524843202136303, "loss": 0.2958, "step": 10872 }, { "epoch": 0.01928050078946505, "grad_norm": 0.31640625, "learning_rate": 0.0019524652349345371, "loss": 0.2147, "step": 10874 }, { "epoch": 0.019284046954774866, "grad_norm": 0.412109375, "learning_rate": 0.0019524461459272562, "loss": 0.2359, "step": 10876 }, { "epoch": 0.019287593120084684, "grad_norm": 0.81640625, "learning_rate": 0.0019524270531918707, "loss": 0.2246, "step": 10878 }, { "epoch": 0.0192911392853945, "grad_norm": 0.37109375, "learning_rate": 0.0019524079567284644, "loss": 0.2572, "step": 10880 }, { "epoch": 0.019294685450704313, "grad_norm": 0.462890625, "learning_rate": 0.001952388856537121, "loss": 0.211, "step": 10882 }, { "epoch": 0.019298231616014128, "grad_norm": 0.671875, "learning_rate": 0.001952369752617924, "loss": 0.2447, "step": 10884 }, { "epoch": 0.019301777781323942, "grad_norm": 0.54296875, "learning_rate": 0.0019523506449709567, "loss": 0.2415, "step": 10886 }, { "epoch": 0.019305323946633757, "grad_norm": 0.349609375, "learning_rate": 0.0019523315335963026, "loss": 0.2132, "step": 10888 }, { "epoch": 0.019308870111943575, "grad_norm": 0.7890625, "learning_rate": 0.0019523124184940457, "loss": 0.2173, "step": 10890 }, { "epoch": 0.01931241627725339, "grad_norm": 0.58203125, "learning_rate": 0.0019522932996642694, "loss": 0.2229, "step": 10892 }, { "epoch": 0.019315962442563204, "grad_norm": 0.83203125, "learning_rate": 0.0019522741771070573, "loss": 0.2663, "step": 10894 }, { "epoch": 0.01931950860787302, "grad_norm": 0.375, "learning_rate": 0.0019522550508224929, "loss": 0.2241, "step": 10896 }, { "epoch": 0.019323054773182833, "grad_norm": 0.298828125, "learning_rate": 0.0019522359208106598, "loss": 0.2131, "step": 10898 }, { "epoch": 0.019326600938492648, "grad_norm": 0.46484375, "learning_rate": 0.0019522167870716421, "loss": 0.2677, "step": 10900 }, { "epoch": 0.019330147103802466, "grad_norm": 2.15625, "learning_rate": 0.001952197649605523, "loss": 0.3025, "step": 10902 }, { "epoch": 0.01933369326911228, "grad_norm": 0.515625, "learning_rate": 0.0019521785084123864, "loss": 0.2222, "step": 10904 }, { "epoch": 0.019337239434422095, "grad_norm": 0.453125, "learning_rate": 0.0019521593634923163, "loss": 0.2296, "step": 10906 }, { "epoch": 0.01934078559973191, "grad_norm": 0.5390625, "learning_rate": 0.0019521402148453958, "loss": 0.2072, "step": 10908 }, { "epoch": 0.019344331765041724, "grad_norm": 0.41796875, "learning_rate": 0.0019521210624717093, "loss": 0.2531, "step": 10910 }, { "epoch": 0.019347877930351542, "grad_norm": 0.353515625, "learning_rate": 0.0019521019063713397, "loss": 0.2032, "step": 10912 }, { "epoch": 0.019351424095661356, "grad_norm": 0.314453125, "learning_rate": 0.0019520827465443718, "loss": 0.2349, "step": 10914 }, { "epoch": 0.01935497026097117, "grad_norm": 1.375, "learning_rate": 0.0019520635829908886, "loss": 0.4852, "step": 10916 }, { "epoch": 0.019358516426280985, "grad_norm": 0.302734375, "learning_rate": 0.0019520444157109745, "loss": 0.1609, "step": 10918 }, { "epoch": 0.0193620625915908, "grad_norm": 0.99609375, "learning_rate": 0.001952025244704713, "loss": 0.2318, "step": 10920 }, { "epoch": 0.019365608756900615, "grad_norm": 1.7421875, "learning_rate": 0.0019520060699721878, "loss": 0.3149, "step": 10922 }, { "epoch": 0.019369154922210433, "grad_norm": 0.625, "learning_rate": 0.001951986891513483, "loss": 0.222, "step": 10924 }, { "epoch": 0.019372701087520247, "grad_norm": 0.53515625, "learning_rate": 0.0019519677093286826, "loss": 0.2297, "step": 10926 }, { "epoch": 0.01937624725283006, "grad_norm": 0.84375, "learning_rate": 0.0019519485234178705, "loss": 0.2908, "step": 10928 }, { "epoch": 0.019379793418139876, "grad_norm": 0.9453125, "learning_rate": 0.00195192933378113, "loss": 0.2186, "step": 10930 }, { "epoch": 0.01938333958344969, "grad_norm": 0.53125, "learning_rate": 0.0019519101404185456, "loss": 0.3206, "step": 10932 }, { "epoch": 0.019386885748759505, "grad_norm": 0.8671875, "learning_rate": 0.0019518909433302012, "loss": 0.2265, "step": 10934 }, { "epoch": 0.019390431914069323, "grad_norm": 0.78125, "learning_rate": 0.0019518717425161807, "loss": 0.1825, "step": 10936 }, { "epoch": 0.019393978079379138, "grad_norm": 0.40625, "learning_rate": 0.0019518525379765676, "loss": 0.193, "step": 10938 }, { "epoch": 0.019397524244688952, "grad_norm": 0.408203125, "learning_rate": 0.0019518333297114468, "loss": 0.2487, "step": 10940 }, { "epoch": 0.019401070409998767, "grad_norm": 0.3984375, "learning_rate": 0.0019518141177209015, "loss": 0.3763, "step": 10942 }, { "epoch": 0.01940461657530858, "grad_norm": 2.046875, "learning_rate": 0.0019517949020050162, "loss": 0.2061, "step": 10944 }, { "epoch": 0.0194081627406184, "grad_norm": 0.51171875, "learning_rate": 0.0019517756825638748, "loss": 0.2514, "step": 10946 }, { "epoch": 0.019411708905928214, "grad_norm": 0.96484375, "learning_rate": 0.0019517564593975615, "loss": 0.177, "step": 10948 }, { "epoch": 0.01941525507123803, "grad_norm": 0.7109375, "learning_rate": 0.0019517372325061598, "loss": 0.2142, "step": 10950 }, { "epoch": 0.019418801236547843, "grad_norm": 0.29296875, "learning_rate": 0.0019517180018897543, "loss": 0.1947, "step": 10952 }, { "epoch": 0.019422347401857658, "grad_norm": 0.3125, "learning_rate": 0.001951698767548429, "loss": 0.2146, "step": 10954 }, { "epoch": 0.019425893567167472, "grad_norm": 0.7890625, "learning_rate": 0.0019516795294822681, "loss": 0.2465, "step": 10956 }, { "epoch": 0.01942943973247729, "grad_norm": 0.58984375, "learning_rate": 0.0019516602876913558, "loss": 0.286, "step": 10958 }, { "epoch": 0.019432985897787105, "grad_norm": 0.5546875, "learning_rate": 0.0019516410421757757, "loss": 0.2414, "step": 10960 }, { "epoch": 0.01943653206309692, "grad_norm": 0.7890625, "learning_rate": 0.0019516217929356127, "loss": 0.2892, "step": 10962 }, { "epoch": 0.019440078228406734, "grad_norm": 1.5625, "learning_rate": 0.0019516025399709507, "loss": 0.3304, "step": 10964 }, { "epoch": 0.01944362439371655, "grad_norm": 0.7578125, "learning_rate": 0.0019515832832818739, "loss": 0.2137, "step": 10966 }, { "epoch": 0.019447170559026363, "grad_norm": 0.76171875, "learning_rate": 0.001951564022868466, "loss": 0.1856, "step": 10968 }, { "epoch": 0.01945071672433618, "grad_norm": 0.349609375, "learning_rate": 0.0019515447587308123, "loss": 0.2202, "step": 10970 }, { "epoch": 0.019454262889645996, "grad_norm": 0.671875, "learning_rate": 0.001951525490868996, "loss": 0.225, "step": 10972 }, { "epoch": 0.01945780905495581, "grad_norm": 0.43359375, "learning_rate": 0.001951506219283102, "loss": 0.263, "step": 10974 }, { "epoch": 0.019461355220265625, "grad_norm": 1.2109375, "learning_rate": 0.0019514869439732146, "loss": 0.2425, "step": 10976 }, { "epoch": 0.01946490138557544, "grad_norm": 1.5625, "learning_rate": 0.001951467664939418, "loss": 0.2397, "step": 10978 }, { "epoch": 0.019468447550885257, "grad_norm": 1.71875, "learning_rate": 0.001951448382181796, "loss": 0.527, "step": 10980 }, { "epoch": 0.019471993716195072, "grad_norm": 0.357421875, "learning_rate": 0.0019514290957004334, "loss": 0.24, "step": 10982 }, { "epoch": 0.019475539881504886, "grad_norm": 0.69140625, "learning_rate": 0.0019514098054954146, "loss": 0.2465, "step": 10984 }, { "epoch": 0.0194790860468147, "grad_norm": 0.53515625, "learning_rate": 0.0019513905115668237, "loss": 0.2122, "step": 10986 }, { "epoch": 0.019482632212124516, "grad_norm": 1.4296875, "learning_rate": 0.0019513712139147456, "loss": 0.4023, "step": 10988 }, { "epoch": 0.01948617837743433, "grad_norm": 0.28125, "learning_rate": 0.0019513519125392645, "loss": 0.2558, "step": 10990 }, { "epoch": 0.019489724542744148, "grad_norm": 1.78125, "learning_rate": 0.001951332607440464, "loss": 0.3743, "step": 10992 }, { "epoch": 0.019493270708053963, "grad_norm": 0.330078125, "learning_rate": 0.0019513132986184296, "loss": 0.26, "step": 10994 }, { "epoch": 0.019496816873363777, "grad_norm": 0.88671875, "learning_rate": 0.0019512939860732452, "loss": 0.2315, "step": 10996 }, { "epoch": 0.019500363038673592, "grad_norm": 0.21484375, "learning_rate": 0.0019512746698049958, "loss": 0.2222, "step": 10998 }, { "epoch": 0.019503909203983406, "grad_norm": 0.625, "learning_rate": 0.001951255349813765, "loss": 0.2595, "step": 11000 }, { "epoch": 0.01950745536929322, "grad_norm": 0.349609375, "learning_rate": 0.001951236026099638, "loss": 0.3253, "step": 11002 }, { "epoch": 0.01951100153460304, "grad_norm": 0.39453125, "learning_rate": 0.0019512166986626989, "loss": 0.2113, "step": 11004 }, { "epoch": 0.019514547699912854, "grad_norm": 0.32421875, "learning_rate": 0.0019511973675030326, "loss": 0.221, "step": 11006 }, { "epoch": 0.019518093865222668, "grad_norm": 0.86328125, "learning_rate": 0.0019511780326207234, "loss": 0.191, "step": 11008 }, { "epoch": 0.019521640030532483, "grad_norm": 0.36328125, "learning_rate": 0.0019511586940158556, "loss": 0.2271, "step": 11010 }, { "epoch": 0.019525186195842297, "grad_norm": 0.9140625, "learning_rate": 0.0019511393516885142, "loss": 0.2208, "step": 11012 }, { "epoch": 0.019528732361152115, "grad_norm": 0.98046875, "learning_rate": 0.001951120005638784, "loss": 0.2405, "step": 11014 }, { "epoch": 0.01953227852646193, "grad_norm": 0.58203125, "learning_rate": 0.0019511006558667492, "loss": 0.2341, "step": 11016 }, { "epoch": 0.019535824691771744, "grad_norm": 1.6796875, "learning_rate": 0.0019510813023724944, "loss": 0.5106, "step": 11018 }, { "epoch": 0.01953937085708156, "grad_norm": 0.890625, "learning_rate": 0.0019510619451561042, "loss": 0.2823, "step": 11020 }, { "epoch": 0.019542917022391373, "grad_norm": 0.54296875, "learning_rate": 0.0019510425842176639, "loss": 0.2425, "step": 11022 }, { "epoch": 0.019546463187701188, "grad_norm": 0.36328125, "learning_rate": 0.001951023219557257, "loss": 0.2201, "step": 11024 }, { "epoch": 0.019550009353011006, "grad_norm": 0.734375, "learning_rate": 0.0019510038511749692, "loss": 0.281, "step": 11026 }, { "epoch": 0.01955355551832082, "grad_norm": 0.484375, "learning_rate": 0.0019509844790708848, "loss": 0.2588, "step": 11028 }, { "epoch": 0.019557101683630635, "grad_norm": 0.43359375, "learning_rate": 0.0019509651032450887, "loss": 0.2392, "step": 11030 }, { "epoch": 0.01956064784894045, "grad_norm": 0.73828125, "learning_rate": 0.0019509457236976657, "loss": 0.3737, "step": 11032 }, { "epoch": 0.019564194014250264, "grad_norm": 0.375, "learning_rate": 0.0019509263404287004, "loss": 0.3482, "step": 11034 }, { "epoch": 0.01956774017956008, "grad_norm": 0.82421875, "learning_rate": 0.0019509069534382772, "loss": 0.2544, "step": 11036 }, { "epoch": 0.019571286344869897, "grad_norm": 0.40234375, "learning_rate": 0.0019508875627264814, "loss": 0.2289, "step": 11038 }, { "epoch": 0.01957483251017971, "grad_norm": 0.302734375, "learning_rate": 0.0019508681682933978, "loss": 0.2791, "step": 11040 }, { "epoch": 0.019578378675489526, "grad_norm": 0.423828125, "learning_rate": 0.001950848770139111, "loss": 0.2202, "step": 11042 }, { "epoch": 0.01958192484079934, "grad_norm": 0.41015625, "learning_rate": 0.0019508293682637056, "loss": 0.3792, "step": 11044 }, { "epoch": 0.019585471006109155, "grad_norm": 0.3984375, "learning_rate": 0.0019508099626672673, "loss": 0.1706, "step": 11046 }, { "epoch": 0.019589017171418973, "grad_norm": 0.953125, "learning_rate": 0.0019507905533498802, "loss": 0.229, "step": 11048 }, { "epoch": 0.019592563336728788, "grad_norm": 0.3359375, "learning_rate": 0.0019507711403116293, "loss": 0.2952, "step": 11050 }, { "epoch": 0.019596109502038602, "grad_norm": 0.259765625, "learning_rate": 0.0019507517235525997, "loss": 0.4037, "step": 11052 }, { "epoch": 0.019599655667348417, "grad_norm": 0.34765625, "learning_rate": 0.0019507323030728762, "loss": 0.2403, "step": 11054 }, { "epoch": 0.01960320183265823, "grad_norm": 0.310546875, "learning_rate": 0.0019507128788725438, "loss": 0.3066, "step": 11056 }, { "epoch": 0.019606747997968046, "grad_norm": 0.4921875, "learning_rate": 0.0019506934509516875, "loss": 0.2749, "step": 11058 }, { "epoch": 0.019610294163277864, "grad_norm": 12.125, "learning_rate": 0.001950674019310392, "loss": 0.2871, "step": 11060 }, { "epoch": 0.01961384032858768, "grad_norm": 10.8125, "learning_rate": 0.0019506545839487427, "loss": 0.3001, "step": 11062 }, { "epoch": 0.019617386493897493, "grad_norm": 0.95703125, "learning_rate": 0.001950635144866824, "loss": 0.2572, "step": 11064 }, { "epoch": 0.019620932659207307, "grad_norm": 0.47265625, "learning_rate": 0.0019506157020647216, "loss": 0.2778, "step": 11066 }, { "epoch": 0.019624478824517122, "grad_norm": 0.373046875, "learning_rate": 0.0019505962555425205, "loss": 0.2602, "step": 11068 }, { "epoch": 0.019628024989826937, "grad_norm": 0.66015625, "learning_rate": 0.0019505768053003048, "loss": 0.4013, "step": 11070 }, { "epoch": 0.019631571155136755, "grad_norm": 0.328125, "learning_rate": 0.001950557351338161, "loss": 0.2023, "step": 11072 }, { "epoch": 0.01963511732044657, "grad_norm": 1.3671875, "learning_rate": 0.001950537893656173, "loss": 0.2618, "step": 11074 }, { "epoch": 0.019638663485756384, "grad_norm": 0.3046875, "learning_rate": 0.0019505184322544259, "loss": 0.2211, "step": 11076 }, { "epoch": 0.019642209651066198, "grad_norm": 0.94921875, "learning_rate": 0.0019504989671330056, "loss": 0.2719, "step": 11078 }, { "epoch": 0.019645755816376013, "grad_norm": 0.6171875, "learning_rate": 0.001950479498291997, "loss": 0.3977, "step": 11080 }, { "epoch": 0.01964930198168583, "grad_norm": 0.64453125, "learning_rate": 0.0019504600257314849, "loss": 0.2069, "step": 11082 }, { "epoch": 0.019652848146995645, "grad_norm": 0.396484375, "learning_rate": 0.0019504405494515545, "loss": 0.2656, "step": 11084 }, { "epoch": 0.01965639431230546, "grad_norm": 1.6875, "learning_rate": 0.0019504210694522913, "loss": 0.3066, "step": 11086 }, { "epoch": 0.019659940477615274, "grad_norm": 7.4375, "learning_rate": 0.0019504015857337803, "loss": 0.2919, "step": 11088 }, { "epoch": 0.01966348664292509, "grad_norm": 0.287109375, "learning_rate": 0.0019503820982961068, "loss": 0.2172, "step": 11090 }, { "epoch": 0.019667032808234904, "grad_norm": 2.203125, "learning_rate": 0.001950362607139356, "loss": 0.6718, "step": 11092 }, { "epoch": 0.01967057897354472, "grad_norm": 0.9765625, "learning_rate": 0.0019503431122636131, "loss": 0.2758, "step": 11094 }, { "epoch": 0.019674125138854536, "grad_norm": 0.671875, "learning_rate": 0.0019503236136689632, "loss": 0.2416, "step": 11096 }, { "epoch": 0.01967767130416435, "grad_norm": 0.56640625, "learning_rate": 0.0019503041113554918, "loss": 0.2775, "step": 11098 }, { "epoch": 0.019681217469474165, "grad_norm": 0.39453125, "learning_rate": 0.0019502846053232844, "loss": 0.2401, "step": 11100 }, { "epoch": 0.01968476363478398, "grad_norm": 0.490234375, "learning_rate": 0.0019502650955724255, "loss": 0.2443, "step": 11102 }, { "epoch": 0.019688309800093794, "grad_norm": 2.03125, "learning_rate": 0.0019502455821030014, "loss": 0.24, "step": 11104 }, { "epoch": 0.019691855965403612, "grad_norm": 0.4453125, "learning_rate": 0.001950226064915097, "loss": 0.2128, "step": 11106 }, { "epoch": 0.019695402130713427, "grad_norm": 4.28125, "learning_rate": 0.0019502065440087975, "loss": 0.4816, "step": 11108 }, { "epoch": 0.01969894829602324, "grad_norm": 0.54296875, "learning_rate": 0.0019501870193841884, "loss": 0.1832, "step": 11110 }, { "epoch": 0.019702494461333056, "grad_norm": 1.734375, "learning_rate": 0.0019501674910413554, "loss": 0.2731, "step": 11112 }, { "epoch": 0.01970604062664287, "grad_norm": 0.515625, "learning_rate": 0.0019501479589803831, "loss": 0.2385, "step": 11114 }, { "epoch": 0.01970958679195269, "grad_norm": 0.390625, "learning_rate": 0.001950128423201358, "loss": 0.2106, "step": 11116 }, { "epoch": 0.019713132957262503, "grad_norm": 1.453125, "learning_rate": 0.0019501088837043648, "loss": 0.3689, "step": 11118 }, { "epoch": 0.019716679122572318, "grad_norm": 0.4921875, "learning_rate": 0.0019500893404894892, "loss": 0.2387, "step": 11120 }, { "epoch": 0.019720225287882132, "grad_norm": 0.96484375, "learning_rate": 0.0019500697935568166, "loss": 0.277, "step": 11122 }, { "epoch": 0.019723771453191947, "grad_norm": 1.3359375, "learning_rate": 0.0019500502429064324, "loss": 0.34, "step": 11124 }, { "epoch": 0.01972731761850176, "grad_norm": 0.53125, "learning_rate": 0.001950030688538422, "loss": 0.2054, "step": 11126 }, { "epoch": 0.01973086378381158, "grad_norm": 1.0546875, "learning_rate": 0.0019500111304528716, "loss": 0.2229, "step": 11128 }, { "epoch": 0.019734409949121394, "grad_norm": 3.0625, "learning_rate": 0.001949991568649866, "loss": 0.2168, "step": 11130 }, { "epoch": 0.01973795611443121, "grad_norm": 0.890625, "learning_rate": 0.001949972003129491, "loss": 0.3146, "step": 11132 }, { "epoch": 0.019741502279741023, "grad_norm": 1.375, "learning_rate": 0.0019499524338918322, "loss": 0.308, "step": 11134 }, { "epoch": 0.019745048445050838, "grad_norm": 0.62109375, "learning_rate": 0.001949932860936975, "loss": 0.2132, "step": 11136 }, { "epoch": 0.019748594610360652, "grad_norm": 1.2265625, "learning_rate": 0.0019499132842650056, "loss": 0.2605, "step": 11138 }, { "epoch": 0.01975214077567047, "grad_norm": 0.3828125, "learning_rate": 0.001949893703876009, "loss": 0.2386, "step": 11140 }, { "epoch": 0.019755686940980285, "grad_norm": 0.330078125, "learning_rate": 0.001949874119770071, "loss": 0.2523, "step": 11142 }, { "epoch": 0.0197592331062901, "grad_norm": 0.30859375, "learning_rate": 0.0019498545319472772, "loss": 0.2623, "step": 11144 }, { "epoch": 0.019762779271599914, "grad_norm": 0.48046875, "learning_rate": 0.0019498349404077132, "loss": 0.2426, "step": 11146 }, { "epoch": 0.01976632543690973, "grad_norm": 0.396484375, "learning_rate": 0.001949815345151465, "loss": 0.283, "step": 11148 }, { "epoch": 0.019769871602219546, "grad_norm": 0.447265625, "learning_rate": 0.0019497957461786183, "loss": 0.2172, "step": 11150 }, { "epoch": 0.01977341776752936, "grad_norm": 0.408203125, "learning_rate": 0.001949776143489258, "loss": 0.2157, "step": 11152 }, { "epoch": 0.019776963932839176, "grad_norm": 0.29296875, "learning_rate": 0.0019497565370834712, "loss": 0.3706, "step": 11154 }, { "epoch": 0.01978051009814899, "grad_norm": 0.375, "learning_rate": 0.0019497369269613424, "loss": 0.2533, "step": 11156 }, { "epoch": 0.019784056263458805, "grad_norm": 0.984375, "learning_rate": 0.001949717313122958, "loss": 0.4503, "step": 11158 }, { "epoch": 0.01978760242876862, "grad_norm": 0.2314453125, "learning_rate": 0.0019496976955684037, "loss": 0.2002, "step": 11160 }, { "epoch": 0.019791148594078437, "grad_norm": 0.7109375, "learning_rate": 0.0019496780742977653, "loss": 0.4008, "step": 11162 }, { "epoch": 0.019794694759388252, "grad_norm": 0.81640625, "learning_rate": 0.0019496584493111282, "loss": 0.2809, "step": 11164 }, { "epoch": 0.019798240924698066, "grad_norm": 1.828125, "learning_rate": 0.001949638820608579, "loss": 0.285, "step": 11166 }, { "epoch": 0.01980178709000788, "grad_norm": 1.5390625, "learning_rate": 0.0019496191881902033, "loss": 0.3015, "step": 11168 }, { "epoch": 0.019805333255317695, "grad_norm": 0.39453125, "learning_rate": 0.0019495995520560864, "loss": 0.2502, "step": 11170 }, { "epoch": 0.01980887942062751, "grad_norm": 0.359375, "learning_rate": 0.0019495799122063143, "loss": 0.2487, "step": 11172 }, { "epoch": 0.019812425585937328, "grad_norm": 1.0078125, "learning_rate": 0.0019495602686409738, "loss": 0.326, "step": 11174 }, { "epoch": 0.019815971751247143, "grad_norm": 0.2412109375, "learning_rate": 0.0019495406213601496, "loss": 0.1826, "step": 11176 }, { "epoch": 0.019819517916556957, "grad_norm": 2.0, "learning_rate": 0.0019495209703639287, "loss": 0.2762, "step": 11178 }, { "epoch": 0.01982306408186677, "grad_norm": 1.0546875, "learning_rate": 0.0019495013156523961, "loss": 0.3166, "step": 11180 }, { "epoch": 0.019826610247176586, "grad_norm": 0.267578125, "learning_rate": 0.0019494816572256384, "loss": 0.1833, "step": 11182 }, { "epoch": 0.019830156412486404, "grad_norm": 0.57421875, "learning_rate": 0.0019494619950837413, "loss": 0.43, "step": 11184 }, { "epoch": 0.01983370257779622, "grad_norm": 1.7109375, "learning_rate": 0.001949442329226791, "loss": 0.2684, "step": 11186 }, { "epoch": 0.019837248743106033, "grad_norm": 0.7109375, "learning_rate": 0.0019494226596548734, "loss": 0.1878, "step": 11188 }, { "epoch": 0.019840794908415848, "grad_norm": 0.3359375, "learning_rate": 0.0019494029863680743, "loss": 0.2336, "step": 11190 }, { "epoch": 0.019844341073725662, "grad_norm": 2.328125, "learning_rate": 0.00194938330936648, "loss": 0.3088, "step": 11192 }, { "epoch": 0.019847887239035477, "grad_norm": 10.6875, "learning_rate": 0.0019493636286501766, "loss": 0.2701, "step": 11194 }, { "epoch": 0.019851433404345295, "grad_norm": 5.875, "learning_rate": 0.00194934394421925, "loss": 0.207, "step": 11196 }, { "epoch": 0.01985497956965511, "grad_norm": 1.0859375, "learning_rate": 0.001949324256073786, "loss": 0.2564, "step": 11198 }, { "epoch": 0.019858525734964924, "grad_norm": 0.5546875, "learning_rate": 0.0019493045642138714, "loss": 0.2302, "step": 11200 }, { "epoch": 0.01986207190027474, "grad_norm": 1.734375, "learning_rate": 0.001949284868639592, "loss": 0.2123, "step": 11202 }, { "epoch": 0.019865618065584553, "grad_norm": 0.396484375, "learning_rate": 0.0019492651693510338, "loss": 0.2276, "step": 11204 }, { "epoch": 0.019869164230894368, "grad_norm": 0.6171875, "learning_rate": 0.0019492454663482832, "loss": 0.2275, "step": 11206 }, { "epoch": 0.019872710396204186, "grad_norm": 0.33984375, "learning_rate": 0.001949225759631426, "loss": 0.2468, "step": 11208 }, { "epoch": 0.019876256561514, "grad_norm": 0.79296875, "learning_rate": 0.0019492060492005488, "loss": 0.2124, "step": 11210 }, { "epoch": 0.019879802726823815, "grad_norm": 0.63671875, "learning_rate": 0.0019491863350557378, "loss": 0.2968, "step": 11212 }, { "epoch": 0.01988334889213363, "grad_norm": 0.3125, "learning_rate": 0.0019491666171970786, "loss": 0.2171, "step": 11214 }, { "epoch": 0.019886895057443444, "grad_norm": 0.451171875, "learning_rate": 0.001949146895624658, "loss": 0.1843, "step": 11216 }, { "epoch": 0.019890441222753262, "grad_norm": 0.2890625, "learning_rate": 0.0019491271703385622, "loss": 0.2565, "step": 11218 }, { "epoch": 0.019893987388063077, "grad_norm": 0.515625, "learning_rate": 0.0019491074413388774, "loss": 0.2432, "step": 11220 }, { "epoch": 0.01989753355337289, "grad_norm": 0.89453125, "learning_rate": 0.0019490877086256898, "loss": 0.2286, "step": 11222 }, { "epoch": 0.019901079718682706, "grad_norm": 0.6640625, "learning_rate": 0.0019490679721990858, "loss": 0.2235, "step": 11224 }, { "epoch": 0.01990462588399252, "grad_norm": 0.267578125, "learning_rate": 0.0019490482320591515, "loss": 0.1426, "step": 11226 }, { "epoch": 0.019908172049302335, "grad_norm": 0.482421875, "learning_rate": 0.001949028488205974, "loss": 0.2499, "step": 11228 }, { "epoch": 0.019911718214612153, "grad_norm": 0.47265625, "learning_rate": 0.0019490087406396387, "loss": 0.256, "step": 11230 }, { "epoch": 0.019915264379921967, "grad_norm": 0.8125, "learning_rate": 0.0019489889893602322, "loss": 0.2721, "step": 11232 }, { "epoch": 0.019918810545231782, "grad_norm": 0.54296875, "learning_rate": 0.001948969234367841, "loss": 0.2636, "step": 11234 }, { "epoch": 0.019922356710541596, "grad_norm": 0.345703125, "learning_rate": 0.0019489494756625516, "loss": 0.2813, "step": 11236 }, { "epoch": 0.01992590287585141, "grad_norm": 0.494140625, "learning_rate": 0.0019489297132444505, "loss": 0.3022, "step": 11238 }, { "epoch": 0.019929449041161226, "grad_norm": 0.2265625, "learning_rate": 0.0019489099471136238, "loss": 0.4236, "step": 11240 }, { "epoch": 0.019932995206471044, "grad_norm": 2.25, "learning_rate": 0.0019488901772701582, "loss": 0.265, "step": 11242 }, { "epoch": 0.019936541371780858, "grad_norm": 0.337890625, "learning_rate": 0.0019488704037141397, "loss": 0.2515, "step": 11244 }, { "epoch": 0.019940087537090673, "grad_norm": 0.21484375, "learning_rate": 0.0019488506264456556, "loss": 0.4747, "step": 11246 }, { "epoch": 0.019943633702400487, "grad_norm": 0.451171875, "learning_rate": 0.0019488308454647916, "loss": 0.3037, "step": 11248 }, { "epoch": 0.019947179867710302, "grad_norm": 1.0625, "learning_rate": 0.0019488110607716346, "loss": 0.2295, "step": 11250 }, { "epoch": 0.01995072603302012, "grad_norm": 1.7890625, "learning_rate": 0.0019487912723662715, "loss": 0.2905, "step": 11252 }, { "epoch": 0.019954272198329934, "grad_norm": 0.28515625, "learning_rate": 0.001948771480248788, "loss": 0.2631, "step": 11254 }, { "epoch": 0.01995781836363975, "grad_norm": 0.462890625, "learning_rate": 0.001948751684419271, "loss": 0.2534, "step": 11256 }, { "epoch": 0.019961364528949564, "grad_norm": 0.482421875, "learning_rate": 0.0019487318848778073, "loss": 0.1893, "step": 11258 }, { "epoch": 0.019964910694259378, "grad_norm": 4.65625, "learning_rate": 0.0019487120816244834, "loss": 0.4082, "step": 11260 }, { "epoch": 0.019968456859569193, "grad_norm": 0.455078125, "learning_rate": 0.0019486922746593856, "loss": 0.216, "step": 11262 }, { "epoch": 0.01997200302487901, "grad_norm": 0.4609375, "learning_rate": 0.001948672463982601, "loss": 0.2413, "step": 11264 }, { "epoch": 0.019975549190188825, "grad_norm": 0.328125, "learning_rate": 0.0019486526495942158, "loss": 0.3381, "step": 11266 }, { "epoch": 0.01997909535549864, "grad_norm": 0.73828125, "learning_rate": 0.001948632831494317, "loss": 0.2167, "step": 11268 }, { "epoch": 0.019982641520808454, "grad_norm": 0.228515625, "learning_rate": 0.001948613009682991, "loss": 0.221, "step": 11270 }, { "epoch": 0.01998618768611827, "grad_norm": 0.498046875, "learning_rate": 0.001948593184160325, "loss": 0.2228, "step": 11272 }, { "epoch": 0.019989733851428083, "grad_norm": 0.451171875, "learning_rate": 0.001948573354926405, "loss": 0.3056, "step": 11274 }, { "epoch": 0.0199932800167379, "grad_norm": 0.3671875, "learning_rate": 0.0019485535219813182, "loss": 0.2726, "step": 11276 }, { "epoch": 0.019996826182047716, "grad_norm": 0.1884765625, "learning_rate": 0.0019485336853251511, "loss": 0.2645, "step": 11278 }, { "epoch": 0.02000037234735753, "grad_norm": 0.3125, "learning_rate": 0.001948513844957991, "loss": 0.2255, "step": 11280 }, { "epoch": 0.020003918512667345, "grad_norm": 1.765625, "learning_rate": 0.0019484940008799236, "loss": 0.2621, "step": 11282 }, { "epoch": 0.02000746467797716, "grad_norm": 0.380859375, "learning_rate": 0.0019484741530910363, "loss": 0.1661, "step": 11284 }, { "epoch": 0.020011010843286978, "grad_norm": 0.291015625, "learning_rate": 0.0019484543015914162, "loss": 0.3313, "step": 11286 }, { "epoch": 0.020014557008596792, "grad_norm": 0.490234375, "learning_rate": 0.0019484344463811501, "loss": 0.1959, "step": 11288 }, { "epoch": 0.020018103173906607, "grad_norm": 0.859375, "learning_rate": 0.001948414587460324, "loss": 0.2805, "step": 11290 }, { "epoch": 0.02002164933921642, "grad_norm": 0.52734375, "learning_rate": 0.0019483947248290256, "loss": 0.2232, "step": 11292 }, { "epoch": 0.020025195504526236, "grad_norm": 1.921875, "learning_rate": 0.0019483748584873412, "loss": 0.2612, "step": 11294 }, { "epoch": 0.02002874166983605, "grad_norm": 0.8046875, "learning_rate": 0.001948354988435358, "loss": 0.5627, "step": 11296 }, { "epoch": 0.02003228783514587, "grad_norm": 0.25, "learning_rate": 0.0019483351146731634, "loss": 0.215, "step": 11298 }, { "epoch": 0.020035834000455683, "grad_norm": 0.546875, "learning_rate": 0.0019483152372008433, "loss": 0.2519, "step": 11300 }, { "epoch": 0.020039380165765498, "grad_norm": 0.2578125, "learning_rate": 0.0019482953560184854, "loss": 0.2748, "step": 11302 }, { "epoch": 0.020042926331075312, "grad_norm": 0.8515625, "learning_rate": 0.001948275471126176, "loss": 0.2328, "step": 11304 }, { "epoch": 0.020046472496385127, "grad_norm": 0.26171875, "learning_rate": 0.0019482555825240026, "loss": 0.2138, "step": 11306 }, { "epoch": 0.02005001866169494, "grad_norm": 0.2578125, "learning_rate": 0.0019482356902120522, "loss": 0.2024, "step": 11308 }, { "epoch": 0.02005356482700476, "grad_norm": 0.49609375, "learning_rate": 0.0019482157941904112, "loss": 0.235, "step": 11310 }, { "epoch": 0.020057110992314574, "grad_norm": 1.9453125, "learning_rate": 0.0019481958944591675, "loss": 0.4309, "step": 11312 }, { "epoch": 0.02006065715762439, "grad_norm": 0.50390625, "learning_rate": 0.0019481759910184072, "loss": 0.2425, "step": 11314 }, { "epoch": 0.020064203322934203, "grad_norm": 0.33984375, "learning_rate": 0.0019481560838682183, "loss": 0.2217, "step": 11316 }, { "epoch": 0.020067749488244017, "grad_norm": 0.357421875, "learning_rate": 0.001948136173008687, "loss": 0.2315, "step": 11318 }, { "epoch": 0.020071295653553835, "grad_norm": 0.46484375, "learning_rate": 0.0019481162584399005, "loss": 0.2198, "step": 11320 }, { "epoch": 0.02007484181886365, "grad_norm": 0.80078125, "learning_rate": 0.0019480963401619464, "loss": 0.3774, "step": 11322 }, { "epoch": 0.020078387984173465, "grad_norm": 1.15625, "learning_rate": 0.0019480764181749118, "loss": 0.1891, "step": 11324 }, { "epoch": 0.02008193414948328, "grad_norm": 0.345703125, "learning_rate": 0.001948056492478883, "loss": 0.1933, "step": 11326 }, { "epoch": 0.020085480314793094, "grad_norm": 1.0859375, "learning_rate": 0.0019480365630739484, "loss": 0.42, "step": 11328 }, { "epoch": 0.020089026480102908, "grad_norm": 0.828125, "learning_rate": 0.001948016629960194, "loss": 0.1988, "step": 11330 }, { "epoch": 0.020092572645412726, "grad_norm": 1.0234375, "learning_rate": 0.0019479966931377076, "loss": 0.408, "step": 11332 }, { "epoch": 0.02009611881072254, "grad_norm": 0.5703125, "learning_rate": 0.001947976752606576, "loss": 0.2296, "step": 11334 }, { "epoch": 0.020099664976032355, "grad_norm": 0.25, "learning_rate": 0.0019479568083668871, "loss": 0.2747, "step": 11336 }, { "epoch": 0.02010321114134217, "grad_norm": 0.31640625, "learning_rate": 0.0019479368604187273, "loss": 0.2475, "step": 11338 }, { "epoch": 0.020106757306651984, "grad_norm": 2.40625, "learning_rate": 0.0019479169087621843, "loss": 0.3176, "step": 11340 }, { "epoch": 0.0201103034719618, "grad_norm": 0.49609375, "learning_rate": 0.0019478969533973452, "loss": 0.2713, "step": 11342 }, { "epoch": 0.020113849637271617, "grad_norm": 0.326171875, "learning_rate": 0.0019478769943242975, "loss": 0.2049, "step": 11344 }, { "epoch": 0.02011739580258143, "grad_norm": 0.3125, "learning_rate": 0.0019478570315431282, "loss": 0.217, "step": 11346 }, { "epoch": 0.020120941967891246, "grad_norm": 2.140625, "learning_rate": 0.0019478370650539247, "loss": 0.2857, "step": 11348 }, { "epoch": 0.02012448813320106, "grad_norm": 2.265625, "learning_rate": 0.001947817094856775, "loss": 0.3395, "step": 11350 }, { "epoch": 0.020128034298510875, "grad_norm": 0.484375, "learning_rate": 0.001947797120951765, "loss": 0.3197, "step": 11352 }, { "epoch": 0.020131580463820693, "grad_norm": 0.91015625, "learning_rate": 0.001947777143338983, "loss": 0.1779, "step": 11354 }, { "epoch": 0.020135126629130508, "grad_norm": 0.83203125, "learning_rate": 0.0019477571620185165, "loss": 0.201, "step": 11356 }, { "epoch": 0.020138672794440322, "grad_norm": 0.35546875, "learning_rate": 0.0019477371769904522, "loss": 0.2357, "step": 11358 }, { "epoch": 0.020142218959750137, "grad_norm": 1.0625, "learning_rate": 0.0019477171882548781, "loss": 0.5784, "step": 11360 }, { "epoch": 0.02014576512505995, "grad_norm": 1.5859375, "learning_rate": 0.0019476971958118817, "loss": 0.2947, "step": 11362 }, { "epoch": 0.020149311290369766, "grad_norm": 0.61328125, "learning_rate": 0.00194767719966155, "loss": 0.2103, "step": 11364 }, { "epoch": 0.020152857455679584, "grad_norm": 0.9296875, "learning_rate": 0.0019476571998039707, "loss": 0.1959, "step": 11366 }, { "epoch": 0.0201564036209894, "grad_norm": 0.5078125, "learning_rate": 0.0019476371962392307, "loss": 0.1873, "step": 11368 }, { "epoch": 0.020159949786299213, "grad_norm": 0.34765625, "learning_rate": 0.0019476171889674185, "loss": 0.2362, "step": 11370 }, { "epoch": 0.020163495951609028, "grad_norm": 0.57421875, "learning_rate": 0.0019475971779886207, "loss": 0.2522, "step": 11372 }, { "epoch": 0.020167042116918842, "grad_norm": 0.57421875, "learning_rate": 0.0019475771633029255, "loss": 0.2236, "step": 11374 }, { "epoch": 0.020170588282228657, "grad_norm": 0.5703125, "learning_rate": 0.0019475571449104202, "loss": 0.2101, "step": 11376 }, { "epoch": 0.020174134447538475, "grad_norm": 0.7109375, "learning_rate": 0.001947537122811192, "loss": 0.2019, "step": 11378 }, { "epoch": 0.02017768061284829, "grad_norm": 0.328125, "learning_rate": 0.0019475170970053289, "loss": 0.2312, "step": 11380 }, { "epoch": 0.020181226778158104, "grad_norm": 1.6875, "learning_rate": 0.0019474970674929182, "loss": 0.2084, "step": 11382 }, { "epoch": 0.02018477294346792, "grad_norm": 0.62890625, "learning_rate": 0.0019474770342740478, "loss": 0.2404, "step": 11384 }, { "epoch": 0.020188319108777733, "grad_norm": 0.546875, "learning_rate": 0.0019474569973488049, "loss": 0.184, "step": 11386 }, { "epoch": 0.02019186527408755, "grad_norm": 0.54296875, "learning_rate": 0.001947436956717277, "loss": 0.4145, "step": 11388 }, { "epoch": 0.020195411439397366, "grad_norm": 0.451171875, "learning_rate": 0.001947416912379553, "loss": 0.2548, "step": 11390 }, { "epoch": 0.02019895760470718, "grad_norm": 0.78125, "learning_rate": 0.001947396864335719, "loss": 0.2814, "step": 11392 }, { "epoch": 0.020202503770016995, "grad_norm": 0.59765625, "learning_rate": 0.0019473768125858632, "loss": 0.3115, "step": 11394 }, { "epoch": 0.02020604993532681, "grad_norm": 0.83984375, "learning_rate": 0.0019473567571300738, "loss": 0.2221, "step": 11396 }, { "epoch": 0.020209596100636624, "grad_norm": 0.408203125, "learning_rate": 0.001947336697968438, "loss": 0.325, "step": 11398 }, { "epoch": 0.020213142265946442, "grad_norm": 0.55859375, "learning_rate": 0.0019473166351010442, "loss": 0.1654, "step": 11400 }, { "epoch": 0.020216688431256256, "grad_norm": 0.5546875, "learning_rate": 0.001947296568527979, "loss": 0.2179, "step": 11402 }, { "epoch": 0.02022023459656607, "grad_norm": 0.26171875, "learning_rate": 0.0019472764982493309, "loss": 0.2067, "step": 11404 }, { "epoch": 0.020223780761875886, "grad_norm": 0.63671875, "learning_rate": 0.0019472564242651877, "loss": 0.2715, "step": 11406 }, { "epoch": 0.0202273269271857, "grad_norm": 0.421875, "learning_rate": 0.001947236346575637, "loss": 0.177, "step": 11408 }, { "epoch": 0.020230873092495515, "grad_norm": 1.375, "learning_rate": 0.0019472162651807668, "loss": 0.298, "step": 11410 }, { "epoch": 0.020234419257805333, "grad_norm": 0.208984375, "learning_rate": 0.0019471961800806646, "loss": 0.2611, "step": 11412 }, { "epoch": 0.020237965423115147, "grad_norm": 0.65625, "learning_rate": 0.0019471760912754185, "loss": 0.1969, "step": 11414 }, { "epoch": 0.020241511588424962, "grad_norm": 0.671875, "learning_rate": 0.001947155998765116, "loss": 0.2581, "step": 11416 }, { "epoch": 0.020245057753734776, "grad_norm": 0.33203125, "learning_rate": 0.0019471359025498454, "loss": 0.218, "step": 11418 }, { "epoch": 0.02024860391904459, "grad_norm": 3.015625, "learning_rate": 0.0019471158026296946, "loss": 0.4829, "step": 11420 }, { "epoch": 0.02025215008435441, "grad_norm": 0.482421875, "learning_rate": 0.0019470956990047512, "loss": 0.1781, "step": 11422 }, { "epoch": 0.020255696249664223, "grad_norm": 2.5, "learning_rate": 0.0019470755916751034, "loss": 0.3491, "step": 11424 }, { "epoch": 0.020259242414974038, "grad_norm": 0.578125, "learning_rate": 0.001947055480640839, "loss": 0.7109, "step": 11426 }, { "epoch": 0.020262788580283853, "grad_norm": 0.2470703125, "learning_rate": 0.0019470353659020456, "loss": 0.2406, "step": 11428 }, { "epoch": 0.020266334745593667, "grad_norm": 0.8515625, "learning_rate": 0.0019470152474588118, "loss": 0.2212, "step": 11430 }, { "epoch": 0.02026988091090348, "grad_norm": 0.474609375, "learning_rate": 0.0019469951253112253, "loss": 0.2106, "step": 11432 }, { "epoch": 0.0202734270762133, "grad_norm": 4.71875, "learning_rate": 0.001946974999459374, "loss": 0.3944, "step": 11434 }, { "epoch": 0.020276973241523114, "grad_norm": 0.275390625, "learning_rate": 0.0019469548699033463, "loss": 0.1671, "step": 11436 }, { "epoch": 0.02028051940683293, "grad_norm": 0.390625, "learning_rate": 0.0019469347366432297, "loss": 0.2384, "step": 11438 }, { "epoch": 0.020284065572142743, "grad_norm": 0.3203125, "learning_rate": 0.0019469145996791127, "loss": 0.2598, "step": 11440 }, { "epoch": 0.020287611737452558, "grad_norm": 0.76171875, "learning_rate": 0.001946894459011083, "loss": 0.436, "step": 11442 }, { "epoch": 0.020291157902762372, "grad_norm": 0.31640625, "learning_rate": 0.001946874314639229, "loss": 0.2505, "step": 11444 }, { "epoch": 0.02029470406807219, "grad_norm": 0.353515625, "learning_rate": 0.0019468541665636388, "loss": 0.2663, "step": 11446 }, { "epoch": 0.020298250233382005, "grad_norm": 0.68359375, "learning_rate": 0.0019468340147844004, "loss": 0.1851, "step": 11448 }, { "epoch": 0.02030179639869182, "grad_norm": 0.61328125, "learning_rate": 0.0019468138593016016, "loss": 0.2056, "step": 11450 }, { "epoch": 0.020305342564001634, "grad_norm": 0.65625, "learning_rate": 0.001946793700115331, "loss": 0.2989, "step": 11452 }, { "epoch": 0.02030888872931145, "grad_norm": 0.74609375, "learning_rate": 0.0019467735372256764, "loss": 0.3187, "step": 11454 }, { "epoch": 0.020312434894621267, "grad_norm": 0.3515625, "learning_rate": 0.0019467533706327268, "loss": 0.2322, "step": 11456 }, { "epoch": 0.02031598105993108, "grad_norm": 20.875, "learning_rate": 0.0019467332003365694, "loss": 0.256, "step": 11458 }, { "epoch": 0.020319527225240896, "grad_norm": 0.65234375, "learning_rate": 0.001946713026337293, "loss": 0.2115, "step": 11460 }, { "epoch": 0.02032307339055071, "grad_norm": 0.44140625, "learning_rate": 0.0019466928486349855, "loss": 0.2224, "step": 11462 }, { "epoch": 0.020326619555860525, "grad_norm": 1.03125, "learning_rate": 0.0019466726672297354, "loss": 0.2574, "step": 11464 }, { "epoch": 0.02033016572117034, "grad_norm": 1.1484375, "learning_rate": 0.001946652482121631, "loss": 0.2771, "step": 11466 }, { "epoch": 0.020333711886480157, "grad_norm": 0.5546875, "learning_rate": 0.0019466322933107602, "loss": 0.2037, "step": 11468 }, { "epoch": 0.020337258051789972, "grad_norm": 0.388671875, "learning_rate": 0.0019466121007972112, "loss": 0.2151, "step": 11470 }, { "epoch": 0.020340804217099787, "grad_norm": 0.3359375, "learning_rate": 0.0019465919045810734, "loss": 0.1725, "step": 11472 }, { "epoch": 0.0203443503824096, "grad_norm": 0.32421875, "learning_rate": 0.001946571704662434, "loss": 0.374, "step": 11474 }, { "epoch": 0.020347896547719416, "grad_norm": 0.423828125, "learning_rate": 0.0019465515010413817, "loss": 0.157, "step": 11476 }, { "epoch": 0.02035144271302923, "grad_norm": 0.84765625, "learning_rate": 0.0019465312937180048, "loss": 0.4223, "step": 11478 }, { "epoch": 0.02035498887833905, "grad_norm": 0.2275390625, "learning_rate": 0.001946511082692392, "loss": 0.1701, "step": 11480 }, { "epoch": 0.020358535043648863, "grad_norm": 1.4296875, "learning_rate": 0.0019464908679646309, "loss": 0.2756, "step": 11482 }, { "epoch": 0.020362081208958677, "grad_norm": 0.85546875, "learning_rate": 0.001946470649534811, "loss": 0.274, "step": 11484 }, { "epoch": 0.020365627374268492, "grad_norm": 0.4921875, "learning_rate": 0.0019464504274030198, "loss": 0.2443, "step": 11486 }, { "epoch": 0.020369173539578306, "grad_norm": 0.57421875, "learning_rate": 0.0019464302015693464, "loss": 0.1874, "step": 11488 }, { "epoch": 0.020372719704888125, "grad_norm": 0.33203125, "learning_rate": 0.0019464099720338788, "loss": 0.1898, "step": 11490 }, { "epoch": 0.02037626587019794, "grad_norm": 0.279296875, "learning_rate": 0.0019463897387967059, "loss": 0.2622, "step": 11492 }, { "epoch": 0.020379812035507754, "grad_norm": 0.455078125, "learning_rate": 0.0019463695018579158, "loss": 0.1773, "step": 11494 }, { "epoch": 0.020383358200817568, "grad_norm": 0.265625, "learning_rate": 0.0019463492612175968, "loss": 0.2106, "step": 11496 }, { "epoch": 0.020386904366127383, "grad_norm": 1.4453125, "learning_rate": 0.001946329016875838, "loss": 0.2889, "step": 11498 }, { "epoch": 0.020390450531437197, "grad_norm": 0.44140625, "learning_rate": 0.0019463087688327276, "loss": 0.2176, "step": 11500 }, { "epoch": 0.020393996696747015, "grad_norm": 0.78515625, "learning_rate": 0.0019462885170883544, "loss": 0.182, "step": 11502 }, { "epoch": 0.02039754286205683, "grad_norm": 0.3046875, "learning_rate": 0.0019462682616428067, "loss": 0.19, "step": 11504 }, { "epoch": 0.020401089027366644, "grad_norm": 1.234375, "learning_rate": 0.0019462480024961732, "loss": 0.2702, "step": 11506 }, { "epoch": 0.02040463519267646, "grad_norm": 0.88671875, "learning_rate": 0.0019462277396485423, "loss": 0.2376, "step": 11508 }, { "epoch": 0.020408181357986274, "grad_norm": 1.21875, "learning_rate": 0.0019462074731000033, "loss": 0.2597, "step": 11510 }, { "epoch": 0.020411727523296088, "grad_norm": 0.5, "learning_rate": 0.0019461872028506439, "loss": 0.1977, "step": 11512 }, { "epoch": 0.020415273688605906, "grad_norm": 0.58203125, "learning_rate": 0.0019461669289005535, "loss": 0.1813, "step": 11514 }, { "epoch": 0.02041881985391572, "grad_norm": 0.43359375, "learning_rate": 0.0019461466512498203, "loss": 0.2265, "step": 11516 }, { "epoch": 0.020422366019225535, "grad_norm": 4.8125, "learning_rate": 0.0019461263698985333, "loss": 0.2465, "step": 11518 }, { "epoch": 0.02042591218453535, "grad_norm": 6.5, "learning_rate": 0.0019461060848467806, "loss": 0.2049, "step": 11520 }, { "epoch": 0.020429458349845164, "grad_norm": 0.87890625, "learning_rate": 0.001946085796094652, "loss": 0.2118, "step": 11522 }, { "epoch": 0.020433004515154982, "grad_norm": 3.5625, "learning_rate": 0.0019460655036422352, "loss": 0.2437, "step": 11524 }, { "epoch": 0.020436550680464797, "grad_norm": 0.423828125, "learning_rate": 0.0019460452074896194, "loss": 0.1848, "step": 11526 }, { "epoch": 0.02044009684577461, "grad_norm": 0.6328125, "learning_rate": 0.0019460249076368934, "loss": 0.2134, "step": 11528 }, { "epoch": 0.020443643011084426, "grad_norm": 1.28125, "learning_rate": 0.0019460046040841459, "loss": 0.3412, "step": 11530 }, { "epoch": 0.02044718917639424, "grad_norm": 1.0390625, "learning_rate": 0.0019459842968314654, "loss": 0.2589, "step": 11532 }, { "epoch": 0.020450735341704055, "grad_norm": 1.1171875, "learning_rate": 0.0019459639858789414, "loss": 0.3058, "step": 11534 }, { "epoch": 0.020454281507013873, "grad_norm": 1.328125, "learning_rate": 0.0019459436712266618, "loss": 0.2385, "step": 11536 }, { "epoch": 0.020457827672323688, "grad_norm": 1.0625, "learning_rate": 0.0019459233528747164, "loss": 0.322, "step": 11538 }, { "epoch": 0.020461373837633502, "grad_norm": 0.55078125, "learning_rate": 0.0019459030308231935, "loss": 0.4301, "step": 11540 }, { "epoch": 0.020464920002943317, "grad_norm": 2.203125, "learning_rate": 0.0019458827050721824, "loss": 0.1894, "step": 11542 }, { "epoch": 0.02046846616825313, "grad_norm": 0.57421875, "learning_rate": 0.0019458623756217713, "loss": 0.2129, "step": 11544 }, { "epoch": 0.020472012333562946, "grad_norm": 1.203125, "learning_rate": 0.0019458420424720492, "loss": 0.2497, "step": 11546 }, { "epoch": 0.020475558498872764, "grad_norm": 0.33984375, "learning_rate": 0.001945821705623106, "loss": 0.2217, "step": 11548 }, { "epoch": 0.02047910466418258, "grad_norm": 0.5390625, "learning_rate": 0.0019458013650750297, "loss": 0.2514, "step": 11550 }, { "epoch": 0.020482650829492393, "grad_norm": 3.578125, "learning_rate": 0.0019457810208279097, "loss": 0.3662, "step": 11552 }, { "epoch": 0.020486196994802208, "grad_norm": 0.271484375, "learning_rate": 0.0019457606728818342, "loss": 0.2547, "step": 11554 }, { "epoch": 0.020489743160112022, "grad_norm": 0.416015625, "learning_rate": 0.0019457403212368935, "loss": 0.1947, "step": 11556 }, { "epoch": 0.02049328932542184, "grad_norm": 0.30078125, "learning_rate": 0.0019457199658931756, "loss": 0.232, "step": 11558 }, { "epoch": 0.020496835490731655, "grad_norm": 1.0859375, "learning_rate": 0.0019456996068507697, "loss": 0.2397, "step": 11560 }, { "epoch": 0.02050038165604147, "grad_norm": 0.6171875, "learning_rate": 0.001945679244109765, "loss": 0.2404, "step": 11562 }, { "epoch": 0.020503927821351284, "grad_norm": 0.41796875, "learning_rate": 0.0019456588776702508, "loss": 0.1804, "step": 11564 }, { "epoch": 0.0205074739866611, "grad_norm": 0.77734375, "learning_rate": 0.0019456385075323158, "loss": 0.2784, "step": 11566 }, { "epoch": 0.020511020151970913, "grad_norm": 0.9609375, "learning_rate": 0.0019456181336960491, "loss": 0.2522, "step": 11568 }, { "epoch": 0.02051456631728073, "grad_norm": 0.42578125, "learning_rate": 0.0019455977561615397, "loss": 0.2078, "step": 11570 }, { "epoch": 0.020518112482590545, "grad_norm": 0.5234375, "learning_rate": 0.0019455773749288772, "loss": 0.1963, "step": 11572 }, { "epoch": 0.02052165864790036, "grad_norm": 0.59765625, "learning_rate": 0.0019455569899981503, "loss": 0.2886, "step": 11574 }, { "epoch": 0.020525204813210175, "grad_norm": 0.55859375, "learning_rate": 0.0019455366013694483, "loss": 0.2404, "step": 11576 }, { "epoch": 0.02052875097851999, "grad_norm": 0.306640625, "learning_rate": 0.0019455162090428603, "loss": 0.3266, "step": 11578 }, { "epoch": 0.020532297143829804, "grad_norm": 1.25, "learning_rate": 0.0019454958130184755, "loss": 0.2087, "step": 11580 }, { "epoch": 0.02053584330913962, "grad_norm": 0.419921875, "learning_rate": 0.0019454754132963831, "loss": 0.2971, "step": 11582 }, { "epoch": 0.020539389474449436, "grad_norm": 0.61328125, "learning_rate": 0.0019454550098766726, "loss": 0.2213, "step": 11584 }, { "epoch": 0.02054293563975925, "grad_norm": 1.375, "learning_rate": 0.0019454346027594327, "loss": 0.2925, "step": 11586 }, { "epoch": 0.020546481805069065, "grad_norm": 1.453125, "learning_rate": 0.001945414191944753, "loss": 0.2987, "step": 11588 }, { "epoch": 0.02055002797037888, "grad_norm": 0.306640625, "learning_rate": 0.001945393777432723, "loss": 0.2941, "step": 11590 }, { "epoch": 0.020553574135688698, "grad_norm": 0.62109375, "learning_rate": 0.0019453733592234312, "loss": 0.2371, "step": 11592 }, { "epoch": 0.020557120300998512, "grad_norm": 0.796875, "learning_rate": 0.0019453529373169678, "loss": 0.3297, "step": 11594 }, { "epoch": 0.020560666466308327, "grad_norm": 0.60546875, "learning_rate": 0.0019453325117134216, "loss": 0.2031, "step": 11596 }, { "epoch": 0.02056421263161814, "grad_norm": 0.55078125, "learning_rate": 0.0019453120824128817, "loss": 0.281, "step": 11598 }, { "epoch": 0.020567758796927956, "grad_norm": 0.291015625, "learning_rate": 0.001945291649415438, "loss": 0.2049, "step": 11600 }, { "epoch": 0.02057130496223777, "grad_norm": 0.875, "learning_rate": 0.0019452712127211796, "loss": 0.3167, "step": 11602 }, { "epoch": 0.02057485112754759, "grad_norm": 0.451171875, "learning_rate": 0.001945250772330196, "loss": 0.2726, "step": 11604 }, { "epoch": 0.020578397292857403, "grad_norm": 0.24609375, "learning_rate": 0.0019452303282425766, "loss": 0.1802, "step": 11606 }, { "epoch": 0.020581943458167218, "grad_norm": 1.375, "learning_rate": 0.0019452098804584104, "loss": 0.2708, "step": 11608 }, { "epoch": 0.020585489623477032, "grad_norm": 0.625, "learning_rate": 0.0019451894289777873, "loss": 0.2571, "step": 11610 }, { "epoch": 0.020589035788786847, "grad_norm": 0.6484375, "learning_rate": 0.0019451689738007965, "loss": 0.2276, "step": 11612 }, { "epoch": 0.02059258195409666, "grad_norm": 0.44921875, "learning_rate": 0.0019451485149275278, "loss": 0.2533, "step": 11614 }, { "epoch": 0.02059612811940648, "grad_norm": 0.73828125, "learning_rate": 0.00194512805235807, "loss": 0.2683, "step": 11616 }, { "epoch": 0.020599674284716294, "grad_norm": 0.462890625, "learning_rate": 0.0019451075860925135, "loss": 0.3028, "step": 11618 }, { "epoch": 0.02060322045002611, "grad_norm": 0.65625, "learning_rate": 0.001945087116130947, "loss": 0.2313, "step": 11620 }, { "epoch": 0.020606766615335923, "grad_norm": 0.6953125, "learning_rate": 0.0019450666424734601, "loss": 0.2631, "step": 11622 }, { "epoch": 0.020610312780645738, "grad_norm": 0.38671875, "learning_rate": 0.001945046165120143, "loss": 0.2482, "step": 11624 }, { "epoch": 0.020613858945955556, "grad_norm": 0.85546875, "learning_rate": 0.0019450256840710847, "loss": 0.2074, "step": 11626 }, { "epoch": 0.02061740511126537, "grad_norm": 0.25390625, "learning_rate": 0.001945005199326375, "loss": 0.204, "step": 11628 }, { "epoch": 0.020620951276575185, "grad_norm": 0.380859375, "learning_rate": 0.0019449847108861033, "loss": 0.2374, "step": 11630 }, { "epoch": 0.020624497441885, "grad_norm": 0.2353515625, "learning_rate": 0.0019449642187503594, "loss": 0.22, "step": 11632 }, { "epoch": 0.020628043607194814, "grad_norm": 0.4375, "learning_rate": 0.001944943722919233, "loss": 0.2157, "step": 11634 }, { "epoch": 0.02063158977250463, "grad_norm": 0.76953125, "learning_rate": 0.0019449232233928128, "loss": 0.2011, "step": 11636 }, { "epoch": 0.020635135937814447, "grad_norm": 0.466796875, "learning_rate": 0.00194490272017119, "loss": 0.209, "step": 11638 }, { "epoch": 0.02063868210312426, "grad_norm": 1.3828125, "learning_rate": 0.0019448822132544531, "loss": 0.4375, "step": 11640 }, { "epoch": 0.020642228268434076, "grad_norm": 0.3671875, "learning_rate": 0.0019448617026426923, "loss": 0.175, "step": 11642 }, { "epoch": 0.02064577443374389, "grad_norm": 3.53125, "learning_rate": 0.0019448411883359969, "loss": 0.6556, "step": 11644 }, { "epoch": 0.020649320599053705, "grad_norm": 0.4453125, "learning_rate": 0.001944820670334457, "loss": 0.2209, "step": 11646 }, { "epoch": 0.02065286676436352, "grad_norm": 1.328125, "learning_rate": 0.0019448001486381625, "loss": 0.2888, "step": 11648 }, { "epoch": 0.020656412929673337, "grad_norm": 1.1171875, "learning_rate": 0.0019447796232472025, "loss": 0.3793, "step": 11650 }, { "epoch": 0.020659959094983152, "grad_norm": 0.34375, "learning_rate": 0.0019447590941616675, "loss": 0.2187, "step": 11652 }, { "epoch": 0.020663505260292966, "grad_norm": 0.4375, "learning_rate": 0.0019447385613816466, "loss": 0.1953, "step": 11654 }, { "epoch": 0.02066705142560278, "grad_norm": 0.322265625, "learning_rate": 0.0019447180249072304, "loss": 0.2351, "step": 11656 }, { "epoch": 0.020670597590912596, "grad_norm": 0.59765625, "learning_rate": 0.0019446974847385076, "loss": 0.2192, "step": 11658 }, { "epoch": 0.020674143756222414, "grad_norm": 0.72265625, "learning_rate": 0.0019446769408755689, "loss": 0.2595, "step": 11660 }, { "epoch": 0.020677689921532228, "grad_norm": 0.35546875, "learning_rate": 0.0019446563933185042, "loss": 0.2263, "step": 11662 }, { "epoch": 0.020681236086842043, "grad_norm": 0.3984375, "learning_rate": 0.0019446358420674027, "loss": 0.2359, "step": 11664 }, { "epoch": 0.020684782252151857, "grad_norm": 0.9453125, "learning_rate": 0.0019446152871223548, "loss": 0.25, "step": 11666 }, { "epoch": 0.020688328417461672, "grad_norm": 0.421875, "learning_rate": 0.0019445947284834502, "loss": 0.2671, "step": 11668 }, { "epoch": 0.020691874582771486, "grad_norm": 1.9296875, "learning_rate": 0.0019445741661507788, "loss": 0.6032, "step": 11670 }, { "epoch": 0.020695420748081304, "grad_norm": 0.58203125, "learning_rate": 0.0019445536001244305, "loss": 0.4077, "step": 11672 }, { "epoch": 0.02069896691339112, "grad_norm": 0.58203125, "learning_rate": 0.0019445330304044958, "loss": 0.2392, "step": 11674 }, { "epoch": 0.020702513078700933, "grad_norm": 1.7109375, "learning_rate": 0.0019445124569910637, "loss": 0.4682, "step": 11676 }, { "epoch": 0.020706059244010748, "grad_norm": 0.51953125, "learning_rate": 0.0019444918798842247, "loss": 0.2496, "step": 11678 }, { "epoch": 0.020709605409320563, "grad_norm": 0.484375, "learning_rate": 0.001944471299084069, "loss": 0.5108, "step": 11680 }, { "epoch": 0.020713151574630377, "grad_norm": 0.41015625, "learning_rate": 0.0019444507145906862, "loss": 0.2027, "step": 11682 }, { "epoch": 0.020716697739940195, "grad_norm": 0.57421875, "learning_rate": 0.0019444301264041667, "loss": 0.2185, "step": 11684 }, { "epoch": 0.02072024390525001, "grad_norm": 1.34375, "learning_rate": 0.0019444095345246002, "loss": 0.2453, "step": 11686 }, { "epoch": 0.020723790070559824, "grad_norm": 0.53125, "learning_rate": 0.0019443889389520767, "loss": 0.2386, "step": 11688 }, { "epoch": 0.02072733623586964, "grad_norm": 0.34765625, "learning_rate": 0.0019443683396866867, "loss": 0.2393, "step": 11690 }, { "epoch": 0.020730882401179453, "grad_norm": 0.86328125, "learning_rate": 0.00194434773672852, "loss": 0.2187, "step": 11692 }, { "epoch": 0.02073442856648927, "grad_norm": 0.578125, "learning_rate": 0.0019443271300776666, "loss": 0.2132, "step": 11694 }, { "epoch": 0.020737974731799086, "grad_norm": 0.50390625, "learning_rate": 0.0019443065197342168, "loss": 0.2072, "step": 11696 }, { "epoch": 0.0207415208971089, "grad_norm": 0.69140625, "learning_rate": 0.0019442859056982612, "loss": 0.2375, "step": 11698 }, { "epoch": 0.020745067062418715, "grad_norm": 0.27734375, "learning_rate": 0.001944265287969889, "loss": 0.2856, "step": 11700 }, { "epoch": 0.02074861322772853, "grad_norm": 0.5625, "learning_rate": 0.0019442446665491905, "loss": 0.2713, "step": 11702 }, { "epoch": 0.020752159393038344, "grad_norm": 0.359375, "learning_rate": 0.0019442240414362568, "loss": 0.2755, "step": 11704 }, { "epoch": 0.020755705558348162, "grad_norm": 0.419921875, "learning_rate": 0.0019442034126311773, "loss": 0.2994, "step": 11706 }, { "epoch": 0.020759251723657977, "grad_norm": 0.25, "learning_rate": 0.0019441827801340427, "loss": 0.2344, "step": 11708 }, { "epoch": 0.02076279788896779, "grad_norm": 0.86328125, "learning_rate": 0.001944162143944943, "loss": 0.3913, "step": 11710 }, { "epoch": 0.020766344054277606, "grad_norm": 1.640625, "learning_rate": 0.001944141504063968, "loss": 0.2525, "step": 11712 }, { "epoch": 0.02076989021958742, "grad_norm": 2.828125, "learning_rate": 0.0019441208604912088, "loss": 0.3243, "step": 11714 }, { "epoch": 0.020773436384897235, "grad_norm": 0.33984375, "learning_rate": 0.0019441002132267549, "loss": 0.2193, "step": 11716 }, { "epoch": 0.020776982550207053, "grad_norm": 0.314453125, "learning_rate": 0.0019440795622706975, "loss": 0.2688, "step": 11718 }, { "epoch": 0.020780528715516867, "grad_norm": 0.44921875, "learning_rate": 0.0019440589076231258, "loss": 0.2419, "step": 11720 }, { "epoch": 0.020784074880826682, "grad_norm": 0.314453125, "learning_rate": 0.001944038249284131, "loss": 0.2258, "step": 11722 }, { "epoch": 0.020787621046136497, "grad_norm": 0.953125, "learning_rate": 0.0019440175872538032, "loss": 0.2392, "step": 11724 }, { "epoch": 0.02079116721144631, "grad_norm": 0.55078125, "learning_rate": 0.0019439969215322328, "loss": 0.2375, "step": 11726 }, { "epoch": 0.02079471337675613, "grad_norm": 0.30859375, "learning_rate": 0.00194397625211951, "loss": 0.2131, "step": 11728 }, { "epoch": 0.020798259542065944, "grad_norm": 1.1484375, "learning_rate": 0.0019439555790157254, "loss": 0.2618, "step": 11730 }, { "epoch": 0.020801805707375758, "grad_norm": 0.330078125, "learning_rate": 0.0019439349022209692, "loss": 0.2294, "step": 11732 }, { "epoch": 0.020805351872685573, "grad_norm": 0.48828125, "learning_rate": 0.001943914221735332, "loss": 0.1592, "step": 11734 }, { "epoch": 0.020808898037995387, "grad_norm": 0.42578125, "learning_rate": 0.0019438935375589044, "loss": 0.1544, "step": 11736 }, { "epoch": 0.020812444203305202, "grad_norm": 2.0, "learning_rate": 0.0019438728496917763, "loss": 0.4247, "step": 11738 }, { "epoch": 0.02081599036861502, "grad_norm": 0.359375, "learning_rate": 0.0019438521581340387, "loss": 0.2018, "step": 11740 }, { "epoch": 0.020819536533924834, "grad_norm": 0.36328125, "learning_rate": 0.001943831462885782, "loss": 0.1906, "step": 11742 }, { "epoch": 0.02082308269923465, "grad_norm": 3.078125, "learning_rate": 0.0019438107639470966, "loss": 0.3867, "step": 11744 }, { "epoch": 0.020826628864544464, "grad_norm": 0.59375, "learning_rate": 0.001943790061318073, "loss": 0.2404, "step": 11746 }, { "epoch": 0.020830175029854278, "grad_norm": 0.43359375, "learning_rate": 0.0019437693549988018, "loss": 0.2527, "step": 11748 }, { "epoch": 0.020833721195164093, "grad_norm": 1.546875, "learning_rate": 0.0019437486449893737, "loss": 0.3323, "step": 11750 }, { "epoch": 0.02083726736047391, "grad_norm": 0.248046875, "learning_rate": 0.0019437279312898791, "loss": 0.2413, "step": 11752 }, { "epoch": 0.020840813525783725, "grad_norm": 0.279296875, "learning_rate": 0.0019437072139004087, "loss": 0.2021, "step": 11754 }, { "epoch": 0.02084435969109354, "grad_norm": 0.8359375, "learning_rate": 0.0019436864928210527, "loss": 0.1949, "step": 11756 }, { "epoch": 0.020847905856403354, "grad_norm": 0.515625, "learning_rate": 0.0019436657680519023, "loss": 0.2339, "step": 11758 }, { "epoch": 0.02085145202171317, "grad_norm": 0.46875, "learning_rate": 0.0019436450395930477, "loss": 0.2197, "step": 11760 }, { "epoch": 0.020854998187022987, "grad_norm": 0.27734375, "learning_rate": 0.0019436243074445801, "loss": 0.2461, "step": 11762 }, { "epoch": 0.0208585443523328, "grad_norm": 1.1640625, "learning_rate": 0.0019436035716065897, "loss": 0.3151, "step": 11764 }, { "epoch": 0.020862090517642616, "grad_norm": 1.53125, "learning_rate": 0.0019435828320791668, "loss": 0.3223, "step": 11766 }, { "epoch": 0.02086563668295243, "grad_norm": 3.875, "learning_rate": 0.0019435620888624031, "loss": 0.4664, "step": 11768 }, { "epoch": 0.020869182848262245, "grad_norm": 0.7578125, "learning_rate": 0.0019435413419563888, "loss": 0.3204, "step": 11770 }, { "epoch": 0.02087272901357206, "grad_norm": 0.51171875, "learning_rate": 0.0019435205913612146, "loss": 0.2213, "step": 11772 }, { "epoch": 0.020876275178881878, "grad_norm": 0.423828125, "learning_rate": 0.0019434998370769713, "loss": 0.2593, "step": 11774 }, { "epoch": 0.020879821344191692, "grad_norm": 0.3828125, "learning_rate": 0.0019434790791037495, "loss": 0.2493, "step": 11776 }, { "epoch": 0.020883367509501507, "grad_norm": 0.306640625, "learning_rate": 0.0019434583174416402, "loss": 0.1507, "step": 11778 }, { "epoch": 0.02088691367481132, "grad_norm": 0.341796875, "learning_rate": 0.0019434375520907346, "loss": 0.264, "step": 11780 }, { "epoch": 0.020890459840121136, "grad_norm": 1.203125, "learning_rate": 0.0019434167830511228, "loss": 0.2504, "step": 11782 }, { "epoch": 0.02089400600543095, "grad_norm": 0.435546875, "learning_rate": 0.0019433960103228958, "loss": 0.1792, "step": 11784 }, { "epoch": 0.02089755217074077, "grad_norm": 0.255859375, "learning_rate": 0.0019433752339061442, "loss": 0.2101, "step": 11786 }, { "epoch": 0.020901098336050583, "grad_norm": 1.1640625, "learning_rate": 0.0019433544538009597, "loss": 0.1699, "step": 11788 }, { "epoch": 0.020904644501360398, "grad_norm": 0.59375, "learning_rate": 0.0019433336700074328, "loss": 0.2343, "step": 11790 }, { "epoch": 0.020908190666670212, "grad_norm": 0.52734375, "learning_rate": 0.0019433128825256541, "loss": 0.2101, "step": 11792 }, { "epoch": 0.020911736831980027, "grad_norm": 0.4765625, "learning_rate": 0.0019432920913557148, "loss": 0.2671, "step": 11794 }, { "epoch": 0.020915282997289845, "grad_norm": 1.7890625, "learning_rate": 0.0019432712964977058, "loss": 0.2376, "step": 11796 }, { "epoch": 0.02091882916259966, "grad_norm": 0.302734375, "learning_rate": 0.0019432504979517177, "loss": 0.2463, "step": 11798 }, { "epoch": 0.020922375327909474, "grad_norm": 0.9921875, "learning_rate": 0.001943229695717842, "loss": 0.3419, "step": 11800 }, { "epoch": 0.02092592149321929, "grad_norm": 0.5546875, "learning_rate": 0.0019432088897961693, "loss": 0.1954, "step": 11802 }, { "epoch": 0.020929467658529103, "grad_norm": 0.41796875, "learning_rate": 0.0019431880801867908, "loss": 0.1908, "step": 11804 }, { "epoch": 0.020933013823838918, "grad_norm": 0.359375, "learning_rate": 0.0019431672668897974, "loss": 0.2249, "step": 11806 }, { "epoch": 0.020936559989148736, "grad_norm": 0.71484375, "learning_rate": 0.00194314644990528, "loss": 0.2014, "step": 11808 }, { "epoch": 0.02094010615445855, "grad_norm": 0.76953125, "learning_rate": 0.0019431256292333297, "loss": 0.277, "step": 11810 }, { "epoch": 0.020943652319768365, "grad_norm": 0.388671875, "learning_rate": 0.0019431048048740378, "loss": 0.4032, "step": 11812 }, { "epoch": 0.02094719848507818, "grad_norm": 0.267578125, "learning_rate": 0.0019430839768274954, "loss": 0.2798, "step": 11814 }, { "epoch": 0.020950744650387994, "grad_norm": 2.34375, "learning_rate": 0.001943063145093793, "loss": 0.5086, "step": 11816 }, { "epoch": 0.02095429081569781, "grad_norm": 0.546875, "learning_rate": 0.0019430423096730223, "loss": 0.194, "step": 11818 }, { "epoch": 0.020957836981007626, "grad_norm": 0.359375, "learning_rate": 0.0019430214705652745, "loss": 0.1704, "step": 11820 }, { "epoch": 0.02096138314631744, "grad_norm": 0.5703125, "learning_rate": 0.0019430006277706402, "loss": 0.1989, "step": 11822 }, { "epoch": 0.020964929311627255, "grad_norm": 2.0, "learning_rate": 0.0019429797812892107, "loss": 0.2876, "step": 11824 }, { "epoch": 0.02096847547693707, "grad_norm": 0.3515625, "learning_rate": 0.0019429589311210776, "loss": 0.2928, "step": 11826 }, { "epoch": 0.020972021642246885, "grad_norm": 0.92578125, "learning_rate": 0.0019429380772663317, "loss": 0.2403, "step": 11828 }, { "epoch": 0.020975567807556703, "grad_norm": 1.328125, "learning_rate": 0.0019429172197250645, "loss": 0.2353, "step": 11830 }, { "epoch": 0.020979113972866517, "grad_norm": 0.70703125, "learning_rate": 0.0019428963584973665, "loss": 0.1783, "step": 11832 }, { "epoch": 0.02098266013817633, "grad_norm": 1.1328125, "learning_rate": 0.0019428754935833297, "loss": 0.2238, "step": 11834 }, { "epoch": 0.020986206303486146, "grad_norm": 0.5390625, "learning_rate": 0.001942854624983045, "loss": 0.2795, "step": 11836 }, { "epoch": 0.02098975246879596, "grad_norm": 0.80859375, "learning_rate": 0.0019428337526966038, "loss": 0.252, "step": 11838 }, { "epoch": 0.020993298634105775, "grad_norm": 1.0625, "learning_rate": 0.0019428128767240973, "loss": 0.2096, "step": 11840 }, { "epoch": 0.020996844799415593, "grad_norm": 0.65234375, "learning_rate": 0.0019427919970656168, "loss": 0.1612, "step": 11842 }, { "epoch": 0.021000390964725408, "grad_norm": 0.52734375, "learning_rate": 0.001942771113721254, "loss": 0.3048, "step": 11844 }, { "epoch": 0.021003937130035222, "grad_norm": 0.29296875, "learning_rate": 0.0019427502266910997, "loss": 0.239, "step": 11846 }, { "epoch": 0.021007483295345037, "grad_norm": 0.3671875, "learning_rate": 0.0019427293359752453, "loss": 0.2246, "step": 11848 }, { "epoch": 0.02101102946065485, "grad_norm": 0.625, "learning_rate": 0.0019427084415737826, "loss": 0.1583, "step": 11850 }, { "epoch": 0.021014575625964666, "grad_norm": 0.470703125, "learning_rate": 0.0019426875434868022, "loss": 0.2764, "step": 11852 }, { "epoch": 0.021018121791274484, "grad_norm": 0.451171875, "learning_rate": 0.0019426666417143965, "loss": 0.1836, "step": 11854 }, { "epoch": 0.0210216679565843, "grad_norm": 0.78515625, "learning_rate": 0.0019426457362566561, "loss": 0.2434, "step": 11856 }, { "epoch": 0.021025214121894113, "grad_norm": 0.380859375, "learning_rate": 0.001942624827113673, "loss": 0.2406, "step": 11858 }, { "epoch": 0.021028760287203928, "grad_norm": 0.353515625, "learning_rate": 0.001942603914285538, "loss": 0.2339, "step": 11860 }, { "epoch": 0.021032306452513742, "grad_norm": 1.1875, "learning_rate": 0.0019425829977723428, "loss": 0.2634, "step": 11862 }, { "epoch": 0.02103585261782356, "grad_norm": 0.318359375, "learning_rate": 0.0019425620775741792, "loss": 0.1601, "step": 11864 }, { "epoch": 0.021039398783133375, "grad_norm": 0.38671875, "learning_rate": 0.001942541153691139, "loss": 0.2079, "step": 11866 }, { "epoch": 0.02104294494844319, "grad_norm": 0.53125, "learning_rate": 0.0019425202261233124, "loss": 0.2634, "step": 11868 }, { "epoch": 0.021046491113753004, "grad_norm": 1.4296875, "learning_rate": 0.001942499294870792, "loss": 0.2744, "step": 11870 }, { "epoch": 0.02105003727906282, "grad_norm": 0.40625, "learning_rate": 0.0019424783599336693, "loss": 0.2494, "step": 11872 }, { "epoch": 0.021053583444372633, "grad_norm": 0.380859375, "learning_rate": 0.0019424574213120355, "loss": 0.2023, "step": 11874 }, { "epoch": 0.02105712960968245, "grad_norm": 0.37890625, "learning_rate": 0.001942436479005982, "loss": 0.2424, "step": 11876 }, { "epoch": 0.021060675774992266, "grad_norm": 0.95703125, "learning_rate": 0.0019424155330156011, "loss": 0.2661, "step": 11878 }, { "epoch": 0.02106422194030208, "grad_norm": 2.546875, "learning_rate": 0.0019423945833409839, "loss": 0.4265, "step": 11880 }, { "epoch": 0.021067768105611895, "grad_norm": 0.310546875, "learning_rate": 0.0019423736299822218, "loss": 0.149, "step": 11882 }, { "epoch": 0.02107131427092171, "grad_norm": 0.609375, "learning_rate": 0.001942352672939407, "loss": 0.1588, "step": 11884 }, { "epoch": 0.021074860436231524, "grad_norm": 0.53125, "learning_rate": 0.001942331712212631, "loss": 0.205, "step": 11886 }, { "epoch": 0.021078406601541342, "grad_norm": 0.46484375, "learning_rate": 0.0019423107478019853, "loss": 0.1862, "step": 11888 }, { "epoch": 0.021081952766851157, "grad_norm": 0.66015625, "learning_rate": 0.0019422897797075616, "loss": 0.2353, "step": 11890 }, { "epoch": 0.02108549893216097, "grad_norm": 0.37109375, "learning_rate": 0.0019422688079294517, "loss": 0.1906, "step": 11892 }, { "epoch": 0.021089045097470786, "grad_norm": 1.3203125, "learning_rate": 0.0019422478324677473, "loss": 0.2766, "step": 11894 }, { "epoch": 0.0210925912627806, "grad_norm": 0.375, "learning_rate": 0.00194222685332254, "loss": 0.3606, "step": 11896 }, { "epoch": 0.021096137428090418, "grad_norm": 1.1796875, "learning_rate": 0.0019422058704939218, "loss": 0.4156, "step": 11898 }, { "epoch": 0.021099683593400233, "grad_norm": 0.7890625, "learning_rate": 0.0019421848839819844, "loss": 0.17, "step": 11900 }, { "epoch": 0.021103229758710047, "grad_norm": 0.37109375, "learning_rate": 0.0019421638937868193, "loss": 0.2583, "step": 11902 }, { "epoch": 0.021106775924019862, "grad_norm": 1.9140625, "learning_rate": 0.0019421428999085188, "loss": 0.438, "step": 11904 }, { "epoch": 0.021110322089329676, "grad_norm": 1.71875, "learning_rate": 0.0019421219023471742, "loss": 0.3924, "step": 11906 }, { "epoch": 0.02111386825463949, "grad_norm": 1.1015625, "learning_rate": 0.0019421009011028776, "loss": 0.2796, "step": 11908 }, { "epoch": 0.02111741441994931, "grad_norm": 1.1328125, "learning_rate": 0.0019420798961757206, "loss": 0.2339, "step": 11910 }, { "epoch": 0.021120960585259124, "grad_norm": 1.90625, "learning_rate": 0.0019420588875657958, "loss": 0.2266, "step": 11912 }, { "epoch": 0.021124506750568938, "grad_norm": 0.81640625, "learning_rate": 0.0019420378752731942, "loss": 0.2492, "step": 11914 }, { "epoch": 0.021128052915878753, "grad_norm": 2.21875, "learning_rate": 0.0019420168592980082, "loss": 0.4295, "step": 11916 }, { "epoch": 0.021131599081188567, "grad_norm": 1.5859375, "learning_rate": 0.0019419958396403294, "loss": 0.3756, "step": 11918 }, { "epoch": 0.021135145246498382, "grad_norm": 2.25, "learning_rate": 0.0019419748163002498, "loss": 0.2401, "step": 11920 }, { "epoch": 0.0211386914118082, "grad_norm": 0.703125, "learning_rate": 0.0019419537892778618, "loss": 0.2321, "step": 11922 }, { "epoch": 0.021142237577118014, "grad_norm": 0.333984375, "learning_rate": 0.0019419327585732565, "loss": 0.2658, "step": 11924 }, { "epoch": 0.02114578374242783, "grad_norm": 1.203125, "learning_rate": 0.0019419117241865267, "loss": 0.5251, "step": 11926 }, { "epoch": 0.021149329907737643, "grad_norm": 1.8828125, "learning_rate": 0.001941890686117764, "loss": 0.3023, "step": 11928 }, { "epoch": 0.021152876073047458, "grad_norm": 0.5546875, "learning_rate": 0.0019418696443670605, "loss": 0.2206, "step": 11930 }, { "epoch": 0.021156422238357276, "grad_norm": 0.3359375, "learning_rate": 0.001941848598934508, "loss": 0.2282, "step": 11932 }, { "epoch": 0.02115996840366709, "grad_norm": 0.306640625, "learning_rate": 0.0019418275498201988, "loss": 0.2521, "step": 11934 }, { "epoch": 0.021163514568976905, "grad_norm": 0.53125, "learning_rate": 0.001941806497024225, "loss": 0.2321, "step": 11936 }, { "epoch": 0.02116706073428672, "grad_norm": 0.359375, "learning_rate": 0.0019417854405466787, "loss": 0.248, "step": 11938 }, { "epoch": 0.021170606899596534, "grad_norm": 0.7109375, "learning_rate": 0.0019417643803876516, "loss": 0.2089, "step": 11940 }, { "epoch": 0.02117415306490635, "grad_norm": 0.51953125, "learning_rate": 0.001941743316547236, "loss": 0.1851, "step": 11942 }, { "epoch": 0.021177699230216167, "grad_norm": 0.37890625, "learning_rate": 0.0019417222490255247, "loss": 0.205, "step": 11944 }, { "epoch": 0.02118124539552598, "grad_norm": 0.357421875, "learning_rate": 0.0019417011778226083, "loss": 0.2196, "step": 11946 }, { "epoch": 0.021184791560835796, "grad_norm": 0.6796875, "learning_rate": 0.0019416801029385805, "loss": 0.2138, "step": 11948 }, { "epoch": 0.02118833772614561, "grad_norm": 0.328125, "learning_rate": 0.0019416590243735328, "loss": 0.2103, "step": 11950 }, { "epoch": 0.021191883891455425, "grad_norm": 0.6640625, "learning_rate": 0.001941637942127557, "loss": 0.1775, "step": 11952 }, { "epoch": 0.02119543005676524, "grad_norm": 1.0546875, "learning_rate": 0.001941616856200746, "loss": 0.3328, "step": 11954 }, { "epoch": 0.021198976222075058, "grad_norm": 0.408203125, "learning_rate": 0.0019415957665931917, "loss": 0.2312, "step": 11956 }, { "epoch": 0.021202522387384872, "grad_norm": 0.26953125, "learning_rate": 0.0019415746733049864, "loss": 0.1949, "step": 11958 }, { "epoch": 0.021206068552694687, "grad_norm": 0.57421875, "learning_rate": 0.0019415535763362224, "loss": 0.2523, "step": 11960 }, { "epoch": 0.0212096147180045, "grad_norm": 0.275390625, "learning_rate": 0.0019415324756869917, "loss": 0.2221, "step": 11962 }, { "epoch": 0.021213160883314316, "grad_norm": 0.66796875, "learning_rate": 0.001941511371357387, "loss": 0.2071, "step": 11964 }, { "epoch": 0.021216707048624134, "grad_norm": 0.275390625, "learning_rate": 0.0019414902633475002, "loss": 0.1605, "step": 11966 }, { "epoch": 0.02122025321393395, "grad_norm": 0.447265625, "learning_rate": 0.0019414691516574237, "loss": 0.2008, "step": 11968 }, { "epoch": 0.021223799379243763, "grad_norm": 0.41796875, "learning_rate": 0.0019414480362872502, "loss": 0.1891, "step": 11970 }, { "epoch": 0.021227345544553577, "grad_norm": 0.490234375, "learning_rate": 0.0019414269172370715, "loss": 0.2498, "step": 11972 }, { "epoch": 0.021230891709863392, "grad_norm": 0.42578125, "learning_rate": 0.0019414057945069804, "loss": 0.2774, "step": 11974 }, { "epoch": 0.021234437875173207, "grad_norm": 0.265625, "learning_rate": 0.001941384668097069, "loss": 0.1612, "step": 11976 }, { "epoch": 0.021237984040483025, "grad_norm": 0.9921875, "learning_rate": 0.0019413635380074296, "loss": 0.2101, "step": 11978 }, { "epoch": 0.02124153020579284, "grad_norm": 0.69140625, "learning_rate": 0.001941342404238155, "loss": 0.3062, "step": 11980 }, { "epoch": 0.021245076371102654, "grad_norm": 0.50390625, "learning_rate": 0.0019413212667893376, "loss": 0.2146, "step": 11982 }, { "epoch": 0.021248622536412468, "grad_norm": 0.4921875, "learning_rate": 0.0019413001256610696, "loss": 0.1958, "step": 11984 }, { "epoch": 0.021252168701722283, "grad_norm": 3.40625, "learning_rate": 0.0019412789808534434, "loss": 0.1789, "step": 11986 }, { "epoch": 0.021255714867032097, "grad_norm": 0.703125, "learning_rate": 0.0019412578323665518, "loss": 0.241, "step": 11988 }, { "epoch": 0.021259261032341915, "grad_norm": 0.466796875, "learning_rate": 0.0019412366802004871, "loss": 0.2686, "step": 11990 }, { "epoch": 0.02126280719765173, "grad_norm": 0.734375, "learning_rate": 0.0019412155243553415, "loss": 0.2405, "step": 11992 }, { "epoch": 0.021266353362961544, "grad_norm": 0.5, "learning_rate": 0.001941194364831208, "loss": 0.2495, "step": 11994 }, { "epoch": 0.02126989952827136, "grad_norm": 0.1669921875, "learning_rate": 0.001941173201628179, "loss": 0.1895, "step": 11996 }, { "epoch": 0.021273445693581174, "grad_norm": 0.361328125, "learning_rate": 0.001941152034746347, "loss": 0.1647, "step": 11998 }, { "epoch": 0.02127699185889099, "grad_norm": 0.68359375, "learning_rate": 0.0019411308641858046, "loss": 0.2204, "step": 12000 }, { "epoch": 0.021280538024200806, "grad_norm": 0.3046875, "learning_rate": 0.0019411096899466444, "loss": 0.1912, "step": 12002 }, { "epoch": 0.02128408418951062, "grad_norm": 0.49609375, "learning_rate": 0.001941088512028959, "loss": 0.2513, "step": 12004 }, { "epoch": 0.021287630354820435, "grad_norm": 0.41796875, "learning_rate": 0.0019410673304328411, "loss": 0.2244, "step": 12006 }, { "epoch": 0.02129117652013025, "grad_norm": 0.68359375, "learning_rate": 0.0019410461451583832, "loss": 0.2104, "step": 12008 }, { "epoch": 0.021294722685440064, "grad_norm": 0.2578125, "learning_rate": 0.0019410249562056782, "loss": 0.1873, "step": 12010 }, { "epoch": 0.021298268850749882, "grad_norm": 0.3203125, "learning_rate": 0.0019410037635748181, "loss": 0.4049, "step": 12012 }, { "epoch": 0.021301815016059697, "grad_norm": 0.9375, "learning_rate": 0.001940982567265896, "loss": 0.2183, "step": 12014 }, { "epoch": 0.02130536118136951, "grad_norm": 0.482421875, "learning_rate": 0.0019409613672790051, "loss": 0.2753, "step": 12016 }, { "epoch": 0.021308907346679326, "grad_norm": 0.53125, "learning_rate": 0.0019409401636142375, "loss": 0.2376, "step": 12018 }, { "epoch": 0.02131245351198914, "grad_norm": 0.365234375, "learning_rate": 0.001940918956271686, "loss": 0.2325, "step": 12020 }, { "epoch": 0.021315999677298955, "grad_norm": 0.75390625, "learning_rate": 0.0019408977452514437, "loss": 0.2987, "step": 12022 }, { "epoch": 0.021319545842608773, "grad_norm": 0.90625, "learning_rate": 0.0019408765305536032, "loss": 0.2472, "step": 12024 }, { "epoch": 0.021323092007918588, "grad_norm": 1.9765625, "learning_rate": 0.0019408553121782566, "loss": 0.2271, "step": 12026 }, { "epoch": 0.021326638173228402, "grad_norm": 0.5, "learning_rate": 0.0019408340901254978, "loss": 0.3015, "step": 12028 }, { "epoch": 0.021330184338538217, "grad_norm": 0.98046875, "learning_rate": 0.001940812864395419, "loss": 0.2415, "step": 12030 }, { "epoch": 0.02133373050384803, "grad_norm": 0.95703125, "learning_rate": 0.0019407916349881132, "loss": 0.1871, "step": 12032 }, { "epoch": 0.02133727666915785, "grad_norm": 0.2578125, "learning_rate": 0.001940770401903673, "loss": 0.2125, "step": 12034 }, { "epoch": 0.021340822834467664, "grad_norm": 0.3515625, "learning_rate": 0.0019407491651421917, "loss": 0.3204, "step": 12036 }, { "epoch": 0.02134436899977748, "grad_norm": 0.5546875, "learning_rate": 0.0019407279247037614, "loss": 0.1831, "step": 12038 }, { "epoch": 0.021347915165087293, "grad_norm": 0.69921875, "learning_rate": 0.001940706680588476, "loss": 0.1985, "step": 12040 }, { "epoch": 0.021351461330397108, "grad_norm": 0.42578125, "learning_rate": 0.0019406854327964275, "loss": 0.2091, "step": 12042 }, { "epoch": 0.021355007495706922, "grad_norm": 0.44140625, "learning_rate": 0.0019406641813277097, "loss": 0.2764, "step": 12044 }, { "epoch": 0.02135855366101674, "grad_norm": 0.6953125, "learning_rate": 0.0019406429261824149, "loss": 0.2291, "step": 12046 }, { "epoch": 0.021362099826326555, "grad_norm": 1.1484375, "learning_rate": 0.0019406216673606364, "loss": 0.3149, "step": 12048 }, { "epoch": 0.02136564599163637, "grad_norm": 0.6328125, "learning_rate": 0.0019406004048624665, "loss": 0.2321, "step": 12050 }, { "epoch": 0.021369192156946184, "grad_norm": 0.34375, "learning_rate": 0.0019405791386879992, "loss": 0.2101, "step": 12052 }, { "epoch": 0.021372738322256, "grad_norm": 0.66796875, "learning_rate": 0.0019405578688373268, "loss": 0.2775, "step": 12054 }, { "epoch": 0.021376284487565813, "grad_norm": 0.49609375, "learning_rate": 0.0019405365953105427, "loss": 0.1969, "step": 12056 }, { "epoch": 0.02137983065287563, "grad_norm": 0.6953125, "learning_rate": 0.0019405153181077394, "loss": 0.234, "step": 12058 }, { "epoch": 0.021383376818185446, "grad_norm": 0.36328125, "learning_rate": 0.0019404940372290107, "loss": 0.2024, "step": 12060 }, { "epoch": 0.02138692298349526, "grad_norm": 1.1640625, "learning_rate": 0.0019404727526744492, "loss": 0.2882, "step": 12062 }, { "epoch": 0.021390469148805075, "grad_norm": 0.55078125, "learning_rate": 0.0019404514644441482, "loss": 0.3102, "step": 12064 }, { "epoch": 0.02139401531411489, "grad_norm": 0.439453125, "learning_rate": 0.0019404301725382005, "loss": 0.234, "step": 12066 }, { "epoch": 0.021397561479424707, "grad_norm": 0.5703125, "learning_rate": 0.0019404088769566993, "loss": 0.2715, "step": 12068 }, { "epoch": 0.021401107644734522, "grad_norm": 0.73046875, "learning_rate": 0.001940387577699738, "loss": 0.1932, "step": 12070 }, { "epoch": 0.021404653810044336, "grad_norm": 2.5625, "learning_rate": 0.0019403662747674098, "loss": 0.221, "step": 12072 }, { "epoch": 0.02140819997535415, "grad_norm": 0.87109375, "learning_rate": 0.0019403449681598076, "loss": 0.1921, "step": 12074 }, { "epoch": 0.021411746140663965, "grad_norm": 1.9140625, "learning_rate": 0.0019403236578770244, "loss": 0.24, "step": 12076 }, { "epoch": 0.02141529230597378, "grad_norm": 0.44140625, "learning_rate": 0.0019403023439191539, "loss": 0.2054, "step": 12078 }, { "epoch": 0.021418838471283598, "grad_norm": 0.365234375, "learning_rate": 0.001940281026286289, "loss": 0.2254, "step": 12080 }, { "epoch": 0.021422384636593413, "grad_norm": 0.53515625, "learning_rate": 0.0019402597049785226, "loss": 0.2484, "step": 12082 }, { "epoch": 0.021425930801903227, "grad_norm": 0.37109375, "learning_rate": 0.0019402383799959487, "loss": 0.2282, "step": 12084 }, { "epoch": 0.02142947696721304, "grad_norm": 0.6640625, "learning_rate": 0.00194021705133866, "loss": 0.251, "step": 12086 }, { "epoch": 0.021433023132522856, "grad_norm": 0.30859375, "learning_rate": 0.00194019571900675, "loss": 0.1941, "step": 12088 }, { "epoch": 0.02143656929783267, "grad_norm": 0.365234375, "learning_rate": 0.0019401743830003123, "loss": 0.2044, "step": 12090 }, { "epoch": 0.02144011546314249, "grad_norm": 0.4921875, "learning_rate": 0.0019401530433194394, "loss": 0.2134, "step": 12092 }, { "epoch": 0.021443661628452303, "grad_norm": 0.7890625, "learning_rate": 0.0019401316999642256, "loss": 0.2312, "step": 12094 }, { "epoch": 0.021447207793762118, "grad_norm": 0.99609375, "learning_rate": 0.0019401103529347635, "loss": 0.2354, "step": 12096 }, { "epoch": 0.021450753959071932, "grad_norm": 1.015625, "learning_rate": 0.0019400890022311466, "loss": 0.3034, "step": 12098 }, { "epoch": 0.021454300124381747, "grad_norm": 1.4375, "learning_rate": 0.0019400676478534685, "loss": 0.2345, "step": 12100 }, { "epoch": 0.021457846289691565, "grad_norm": 0.53515625, "learning_rate": 0.0019400462898018223, "loss": 0.2946, "step": 12102 }, { "epoch": 0.02146139245500138, "grad_norm": 0.63671875, "learning_rate": 0.0019400249280763018, "loss": 0.2005, "step": 12104 }, { "epoch": 0.021464938620311194, "grad_norm": 1.1796875, "learning_rate": 0.0019400035626770003, "loss": 0.374, "step": 12106 }, { "epoch": 0.02146848478562101, "grad_norm": 0.58984375, "learning_rate": 0.001939982193604011, "loss": 0.2566, "step": 12108 }, { "epoch": 0.021472030950930823, "grad_norm": 0.220703125, "learning_rate": 0.0019399608208574273, "loss": 0.1892, "step": 12110 }, { "epoch": 0.021475577116240638, "grad_norm": 0.58203125, "learning_rate": 0.0019399394444373432, "loss": 0.2316, "step": 12112 }, { "epoch": 0.021479123281550456, "grad_norm": 0.431640625, "learning_rate": 0.0019399180643438518, "loss": 0.2632, "step": 12114 }, { "epoch": 0.02148266944686027, "grad_norm": 0.5, "learning_rate": 0.0019398966805770465, "loss": 0.2702, "step": 12116 }, { "epoch": 0.021486215612170085, "grad_norm": 0.326171875, "learning_rate": 0.0019398752931370214, "loss": 0.2617, "step": 12118 }, { "epoch": 0.0214897617774799, "grad_norm": 1.171875, "learning_rate": 0.0019398539020238693, "loss": 0.2637, "step": 12120 }, { "epoch": 0.021493307942789714, "grad_norm": 0.322265625, "learning_rate": 0.001939832507237684, "loss": 0.2221, "step": 12122 }, { "epoch": 0.02149685410809953, "grad_norm": 0.6484375, "learning_rate": 0.0019398111087785593, "loss": 0.2877, "step": 12124 }, { "epoch": 0.021500400273409347, "grad_norm": 0.42578125, "learning_rate": 0.0019397897066465886, "loss": 0.1861, "step": 12126 }, { "epoch": 0.02150394643871916, "grad_norm": 0.59375, "learning_rate": 0.0019397683008418654, "loss": 0.2165, "step": 12128 }, { "epoch": 0.021507492604028976, "grad_norm": 0.3984375, "learning_rate": 0.0019397468913644835, "loss": 0.2441, "step": 12130 }, { "epoch": 0.02151103876933879, "grad_norm": 0.37890625, "learning_rate": 0.0019397254782145365, "loss": 0.2458, "step": 12132 }, { "epoch": 0.021514584934648605, "grad_norm": 1.0, "learning_rate": 0.0019397040613921182, "loss": 0.4961, "step": 12134 }, { "epoch": 0.021518131099958423, "grad_norm": 0.97265625, "learning_rate": 0.001939682640897322, "loss": 0.1737, "step": 12136 }, { "epoch": 0.021521677265268237, "grad_norm": 0.2890625, "learning_rate": 0.001939661216730242, "loss": 0.2835, "step": 12138 }, { "epoch": 0.021525223430578052, "grad_norm": 0.486328125, "learning_rate": 0.001939639788890971, "loss": 0.2059, "step": 12140 }, { "epoch": 0.021528769595887867, "grad_norm": 0.4609375, "learning_rate": 0.0019396183573796033, "loss": 0.1787, "step": 12142 }, { "epoch": 0.02153231576119768, "grad_norm": 0.9453125, "learning_rate": 0.001939596922196233, "loss": 0.2147, "step": 12144 }, { "epoch": 0.021535861926507496, "grad_norm": 1.2109375, "learning_rate": 0.0019395754833409532, "loss": 0.3419, "step": 12146 }, { "epoch": 0.021539408091817314, "grad_norm": 0.296875, "learning_rate": 0.0019395540408138582, "loss": 0.2073, "step": 12148 }, { "epoch": 0.021542954257127128, "grad_norm": 0.51953125, "learning_rate": 0.0019395325946150411, "loss": 0.2068, "step": 12150 }, { "epoch": 0.021546500422436943, "grad_norm": 0.5234375, "learning_rate": 0.0019395111447445962, "loss": 0.2172, "step": 12152 }, { "epoch": 0.021550046587746757, "grad_norm": 0.82421875, "learning_rate": 0.0019394896912026172, "loss": 0.2032, "step": 12154 }, { "epoch": 0.021553592753056572, "grad_norm": 0.451171875, "learning_rate": 0.001939468233989198, "loss": 0.2949, "step": 12156 }, { "epoch": 0.021557138918366386, "grad_norm": 0.7109375, "learning_rate": 0.0019394467731044325, "loss": 0.2129, "step": 12158 }, { "epoch": 0.021560685083676204, "grad_norm": 0.69921875, "learning_rate": 0.0019394253085484143, "loss": 0.2023, "step": 12160 }, { "epoch": 0.02156423124898602, "grad_norm": 2.28125, "learning_rate": 0.0019394038403212372, "loss": 0.2987, "step": 12162 }, { "epoch": 0.021567777414295834, "grad_norm": 1.4453125, "learning_rate": 0.0019393823684229954, "loss": 0.2498, "step": 12164 }, { "epoch": 0.021571323579605648, "grad_norm": 0.3515625, "learning_rate": 0.0019393608928537829, "loss": 0.1891, "step": 12166 }, { "epoch": 0.021574869744915463, "grad_norm": 0.53515625, "learning_rate": 0.0019393394136136934, "loss": 0.2225, "step": 12168 }, { "epoch": 0.02157841591022528, "grad_norm": 0.392578125, "learning_rate": 0.0019393179307028207, "loss": 0.1897, "step": 12170 }, { "epoch": 0.021581962075535095, "grad_norm": 0.2099609375, "learning_rate": 0.0019392964441212592, "loss": 0.2067, "step": 12172 }, { "epoch": 0.02158550824084491, "grad_norm": 0.54296875, "learning_rate": 0.001939274953869102, "loss": 0.1903, "step": 12174 }, { "epoch": 0.021589054406154724, "grad_norm": 0.671875, "learning_rate": 0.001939253459946444, "loss": 0.2438, "step": 12176 }, { "epoch": 0.02159260057146454, "grad_norm": 0.6171875, "learning_rate": 0.0019392319623533787, "loss": 0.2248, "step": 12178 }, { "epoch": 0.021596146736774353, "grad_norm": 0.310546875, "learning_rate": 0.0019392104610900006, "loss": 0.1917, "step": 12180 }, { "epoch": 0.02159969290208417, "grad_norm": 0.48828125, "learning_rate": 0.001939188956156403, "loss": 0.1941, "step": 12182 }, { "epoch": 0.021603239067393986, "grad_norm": 1.28125, "learning_rate": 0.0019391674475526808, "loss": 0.252, "step": 12184 }, { "epoch": 0.0216067852327038, "grad_norm": 0.55078125, "learning_rate": 0.0019391459352789274, "loss": 0.4299, "step": 12186 }, { "epoch": 0.021610331398013615, "grad_norm": 0.3828125, "learning_rate": 0.001939124419335237, "loss": 0.2168, "step": 12188 }, { "epoch": 0.02161387756332343, "grad_norm": 0.318359375, "learning_rate": 0.001939102899721704, "loss": 0.2389, "step": 12190 }, { "epoch": 0.021617423728633244, "grad_norm": 0.58984375, "learning_rate": 0.0019390813764384224, "loss": 0.1927, "step": 12192 }, { "epoch": 0.021620969893943062, "grad_norm": 1.375, "learning_rate": 0.001939059849485486, "loss": 0.2345, "step": 12194 }, { "epoch": 0.021624516059252877, "grad_norm": 0.55078125, "learning_rate": 0.0019390383188629897, "loss": 0.1983, "step": 12196 }, { "epoch": 0.02162806222456269, "grad_norm": 0.49609375, "learning_rate": 0.0019390167845710264, "loss": 0.2746, "step": 12198 }, { "epoch": 0.021631608389872506, "grad_norm": 0.49609375, "learning_rate": 0.0019389952466096917, "loss": 0.1553, "step": 12200 }, { "epoch": 0.02163515455518232, "grad_norm": 1.21875, "learning_rate": 0.0019389737049790785, "loss": 0.2208, "step": 12202 }, { "epoch": 0.02163870072049214, "grad_norm": 0.24609375, "learning_rate": 0.0019389521596792822, "loss": 0.1653, "step": 12204 }, { "epoch": 0.021642246885801953, "grad_norm": 0.546875, "learning_rate": 0.0019389306107103963, "loss": 0.3346, "step": 12206 }, { "epoch": 0.021645793051111768, "grad_norm": 0.2451171875, "learning_rate": 0.0019389090580725153, "loss": 0.2473, "step": 12208 }, { "epoch": 0.021649339216421582, "grad_norm": 0.51171875, "learning_rate": 0.0019388875017657332, "loss": 0.2674, "step": 12210 }, { "epoch": 0.021652885381731397, "grad_norm": 0.255859375, "learning_rate": 0.0019388659417901447, "loss": 0.1683, "step": 12212 }, { "epoch": 0.02165643154704121, "grad_norm": 0.40625, "learning_rate": 0.0019388443781458437, "loss": 0.1817, "step": 12214 }, { "epoch": 0.02165997771235103, "grad_norm": 0.37890625, "learning_rate": 0.0019388228108329244, "loss": 0.22, "step": 12216 }, { "epoch": 0.021663523877660844, "grad_norm": 0.6875, "learning_rate": 0.0019388012398514818, "loss": 0.2446, "step": 12218 }, { "epoch": 0.02166707004297066, "grad_norm": 1.5078125, "learning_rate": 0.0019387796652016096, "loss": 0.3187, "step": 12220 }, { "epoch": 0.021670616208280473, "grad_norm": 0.341796875, "learning_rate": 0.0019387580868834022, "loss": 0.2163, "step": 12222 }, { "epoch": 0.021674162373590287, "grad_norm": 0.376953125, "learning_rate": 0.0019387365048969545, "loss": 0.1586, "step": 12224 }, { "epoch": 0.021677708538900102, "grad_norm": 0.421875, "learning_rate": 0.0019387149192423606, "loss": 0.2063, "step": 12226 }, { "epoch": 0.02168125470420992, "grad_norm": 0.48046875, "learning_rate": 0.0019386933299197144, "loss": 0.2062, "step": 12228 }, { "epoch": 0.021684800869519735, "grad_norm": 1.2578125, "learning_rate": 0.001938671736929111, "loss": 0.2413, "step": 12230 }, { "epoch": 0.02168834703482955, "grad_norm": 1.3515625, "learning_rate": 0.0019386501402706442, "loss": 0.2209, "step": 12232 }, { "epoch": 0.021691893200139364, "grad_norm": 0.82421875, "learning_rate": 0.0019386285399444092, "loss": 0.3747, "step": 12234 }, { "epoch": 0.021695439365449178, "grad_norm": 0.6796875, "learning_rate": 0.0019386069359505003, "loss": 0.2395, "step": 12236 }, { "epoch": 0.021698985530758996, "grad_norm": 0.359375, "learning_rate": 0.0019385853282890113, "loss": 0.1763, "step": 12238 }, { "epoch": 0.02170253169606881, "grad_norm": 0.419921875, "learning_rate": 0.0019385637169600375, "loss": 0.3008, "step": 12240 }, { "epoch": 0.021706077861378625, "grad_norm": 1.625, "learning_rate": 0.001938542101963673, "loss": 0.3692, "step": 12242 }, { "epoch": 0.02170962402668844, "grad_norm": 0.4765625, "learning_rate": 0.0019385204833000122, "loss": 0.1671, "step": 12244 }, { "epoch": 0.021713170191998254, "grad_norm": 0.8515625, "learning_rate": 0.00193849886096915, "loss": 0.2634, "step": 12246 }, { "epoch": 0.02171671635730807, "grad_norm": 1.40625, "learning_rate": 0.0019384772349711813, "loss": 0.3233, "step": 12248 }, { "epoch": 0.021720262522617887, "grad_norm": 0.408203125, "learning_rate": 0.0019384556053061996, "loss": 0.2088, "step": 12250 }, { "epoch": 0.0217238086879277, "grad_norm": 0.447265625, "learning_rate": 0.0019384339719743008, "loss": 0.1729, "step": 12252 }, { "epoch": 0.021727354853237516, "grad_norm": 0.462890625, "learning_rate": 0.001938412334975578, "loss": 0.2285, "step": 12254 }, { "epoch": 0.02173090101854733, "grad_norm": 1.1640625, "learning_rate": 0.001938390694310127, "loss": 0.33, "step": 12256 }, { "epoch": 0.021734447183857145, "grad_norm": 3.125, "learning_rate": 0.0019383690499780424, "loss": 0.3129, "step": 12258 }, { "epoch": 0.02173799334916696, "grad_norm": 0.5234375, "learning_rate": 0.0019383474019794183, "loss": 0.3055, "step": 12260 }, { "epoch": 0.021741539514476778, "grad_norm": 1.1640625, "learning_rate": 0.0019383257503143494, "loss": 0.2927, "step": 12262 }, { "epoch": 0.021745085679786592, "grad_norm": 1.71875, "learning_rate": 0.001938304094982931, "loss": 0.3187, "step": 12264 }, { "epoch": 0.021748631845096407, "grad_norm": 2.5, "learning_rate": 0.0019382824359852574, "loss": 0.2836, "step": 12266 }, { "epoch": 0.02175217801040622, "grad_norm": 1.0078125, "learning_rate": 0.0019382607733214232, "loss": 0.2343, "step": 12268 }, { "epoch": 0.021755724175716036, "grad_norm": 3.6875, "learning_rate": 0.0019382391069915233, "loss": 0.4043, "step": 12270 }, { "epoch": 0.021759270341025854, "grad_norm": 0.84765625, "learning_rate": 0.0019382174369956527, "loss": 0.1895, "step": 12272 }, { "epoch": 0.02176281650633567, "grad_norm": 0.41796875, "learning_rate": 0.0019381957633339058, "loss": 0.2292, "step": 12274 }, { "epoch": 0.021766362671645483, "grad_norm": 0.61328125, "learning_rate": 0.0019381740860063773, "loss": 0.2785, "step": 12276 }, { "epoch": 0.021769908836955298, "grad_norm": 0.341796875, "learning_rate": 0.0019381524050131622, "loss": 0.2036, "step": 12278 }, { "epoch": 0.021773455002265112, "grad_norm": 1.265625, "learning_rate": 0.0019381307203543557, "loss": 0.2037, "step": 12280 }, { "epoch": 0.021777001167574927, "grad_norm": 0.5234375, "learning_rate": 0.0019381090320300521, "loss": 0.2789, "step": 12282 }, { "epoch": 0.021780547332884745, "grad_norm": 0.474609375, "learning_rate": 0.0019380873400403466, "loss": 0.2316, "step": 12284 }, { "epoch": 0.02178409349819456, "grad_norm": 1.2734375, "learning_rate": 0.0019380656443853336, "loss": 0.336, "step": 12286 }, { "epoch": 0.021787639663504374, "grad_norm": 0.58203125, "learning_rate": 0.0019380439450651082, "loss": 0.2047, "step": 12288 }, { "epoch": 0.02179118582881419, "grad_norm": 0.70703125, "learning_rate": 0.0019380222420797655, "loss": 0.2368, "step": 12290 }, { "epoch": 0.021794731994124003, "grad_norm": 2.421875, "learning_rate": 0.0019380005354294003, "loss": 0.5012, "step": 12292 }, { "epoch": 0.021798278159433818, "grad_norm": 0.69921875, "learning_rate": 0.0019379788251141078, "loss": 0.2229, "step": 12294 }, { "epoch": 0.021801824324743636, "grad_norm": 1.8984375, "learning_rate": 0.0019379571111339824, "loss": 0.2811, "step": 12296 }, { "epoch": 0.02180537049005345, "grad_norm": 0.84375, "learning_rate": 0.0019379353934891193, "loss": 0.2322, "step": 12298 }, { "epoch": 0.021808916655363265, "grad_norm": 0.62890625, "learning_rate": 0.0019379136721796137, "loss": 0.2054, "step": 12300 }, { "epoch": 0.02181246282067308, "grad_norm": 0.4296875, "learning_rate": 0.0019378919472055605, "loss": 0.2031, "step": 12302 }, { "epoch": 0.021816008985982894, "grad_norm": 0.490234375, "learning_rate": 0.0019378702185670542, "loss": 0.3056, "step": 12304 }, { "epoch": 0.021819555151292712, "grad_norm": 0.46484375, "learning_rate": 0.0019378484862641909, "loss": 0.2262, "step": 12306 }, { "epoch": 0.021823101316602526, "grad_norm": 0.45703125, "learning_rate": 0.0019378267502970644, "loss": 0.2083, "step": 12308 }, { "epoch": 0.02182664748191234, "grad_norm": 0.6015625, "learning_rate": 0.0019378050106657705, "loss": 0.2274, "step": 12310 }, { "epoch": 0.021830193647222156, "grad_norm": 0.8046875, "learning_rate": 0.0019377832673704042, "loss": 0.2745, "step": 12312 }, { "epoch": 0.02183373981253197, "grad_norm": 0.44140625, "learning_rate": 0.0019377615204110605, "loss": 0.2309, "step": 12314 }, { "epoch": 0.021837285977841785, "grad_norm": 0.337890625, "learning_rate": 0.0019377397697878346, "loss": 0.253, "step": 12316 }, { "epoch": 0.021840832143151603, "grad_norm": 1.828125, "learning_rate": 0.0019377180155008217, "loss": 0.2683, "step": 12318 }, { "epoch": 0.021844378308461417, "grad_norm": 0.7421875, "learning_rate": 0.0019376962575501165, "loss": 0.1883, "step": 12320 }, { "epoch": 0.021847924473771232, "grad_norm": 0.6484375, "learning_rate": 0.0019376744959358147, "loss": 0.2341, "step": 12322 }, { "epoch": 0.021851470639081046, "grad_norm": 0.51171875, "learning_rate": 0.0019376527306580109, "loss": 0.2922, "step": 12324 }, { "epoch": 0.02185501680439086, "grad_norm": 1.3671875, "learning_rate": 0.0019376309617168011, "loss": 0.2842, "step": 12326 }, { "epoch": 0.021858562969700675, "grad_norm": 0.33203125, "learning_rate": 0.0019376091891122795, "loss": 0.2821, "step": 12328 }, { "epoch": 0.021862109135010493, "grad_norm": 0.466796875, "learning_rate": 0.0019375874128445424, "loss": 0.1806, "step": 12330 }, { "epoch": 0.021865655300320308, "grad_norm": 0.72265625, "learning_rate": 0.0019375656329136842, "loss": 0.2354, "step": 12332 }, { "epoch": 0.021869201465630123, "grad_norm": 0.49609375, "learning_rate": 0.0019375438493198006, "loss": 0.2346, "step": 12334 }, { "epoch": 0.021872747630939937, "grad_norm": 0.447265625, "learning_rate": 0.0019375220620629864, "loss": 0.1906, "step": 12336 }, { "epoch": 0.02187629379624975, "grad_norm": 1.6328125, "learning_rate": 0.0019375002711433374, "loss": 0.2535, "step": 12338 }, { "epoch": 0.02187983996155957, "grad_norm": 3.984375, "learning_rate": 0.0019374784765609488, "loss": 0.3205, "step": 12340 }, { "epoch": 0.021883386126869384, "grad_norm": 0.51953125, "learning_rate": 0.0019374566783159155, "loss": 0.2011, "step": 12342 }, { "epoch": 0.0218869322921792, "grad_norm": 0.2734375, "learning_rate": 0.0019374348764083333, "loss": 0.209, "step": 12344 }, { "epoch": 0.021890478457489013, "grad_norm": 0.7421875, "learning_rate": 0.0019374130708382975, "loss": 0.2034, "step": 12346 }, { "epoch": 0.021894024622798828, "grad_norm": 0.3828125, "learning_rate": 0.0019373912616059033, "loss": 0.2628, "step": 12348 }, { "epoch": 0.021897570788108642, "grad_norm": 0.56640625, "learning_rate": 0.0019373694487112459, "loss": 0.3466, "step": 12350 }, { "epoch": 0.02190111695341846, "grad_norm": 1.1015625, "learning_rate": 0.0019373476321544213, "loss": 0.2858, "step": 12352 }, { "epoch": 0.021904663118728275, "grad_norm": 0.515625, "learning_rate": 0.0019373258119355244, "loss": 0.251, "step": 12354 }, { "epoch": 0.02190820928403809, "grad_norm": 0.361328125, "learning_rate": 0.0019373039880546505, "loss": 0.2294, "step": 12356 }, { "epoch": 0.021911755449347904, "grad_norm": 0.44921875, "learning_rate": 0.0019372821605118957, "loss": 0.1893, "step": 12358 }, { "epoch": 0.02191530161465772, "grad_norm": 0.3203125, "learning_rate": 0.001937260329307355, "loss": 0.2073, "step": 12360 }, { "epoch": 0.021918847779967533, "grad_norm": 3.015625, "learning_rate": 0.0019372384944411239, "loss": 0.2401, "step": 12362 }, { "epoch": 0.02192239394527735, "grad_norm": 0.375, "learning_rate": 0.001937216655913298, "loss": 0.1999, "step": 12364 }, { "epoch": 0.021925940110587166, "grad_norm": 0.416015625, "learning_rate": 0.0019371948137239725, "loss": 0.1976, "step": 12366 }, { "epoch": 0.02192948627589698, "grad_norm": 0.396484375, "learning_rate": 0.0019371729678732436, "loss": 0.2518, "step": 12368 }, { "epoch": 0.021933032441206795, "grad_norm": 1.2421875, "learning_rate": 0.0019371511183612063, "loss": 0.1864, "step": 12370 }, { "epoch": 0.02193657860651661, "grad_norm": 0.53125, "learning_rate": 0.0019371292651879561, "loss": 0.2521, "step": 12372 }, { "epoch": 0.021940124771826428, "grad_norm": 0.439453125, "learning_rate": 0.0019371074083535892, "loss": 0.203, "step": 12374 }, { "epoch": 0.021943670937136242, "grad_norm": 1.8046875, "learning_rate": 0.0019370855478582, "loss": 0.3093, "step": 12376 }, { "epoch": 0.021947217102446057, "grad_norm": 0.421875, "learning_rate": 0.0019370636837018854, "loss": 0.2231, "step": 12378 }, { "epoch": 0.02195076326775587, "grad_norm": 0.306640625, "learning_rate": 0.0019370418158847402, "loss": 0.2516, "step": 12380 }, { "epoch": 0.021954309433065686, "grad_norm": 0.515625, "learning_rate": 0.0019370199444068606, "loss": 0.197, "step": 12382 }, { "epoch": 0.0219578555983755, "grad_norm": 0.84765625, "learning_rate": 0.0019369980692683418, "loss": 0.1843, "step": 12384 }, { "epoch": 0.02196140176368532, "grad_norm": 0.5546875, "learning_rate": 0.0019369761904692794, "loss": 0.2833, "step": 12386 }, { "epoch": 0.021964947928995133, "grad_norm": 0.51171875, "learning_rate": 0.0019369543080097699, "loss": 0.2317, "step": 12388 }, { "epoch": 0.021968494094304947, "grad_norm": 1.7890625, "learning_rate": 0.0019369324218899081, "loss": 0.2449, "step": 12390 }, { "epoch": 0.021972040259614762, "grad_norm": 0.330078125, "learning_rate": 0.0019369105321097896, "loss": 0.2416, "step": 12392 }, { "epoch": 0.021975586424924577, "grad_norm": 0.94140625, "learning_rate": 0.001936888638669511, "loss": 0.2719, "step": 12394 }, { "epoch": 0.02197913259023439, "grad_norm": 0.65625, "learning_rate": 0.0019368667415691678, "loss": 0.1933, "step": 12396 }, { "epoch": 0.02198267875554421, "grad_norm": 0.7421875, "learning_rate": 0.0019368448408088553, "loss": 0.1717, "step": 12398 }, { "epoch": 0.021986224920854024, "grad_norm": 1.25, "learning_rate": 0.0019368229363886699, "loss": 0.2045, "step": 12400 }, { "epoch": 0.021989771086163838, "grad_norm": 1.2734375, "learning_rate": 0.0019368010283087067, "loss": 0.2901, "step": 12402 }, { "epoch": 0.021993317251473653, "grad_norm": 0.384765625, "learning_rate": 0.001936779116569062, "loss": 0.1717, "step": 12404 }, { "epoch": 0.021996863416783467, "grad_norm": 0.83984375, "learning_rate": 0.0019367572011698313, "loss": 0.2326, "step": 12406 }, { "epoch": 0.022000409582093285, "grad_norm": 0.333984375, "learning_rate": 0.001936735282111111, "loss": 0.2094, "step": 12408 }, { "epoch": 0.0220039557474031, "grad_norm": 0.373046875, "learning_rate": 0.0019367133593929963, "loss": 0.1992, "step": 12410 }, { "epoch": 0.022007501912712914, "grad_norm": 0.81640625, "learning_rate": 0.0019366914330155836, "loss": 0.2278, "step": 12412 }, { "epoch": 0.02201104807802273, "grad_norm": 1.390625, "learning_rate": 0.0019366695029789684, "loss": 0.226, "step": 12414 }, { "epoch": 0.022014594243332544, "grad_norm": 0.6640625, "learning_rate": 0.001936647569283247, "loss": 0.2568, "step": 12416 }, { "epoch": 0.022018140408642358, "grad_norm": 0.39453125, "learning_rate": 0.001936625631928515, "loss": 0.2123, "step": 12418 }, { "epoch": 0.022021686573952176, "grad_norm": 0.54296875, "learning_rate": 0.0019366036909148686, "loss": 0.2135, "step": 12420 }, { "epoch": 0.02202523273926199, "grad_norm": 0.400390625, "learning_rate": 0.0019365817462424033, "loss": 0.2936, "step": 12422 }, { "epoch": 0.022028778904571805, "grad_norm": 0.36328125, "learning_rate": 0.0019365597979112154, "loss": 0.2172, "step": 12424 }, { "epoch": 0.02203232506988162, "grad_norm": 2.75, "learning_rate": 0.0019365378459214014, "loss": 0.279, "step": 12426 }, { "epoch": 0.022035871235191434, "grad_norm": 0.396484375, "learning_rate": 0.0019365158902730562, "loss": 0.287, "step": 12428 }, { "epoch": 0.02203941740050125, "grad_norm": 0.333984375, "learning_rate": 0.0019364939309662768, "loss": 0.1919, "step": 12430 }, { "epoch": 0.022042963565811067, "grad_norm": 0.5390625, "learning_rate": 0.0019364719680011586, "loss": 0.3153, "step": 12432 }, { "epoch": 0.02204650973112088, "grad_norm": 2.5, "learning_rate": 0.0019364500013777982, "loss": 0.2378, "step": 12434 }, { "epoch": 0.022050055896430696, "grad_norm": 1.875, "learning_rate": 0.0019364280310962907, "loss": 0.268, "step": 12436 }, { "epoch": 0.02205360206174051, "grad_norm": 1.265625, "learning_rate": 0.0019364060571567334, "loss": 0.4786, "step": 12438 }, { "epoch": 0.022057148227050325, "grad_norm": 0.44921875, "learning_rate": 0.0019363840795592218, "loss": 0.243, "step": 12440 }, { "epoch": 0.022060694392360143, "grad_norm": 0.197265625, "learning_rate": 0.0019363620983038519, "loss": 0.1825, "step": 12442 }, { "epoch": 0.022064240557669958, "grad_norm": 1.0234375, "learning_rate": 0.0019363401133907204, "loss": 0.2716, "step": 12444 }, { "epoch": 0.022067786722979772, "grad_norm": 0.57421875, "learning_rate": 0.0019363181248199224, "loss": 0.2539, "step": 12446 }, { "epoch": 0.022071332888289587, "grad_norm": 0.380859375, "learning_rate": 0.001936296132591555, "loss": 0.5329, "step": 12448 }, { "epoch": 0.0220748790535994, "grad_norm": 1.9296875, "learning_rate": 0.0019362741367057142, "loss": 0.3482, "step": 12450 }, { "epoch": 0.022078425218909216, "grad_norm": 0.451171875, "learning_rate": 0.0019362521371624958, "loss": 0.2587, "step": 12452 }, { "epoch": 0.022081971384219034, "grad_norm": 0.578125, "learning_rate": 0.0019362301339619968, "loss": 0.2146, "step": 12454 }, { "epoch": 0.02208551754952885, "grad_norm": 1.59375, "learning_rate": 0.0019362081271043126, "loss": 0.2371, "step": 12456 }, { "epoch": 0.022089063714838663, "grad_norm": 0.69921875, "learning_rate": 0.0019361861165895398, "loss": 0.186, "step": 12458 }, { "epoch": 0.022092609880148478, "grad_norm": 0.34375, "learning_rate": 0.0019361641024177748, "loss": 0.167, "step": 12460 }, { "epoch": 0.022096156045458292, "grad_norm": 0.56640625, "learning_rate": 0.0019361420845891136, "loss": 0.2413, "step": 12462 }, { "epoch": 0.022099702210768107, "grad_norm": 0.66796875, "learning_rate": 0.0019361200631036527, "loss": 0.2264, "step": 12464 }, { "epoch": 0.022103248376077925, "grad_norm": 0.326171875, "learning_rate": 0.0019360980379614884, "loss": 0.2081, "step": 12466 }, { "epoch": 0.02210679454138774, "grad_norm": 0.373046875, "learning_rate": 0.0019360760091627165, "loss": 0.2218, "step": 12468 }, { "epoch": 0.022110340706697554, "grad_norm": 0.21875, "learning_rate": 0.0019360539767074342, "loss": 0.1746, "step": 12470 }, { "epoch": 0.02211388687200737, "grad_norm": 0.7890625, "learning_rate": 0.001936031940595737, "loss": 0.2122, "step": 12472 }, { "epoch": 0.022117433037317183, "grad_norm": 0.77734375, "learning_rate": 0.0019360099008277221, "loss": 0.2285, "step": 12474 }, { "epoch": 0.022120979202627, "grad_norm": 1.3125, "learning_rate": 0.0019359878574034856, "loss": 0.3159, "step": 12476 }, { "epoch": 0.022124525367936815, "grad_norm": 0.4921875, "learning_rate": 0.0019359658103231237, "loss": 0.2407, "step": 12478 }, { "epoch": 0.02212807153324663, "grad_norm": 0.365234375, "learning_rate": 0.0019359437595867328, "loss": 0.21, "step": 12480 }, { "epoch": 0.022131617698556445, "grad_norm": 1.2734375, "learning_rate": 0.0019359217051944095, "loss": 0.2898, "step": 12482 }, { "epoch": 0.02213516386386626, "grad_norm": 1.25, "learning_rate": 0.0019358996471462505, "loss": 0.4589, "step": 12484 }, { "epoch": 0.022138710029176074, "grad_norm": 0.294921875, "learning_rate": 0.0019358775854423516, "loss": 0.1939, "step": 12486 }, { "epoch": 0.02214225619448589, "grad_norm": 2.765625, "learning_rate": 0.00193585552008281, "loss": 0.3736, "step": 12488 }, { "epoch": 0.022145802359795706, "grad_norm": 0.357421875, "learning_rate": 0.0019358334510677217, "loss": 0.272, "step": 12490 }, { "epoch": 0.02214934852510552, "grad_norm": 0.478515625, "learning_rate": 0.0019358113783971831, "loss": 0.2606, "step": 12492 }, { "epoch": 0.022152894690415335, "grad_norm": 3.109375, "learning_rate": 0.0019357893020712914, "loss": 0.4371, "step": 12494 }, { "epoch": 0.02215644085572515, "grad_norm": 0.267578125, "learning_rate": 0.0019357672220901428, "loss": 0.2283, "step": 12496 }, { "epoch": 0.022159987021034964, "grad_norm": 0.2578125, "learning_rate": 0.001935745138453834, "loss": 0.2839, "step": 12498 }, { "epoch": 0.022163533186344783, "grad_norm": 1.4140625, "learning_rate": 0.0019357230511624609, "loss": 0.3252, "step": 12500 }, { "epoch": 0.022167079351654597, "grad_norm": 0.35546875, "learning_rate": 0.001935700960216121, "loss": 0.2162, "step": 12502 }, { "epoch": 0.02217062551696441, "grad_norm": 0.54296875, "learning_rate": 0.0019356788656149104, "loss": 0.2722, "step": 12504 }, { "epoch": 0.022174171682274226, "grad_norm": 0.34375, "learning_rate": 0.0019356567673589258, "loss": 0.2602, "step": 12506 }, { "epoch": 0.02217771784758404, "grad_norm": 1.5703125, "learning_rate": 0.001935634665448264, "loss": 0.3855, "step": 12508 }, { "epoch": 0.02218126401289386, "grad_norm": 0.26953125, "learning_rate": 0.0019356125598830216, "loss": 0.2396, "step": 12510 }, { "epoch": 0.022184810178203673, "grad_norm": 0.51953125, "learning_rate": 0.0019355904506632954, "loss": 0.2376, "step": 12512 }, { "epoch": 0.022188356343513488, "grad_norm": 0.91015625, "learning_rate": 0.0019355683377891815, "loss": 0.3056, "step": 12514 }, { "epoch": 0.022191902508823302, "grad_norm": 1.28125, "learning_rate": 0.0019355462212607775, "loss": 0.2295, "step": 12516 }, { "epoch": 0.022195448674133117, "grad_norm": 0.296875, "learning_rate": 0.0019355241010781793, "loss": 0.2017, "step": 12518 }, { "epoch": 0.02219899483944293, "grad_norm": 0.2333984375, "learning_rate": 0.0019355019772414844, "loss": 0.225, "step": 12520 }, { "epoch": 0.02220254100475275, "grad_norm": 0.31640625, "learning_rate": 0.0019354798497507888, "loss": 0.214, "step": 12522 }, { "epoch": 0.022206087170062564, "grad_norm": 1.1796875, "learning_rate": 0.00193545771860619, "loss": 0.3216, "step": 12524 }, { "epoch": 0.02220963333537238, "grad_norm": 1.296875, "learning_rate": 0.001935435583807784, "loss": 0.2132, "step": 12526 }, { "epoch": 0.022213179500682193, "grad_norm": 0.3984375, "learning_rate": 0.0019354134453556681, "loss": 0.2195, "step": 12528 }, { "epoch": 0.022216725665992008, "grad_norm": 0.212890625, "learning_rate": 0.0019353913032499393, "loss": 0.197, "step": 12530 }, { "epoch": 0.022220271831301822, "grad_norm": 0.30859375, "learning_rate": 0.0019353691574906942, "loss": 0.3062, "step": 12532 }, { "epoch": 0.02222381799661164, "grad_norm": 0.39453125, "learning_rate": 0.0019353470080780296, "loss": 0.3812, "step": 12534 }, { "epoch": 0.022227364161921455, "grad_norm": 0.6171875, "learning_rate": 0.0019353248550120423, "loss": 0.4081, "step": 12536 }, { "epoch": 0.02223091032723127, "grad_norm": 0.47265625, "learning_rate": 0.0019353026982928294, "loss": 0.214, "step": 12538 }, { "epoch": 0.022234456492541084, "grad_norm": 0.287109375, "learning_rate": 0.0019352805379204876, "loss": 0.2167, "step": 12540 }, { "epoch": 0.0222380026578509, "grad_norm": 3.171875, "learning_rate": 0.001935258373895114, "loss": 0.2288, "step": 12542 }, { "epoch": 0.022241548823160717, "grad_norm": 1.1015625, "learning_rate": 0.0019352362062168055, "loss": 0.2162, "step": 12544 }, { "epoch": 0.02224509498847053, "grad_norm": 1.0234375, "learning_rate": 0.0019352140348856588, "loss": 0.2131, "step": 12546 }, { "epoch": 0.022248641153780346, "grad_norm": 0.396484375, "learning_rate": 0.0019351918599017713, "loss": 0.1651, "step": 12548 }, { "epoch": 0.02225218731909016, "grad_norm": 1.140625, "learning_rate": 0.0019351696812652396, "loss": 0.2031, "step": 12550 }, { "epoch": 0.022255733484399975, "grad_norm": 1.0, "learning_rate": 0.0019351474989761609, "loss": 0.2361, "step": 12552 }, { "epoch": 0.02225927964970979, "grad_norm": 0.734375, "learning_rate": 0.001935125313034632, "loss": 0.2252, "step": 12554 }, { "epoch": 0.022262825815019607, "grad_norm": 0.50390625, "learning_rate": 0.0019351031234407501, "loss": 0.2152, "step": 12556 }, { "epoch": 0.022266371980329422, "grad_norm": 0.28125, "learning_rate": 0.0019350809301946126, "loss": 0.2363, "step": 12558 }, { "epoch": 0.022269918145639236, "grad_norm": 0.796875, "learning_rate": 0.001935058733296316, "loss": 0.2747, "step": 12560 }, { "epoch": 0.02227346431094905, "grad_norm": 1.0859375, "learning_rate": 0.0019350365327459572, "loss": 0.2693, "step": 12562 }, { "epoch": 0.022277010476258866, "grad_norm": 0.384765625, "learning_rate": 0.001935014328543634, "loss": 0.2107, "step": 12564 }, { "epoch": 0.02228055664156868, "grad_norm": 0.515625, "learning_rate": 0.001934992120689443, "loss": 0.2243, "step": 12566 }, { "epoch": 0.022284102806878498, "grad_norm": 0.373046875, "learning_rate": 0.001934969909183482, "loss": 0.1968, "step": 12568 }, { "epoch": 0.022287648972188313, "grad_norm": 1.4140625, "learning_rate": 0.0019349476940258468, "loss": 0.2083, "step": 12570 }, { "epoch": 0.022291195137498127, "grad_norm": 0.5234375, "learning_rate": 0.0019349254752166359, "loss": 0.2048, "step": 12572 }, { "epoch": 0.022294741302807942, "grad_norm": 0.310546875, "learning_rate": 0.0019349032527559457, "loss": 0.2374, "step": 12574 }, { "epoch": 0.022298287468117756, "grad_norm": 0.33203125, "learning_rate": 0.0019348810266438738, "loss": 0.2298, "step": 12576 }, { "epoch": 0.022301833633427574, "grad_norm": 0.2080078125, "learning_rate": 0.0019348587968805173, "loss": 0.2126, "step": 12578 }, { "epoch": 0.02230537979873739, "grad_norm": 0.265625, "learning_rate": 0.001934836563465973, "loss": 0.2324, "step": 12580 }, { "epoch": 0.022308925964047203, "grad_norm": 0.1689453125, "learning_rate": 0.001934814326400339, "loss": 0.1943, "step": 12582 }, { "epoch": 0.022312472129357018, "grad_norm": 0.439453125, "learning_rate": 0.0019347920856837117, "loss": 0.1817, "step": 12584 }, { "epoch": 0.022316018294666833, "grad_norm": 0.78125, "learning_rate": 0.001934769841316189, "loss": 0.1601, "step": 12586 }, { "epoch": 0.022319564459976647, "grad_norm": 1.4765625, "learning_rate": 0.0019347475932978678, "loss": 0.2859, "step": 12588 }, { "epoch": 0.022323110625286465, "grad_norm": 0.5546875, "learning_rate": 0.0019347253416288452, "loss": 0.2455, "step": 12590 }, { "epoch": 0.02232665679059628, "grad_norm": 0.75390625, "learning_rate": 0.0019347030863092192, "loss": 0.3191, "step": 12592 }, { "epoch": 0.022330202955906094, "grad_norm": 3.296875, "learning_rate": 0.0019346808273390866, "loss": 0.4205, "step": 12594 }, { "epoch": 0.02233374912121591, "grad_norm": 0.55859375, "learning_rate": 0.001934658564718545, "loss": 0.2514, "step": 12596 }, { "epoch": 0.022337295286525723, "grad_norm": 0.53125, "learning_rate": 0.0019346362984476917, "loss": 0.2415, "step": 12598 }, { "epoch": 0.022340841451835538, "grad_norm": 0.62890625, "learning_rate": 0.001934614028526624, "loss": 0.4898, "step": 12600 }, { "epoch": 0.022344387617145356, "grad_norm": 0.2890625, "learning_rate": 0.0019345917549554394, "loss": 0.2184, "step": 12602 }, { "epoch": 0.02234793378245517, "grad_norm": 0.4140625, "learning_rate": 0.0019345694777342351, "loss": 0.2711, "step": 12604 }, { "epoch": 0.022351479947764985, "grad_norm": 0.357421875, "learning_rate": 0.0019345471968631087, "loss": 0.2013, "step": 12606 }, { "epoch": 0.0223550261130748, "grad_norm": 2.265625, "learning_rate": 0.0019345249123421575, "loss": 0.1935, "step": 12608 }, { "epoch": 0.022358572278384614, "grad_norm": 0.33203125, "learning_rate": 0.0019345026241714793, "loss": 0.2628, "step": 12610 }, { "epoch": 0.022362118443694432, "grad_norm": 0.80078125, "learning_rate": 0.0019344803323511715, "loss": 0.2409, "step": 12612 }, { "epoch": 0.022365664609004247, "grad_norm": 0.86328125, "learning_rate": 0.0019344580368813311, "loss": 0.3114, "step": 12614 }, { "epoch": 0.02236921077431406, "grad_norm": 0.27734375, "learning_rate": 0.001934435737762056, "loss": 0.2277, "step": 12616 }, { "epoch": 0.022372756939623876, "grad_norm": 0.1689453125, "learning_rate": 0.0019344134349934439, "loss": 0.2605, "step": 12618 }, { "epoch": 0.02237630310493369, "grad_norm": 0.95703125, "learning_rate": 0.001934391128575592, "loss": 0.3065, "step": 12620 }, { "epoch": 0.022379849270243505, "grad_norm": 1.375, "learning_rate": 0.0019343688185085978, "loss": 0.1976, "step": 12622 }, { "epoch": 0.022383395435553323, "grad_norm": 0.90234375, "learning_rate": 0.0019343465047925593, "loss": 0.2209, "step": 12624 }, { "epoch": 0.022386941600863138, "grad_norm": 0.28515625, "learning_rate": 0.0019343241874275737, "loss": 0.1795, "step": 12626 }, { "epoch": 0.022390487766172952, "grad_norm": 0.435546875, "learning_rate": 0.0019343018664137387, "loss": 0.235, "step": 12628 }, { "epoch": 0.022394033931482767, "grad_norm": 0.97265625, "learning_rate": 0.0019342795417511523, "loss": 0.1741, "step": 12630 }, { "epoch": 0.02239758009679258, "grad_norm": 0.376953125, "learning_rate": 0.0019342572134399114, "loss": 0.2232, "step": 12632 }, { "epoch": 0.022401126262102396, "grad_norm": 0.6171875, "learning_rate": 0.0019342348814801143, "loss": 0.2044, "step": 12634 }, { "epoch": 0.022404672427412214, "grad_norm": 0.83203125, "learning_rate": 0.0019342125458718584, "loss": 0.307, "step": 12636 }, { "epoch": 0.02240821859272203, "grad_norm": 2.875, "learning_rate": 0.001934190206615241, "loss": 0.2746, "step": 12638 }, { "epoch": 0.022411764758031843, "grad_norm": 0.70703125, "learning_rate": 0.0019341678637103605, "loss": 0.2264, "step": 12640 }, { "epoch": 0.022415310923341657, "grad_norm": 0.400390625, "learning_rate": 0.0019341455171573143, "loss": 0.2504, "step": 12642 }, { "epoch": 0.022418857088651472, "grad_norm": 0.88671875, "learning_rate": 0.0019341231669562002, "loss": 0.2102, "step": 12644 }, { "epoch": 0.02242240325396129, "grad_norm": 0.60546875, "learning_rate": 0.0019341008131071162, "loss": 0.1765, "step": 12646 }, { "epoch": 0.022425949419271105, "grad_norm": 0.9296875, "learning_rate": 0.0019340784556101589, "loss": 0.251, "step": 12648 }, { "epoch": 0.02242949558458092, "grad_norm": 1.015625, "learning_rate": 0.0019340560944654274, "loss": 0.2228, "step": 12650 }, { "epoch": 0.022433041749890734, "grad_norm": 0.58203125, "learning_rate": 0.001934033729673019, "loss": 0.2078, "step": 12652 }, { "epoch": 0.022436587915200548, "grad_norm": 0.75, "learning_rate": 0.0019340113612330316, "loss": 0.3198, "step": 12654 }, { "epoch": 0.022440134080510363, "grad_norm": 0.57421875, "learning_rate": 0.0019339889891455632, "loss": 0.2347, "step": 12656 }, { "epoch": 0.02244368024582018, "grad_norm": 0.365234375, "learning_rate": 0.001933966613410711, "loss": 0.245, "step": 12658 }, { "epoch": 0.022447226411129995, "grad_norm": 0.373046875, "learning_rate": 0.0019339442340285734, "loss": 0.3191, "step": 12660 }, { "epoch": 0.02245077257643981, "grad_norm": 0.32421875, "learning_rate": 0.0019339218509992478, "loss": 0.1804, "step": 12662 }, { "epoch": 0.022454318741749624, "grad_norm": 0.7421875, "learning_rate": 0.001933899464322833, "loss": 0.3906, "step": 12664 }, { "epoch": 0.02245786490705944, "grad_norm": 0.2236328125, "learning_rate": 0.0019338770739994258, "loss": 0.2101, "step": 12666 }, { "epoch": 0.022461411072369254, "grad_norm": 0.328125, "learning_rate": 0.001933854680029125, "loss": 0.2814, "step": 12668 }, { "epoch": 0.02246495723767907, "grad_norm": 0.45703125, "learning_rate": 0.001933832282412028, "loss": 0.2235, "step": 12670 }, { "epoch": 0.022468503402988886, "grad_norm": 0.54296875, "learning_rate": 0.001933809881148233, "loss": 0.2009, "step": 12672 }, { "epoch": 0.0224720495682987, "grad_norm": 0.5078125, "learning_rate": 0.0019337874762378378, "loss": 0.1984, "step": 12674 }, { "epoch": 0.022475595733608515, "grad_norm": 0.609375, "learning_rate": 0.0019337650676809408, "loss": 0.2714, "step": 12676 }, { "epoch": 0.02247914189891833, "grad_norm": 1.03125, "learning_rate": 0.0019337426554776396, "loss": 0.1851, "step": 12678 }, { "epoch": 0.022482688064228148, "grad_norm": 0.263671875, "learning_rate": 0.001933720239628032, "loss": 0.1721, "step": 12680 }, { "epoch": 0.022486234229537962, "grad_norm": 0.4375, "learning_rate": 0.0019336978201322168, "loss": 0.232, "step": 12682 }, { "epoch": 0.022489780394847777, "grad_norm": 0.396484375, "learning_rate": 0.0019336753969902915, "loss": 0.1737, "step": 12684 }, { "epoch": 0.02249332656015759, "grad_norm": 0.6953125, "learning_rate": 0.001933652970202354, "loss": 0.3207, "step": 12686 }, { "epoch": 0.022496872725467406, "grad_norm": 1.1484375, "learning_rate": 0.001933630539768503, "loss": 0.2332, "step": 12688 }, { "epoch": 0.02250041889077722, "grad_norm": 0.40234375, "learning_rate": 0.001933608105688836, "loss": 0.2303, "step": 12690 }, { "epoch": 0.02250396505608704, "grad_norm": 0.75, "learning_rate": 0.0019335856679634515, "loss": 0.1993, "step": 12692 }, { "epoch": 0.022507511221396853, "grad_norm": 0.7109375, "learning_rate": 0.0019335632265924474, "loss": 0.2327, "step": 12694 }, { "epoch": 0.022511057386706668, "grad_norm": 0.77734375, "learning_rate": 0.0019335407815759221, "loss": 0.2747, "step": 12696 }, { "epoch": 0.022514603552016482, "grad_norm": 0.6796875, "learning_rate": 0.0019335183329139735, "loss": 0.2459, "step": 12698 }, { "epoch": 0.022518149717326297, "grad_norm": 1.0703125, "learning_rate": 0.0019334958806067, "loss": 0.2798, "step": 12700 }, { "epoch": 0.02252169588263611, "grad_norm": 0.890625, "learning_rate": 0.0019334734246541992, "loss": 0.1991, "step": 12702 }, { "epoch": 0.02252524204794593, "grad_norm": 0.51171875, "learning_rate": 0.0019334509650565702, "loss": 0.24, "step": 12704 }, { "epoch": 0.022528788213255744, "grad_norm": 2.109375, "learning_rate": 0.0019334285018139108, "loss": 0.2288, "step": 12706 }, { "epoch": 0.02253233437856556, "grad_norm": 0.51171875, "learning_rate": 0.001933406034926319, "loss": 0.2194, "step": 12708 }, { "epoch": 0.022535880543875373, "grad_norm": 2.53125, "learning_rate": 0.0019333835643938937, "loss": 0.2483, "step": 12710 }, { "epoch": 0.022539426709185188, "grad_norm": 0.62109375, "learning_rate": 0.0019333610902167325, "loss": 0.2336, "step": 12712 }, { "epoch": 0.022542972874495006, "grad_norm": 1.3046875, "learning_rate": 0.001933338612394934, "loss": 0.2901, "step": 12714 }, { "epoch": 0.02254651903980482, "grad_norm": 0.640625, "learning_rate": 0.0019333161309285966, "loss": 0.268, "step": 12716 }, { "epoch": 0.022550065205114635, "grad_norm": 0.466796875, "learning_rate": 0.001933293645817818, "loss": 0.2504, "step": 12718 }, { "epoch": 0.02255361137042445, "grad_norm": 0.490234375, "learning_rate": 0.0019332711570626973, "loss": 0.1955, "step": 12720 }, { "epoch": 0.022557157535734264, "grad_norm": 0.322265625, "learning_rate": 0.0019332486646633327, "loss": 0.1811, "step": 12722 }, { "epoch": 0.02256070370104408, "grad_norm": 0.85546875, "learning_rate": 0.0019332261686198224, "loss": 0.3394, "step": 12724 }, { "epoch": 0.022564249866353896, "grad_norm": 0.58203125, "learning_rate": 0.001933203668932265, "loss": 0.2198, "step": 12726 }, { "epoch": 0.02256779603166371, "grad_norm": 0.96484375, "learning_rate": 0.0019331811656007588, "loss": 0.2544, "step": 12728 }, { "epoch": 0.022571342196973525, "grad_norm": 0.8515625, "learning_rate": 0.0019331586586254018, "loss": 0.2869, "step": 12730 }, { "epoch": 0.02257488836228334, "grad_norm": 2.03125, "learning_rate": 0.0019331361480062928, "loss": 0.1893, "step": 12732 }, { "epoch": 0.022578434527593155, "grad_norm": 0.8671875, "learning_rate": 0.0019331136337435305, "loss": 0.2712, "step": 12734 }, { "epoch": 0.02258198069290297, "grad_norm": 1.234375, "learning_rate": 0.001933091115837213, "loss": 0.2521, "step": 12736 }, { "epoch": 0.022585526858212787, "grad_norm": 0.7578125, "learning_rate": 0.001933068594287439, "loss": 0.1989, "step": 12738 }, { "epoch": 0.0225890730235226, "grad_norm": 0.2216796875, "learning_rate": 0.0019330460690943066, "loss": 0.3316, "step": 12740 }, { "epoch": 0.022592619188832416, "grad_norm": 0.52734375, "learning_rate": 0.0019330235402579147, "loss": 0.2238, "step": 12742 }, { "epoch": 0.02259616535414223, "grad_norm": 0.890625, "learning_rate": 0.0019330010077783615, "loss": 0.2815, "step": 12744 }, { "epoch": 0.022599711519452045, "grad_norm": 6.75, "learning_rate": 0.0019329784716557458, "loss": 0.392, "step": 12746 }, { "epoch": 0.022603257684761863, "grad_norm": 0.546875, "learning_rate": 0.0019329559318901665, "loss": 0.2155, "step": 12748 }, { "epoch": 0.022606803850071678, "grad_norm": 0.44921875, "learning_rate": 0.0019329333884817216, "loss": 0.2363, "step": 12750 }, { "epoch": 0.022610350015381492, "grad_norm": 0.466796875, "learning_rate": 0.0019329108414305098, "loss": 0.1871, "step": 12752 }, { "epoch": 0.022613896180691307, "grad_norm": 0.6484375, "learning_rate": 0.0019328882907366299, "loss": 0.223, "step": 12754 }, { "epoch": 0.02261744234600112, "grad_norm": 0.62890625, "learning_rate": 0.0019328657364001802, "loss": 0.227, "step": 12756 }, { "epoch": 0.022620988511310936, "grad_norm": 1.046875, "learning_rate": 0.0019328431784212598, "loss": 0.2606, "step": 12758 }, { "epoch": 0.022624534676620754, "grad_norm": 0.54296875, "learning_rate": 0.001932820616799967, "loss": 0.2807, "step": 12760 }, { "epoch": 0.02262808084193057, "grad_norm": 0.69921875, "learning_rate": 0.0019327980515364005, "loss": 0.2303, "step": 12762 }, { "epoch": 0.022631627007240383, "grad_norm": 0.65625, "learning_rate": 0.0019327754826306593, "loss": 0.2385, "step": 12764 }, { "epoch": 0.022635173172550198, "grad_norm": 0.98046875, "learning_rate": 0.0019327529100828418, "loss": 0.2364, "step": 12766 }, { "epoch": 0.022638719337860012, "grad_norm": 0.5859375, "learning_rate": 0.0019327303338930466, "loss": 0.179, "step": 12768 }, { "epoch": 0.022642265503169827, "grad_norm": 0.609375, "learning_rate": 0.0019327077540613727, "loss": 0.2257, "step": 12770 }, { "epoch": 0.022645811668479645, "grad_norm": 0.5859375, "learning_rate": 0.001932685170587919, "loss": 0.2471, "step": 12772 }, { "epoch": 0.02264935783378946, "grad_norm": 3.359375, "learning_rate": 0.0019326625834727837, "loss": 0.2343, "step": 12774 }, { "epoch": 0.022652903999099274, "grad_norm": 2.140625, "learning_rate": 0.0019326399927160661, "loss": 0.3141, "step": 12776 }, { "epoch": 0.02265645016440909, "grad_norm": 0.451171875, "learning_rate": 0.001932617398317865, "loss": 0.1911, "step": 12778 }, { "epoch": 0.022659996329718903, "grad_norm": 0.89453125, "learning_rate": 0.001932594800278279, "loss": 0.2355, "step": 12780 }, { "epoch": 0.02266354249502872, "grad_norm": 2.078125, "learning_rate": 0.0019325721985974068, "loss": 0.3804, "step": 12782 }, { "epoch": 0.022667088660338536, "grad_norm": 0.357421875, "learning_rate": 0.0019325495932753475, "loss": 0.232, "step": 12784 }, { "epoch": 0.02267063482564835, "grad_norm": 0.392578125, "learning_rate": 0.0019325269843121998, "loss": 0.2189, "step": 12786 }, { "epoch": 0.022674180990958165, "grad_norm": 3.703125, "learning_rate": 0.0019325043717080626, "loss": 0.4226, "step": 12788 }, { "epoch": 0.02267772715626798, "grad_norm": 0.439453125, "learning_rate": 0.0019324817554630352, "loss": 0.2359, "step": 12790 }, { "epoch": 0.022681273321577794, "grad_norm": 0.66015625, "learning_rate": 0.0019324591355772156, "loss": 0.25, "step": 12792 }, { "epoch": 0.022684819486887612, "grad_norm": 2.390625, "learning_rate": 0.0019324365120507038, "loss": 0.2969, "step": 12794 }, { "epoch": 0.022688365652197427, "grad_norm": 0.38671875, "learning_rate": 0.001932413884883598, "loss": 0.1732, "step": 12796 }, { "epoch": 0.02269191181750724, "grad_norm": 0.2314453125, "learning_rate": 0.0019323912540759973, "loss": 0.2004, "step": 12798 }, { "epoch": 0.022695457982817056, "grad_norm": 0.255859375, "learning_rate": 0.001932368619628001, "loss": 0.3216, "step": 12800 }, { "epoch": 0.02269900414812687, "grad_norm": 0.9375, "learning_rate": 0.0019323459815397078, "loss": 0.2472, "step": 12802 }, { "epoch": 0.022702550313436685, "grad_norm": 0.63671875, "learning_rate": 0.0019323233398112168, "loss": 0.2268, "step": 12804 }, { "epoch": 0.022706096478746503, "grad_norm": 0.79296875, "learning_rate": 0.0019323006944426267, "loss": 0.2901, "step": 12806 }, { "epoch": 0.022709642644056317, "grad_norm": 0.88671875, "learning_rate": 0.0019322780454340371, "loss": 0.2921, "step": 12808 }, { "epoch": 0.022713188809366132, "grad_norm": 0.431640625, "learning_rate": 0.0019322553927855468, "loss": 0.188, "step": 12810 }, { "epoch": 0.022716734974675946, "grad_norm": 0.921875, "learning_rate": 0.0019322327364972548, "loss": 0.1867, "step": 12812 }, { "epoch": 0.02272028113998576, "grad_norm": 0.796875, "learning_rate": 0.00193221007656926, "loss": 0.279, "step": 12814 }, { "epoch": 0.02272382730529558, "grad_norm": 0.275390625, "learning_rate": 0.0019321874130016619, "loss": 0.2007, "step": 12816 }, { "epoch": 0.022727373470605394, "grad_norm": 0.87890625, "learning_rate": 0.0019321647457945595, "loss": 0.2951, "step": 12818 }, { "epoch": 0.022730919635915208, "grad_norm": 0.41015625, "learning_rate": 0.0019321420749480519, "loss": 0.1994, "step": 12820 }, { "epoch": 0.022734465801225023, "grad_norm": 0.671875, "learning_rate": 0.0019321194004622382, "loss": 0.1762, "step": 12822 }, { "epoch": 0.022738011966534837, "grad_norm": 4.3125, "learning_rate": 0.0019320967223372175, "loss": 0.3836, "step": 12824 }, { "epoch": 0.022741558131844652, "grad_norm": 0.68359375, "learning_rate": 0.0019320740405730891, "loss": 0.246, "step": 12826 }, { "epoch": 0.02274510429715447, "grad_norm": 0.53515625, "learning_rate": 0.0019320513551699524, "loss": 0.1992, "step": 12828 }, { "epoch": 0.022748650462464284, "grad_norm": 0.98828125, "learning_rate": 0.001932028666127906, "loss": 0.2541, "step": 12830 }, { "epoch": 0.0227521966277741, "grad_norm": 0.369140625, "learning_rate": 0.0019320059734470496, "loss": 0.2374, "step": 12832 }, { "epoch": 0.022755742793083913, "grad_norm": 0.88671875, "learning_rate": 0.0019319832771274826, "loss": 0.1668, "step": 12834 }, { "epoch": 0.022759288958393728, "grad_norm": 0.71875, "learning_rate": 0.0019319605771693038, "loss": 0.3272, "step": 12836 }, { "epoch": 0.022762835123703543, "grad_norm": 1.6796875, "learning_rate": 0.0019319378735726128, "loss": 0.2246, "step": 12838 }, { "epoch": 0.02276638128901336, "grad_norm": 0.40625, "learning_rate": 0.0019319151663375088, "loss": 0.2337, "step": 12840 }, { "epoch": 0.022769927454323175, "grad_norm": 2.65625, "learning_rate": 0.001931892455464091, "loss": 0.2668, "step": 12842 }, { "epoch": 0.02277347361963299, "grad_norm": 0.2236328125, "learning_rate": 0.001931869740952459, "loss": 0.247, "step": 12844 }, { "epoch": 0.022777019784942804, "grad_norm": 7.03125, "learning_rate": 0.0019318470228027115, "loss": 0.1666, "step": 12846 }, { "epoch": 0.02278056595025262, "grad_norm": 0.486328125, "learning_rate": 0.001931824301014949, "loss": 0.1994, "step": 12848 }, { "epoch": 0.022784112115562437, "grad_norm": 1.65625, "learning_rate": 0.00193180157558927, "loss": 0.2013, "step": 12850 }, { "epoch": 0.02278765828087225, "grad_norm": 0.625, "learning_rate": 0.0019317788465257738, "loss": 0.2737, "step": 12852 }, { "epoch": 0.022791204446182066, "grad_norm": 0.640625, "learning_rate": 0.00193175611382456, "loss": 0.2346, "step": 12854 }, { "epoch": 0.02279475061149188, "grad_norm": 1.0546875, "learning_rate": 0.0019317333774857284, "loss": 0.2299, "step": 12856 }, { "epoch": 0.022798296776801695, "grad_norm": 0.40625, "learning_rate": 0.001931710637509378, "loss": 0.2206, "step": 12858 }, { "epoch": 0.02280184294211151, "grad_norm": 2.421875, "learning_rate": 0.0019316878938956084, "loss": 0.2176, "step": 12860 }, { "epoch": 0.022805389107421328, "grad_norm": 1.6328125, "learning_rate": 0.0019316651466445194, "loss": 0.3634, "step": 12862 }, { "epoch": 0.022808935272731142, "grad_norm": 0.78125, "learning_rate": 0.0019316423957562096, "loss": 0.2359, "step": 12864 }, { "epoch": 0.022812481438040957, "grad_norm": 0.92578125, "learning_rate": 0.0019316196412307793, "loss": 0.2436, "step": 12866 }, { "epoch": 0.02281602760335077, "grad_norm": 0.69140625, "learning_rate": 0.0019315968830683277, "loss": 0.212, "step": 12868 }, { "epoch": 0.022819573768660586, "grad_norm": 0.55078125, "learning_rate": 0.0019315741212689544, "loss": 0.2726, "step": 12870 }, { "epoch": 0.0228231199339704, "grad_norm": 0.451171875, "learning_rate": 0.001931551355832759, "loss": 0.1419, "step": 12872 }, { "epoch": 0.02282666609928022, "grad_norm": 0.5625, "learning_rate": 0.0019315285867598409, "loss": 0.1984, "step": 12874 }, { "epoch": 0.022830212264590033, "grad_norm": 0.81640625, "learning_rate": 0.0019315058140502996, "loss": 0.2207, "step": 12876 }, { "epoch": 0.022833758429899847, "grad_norm": 0.61328125, "learning_rate": 0.0019314830377042352, "loss": 0.1625, "step": 12878 }, { "epoch": 0.022837304595209662, "grad_norm": 0.212890625, "learning_rate": 0.0019314602577217468, "loss": 0.1847, "step": 12880 }, { "epoch": 0.022840850760519477, "grad_norm": 0.28125, "learning_rate": 0.001931437474102934, "loss": 0.2147, "step": 12882 }, { "epoch": 0.022844396925829295, "grad_norm": 0.337890625, "learning_rate": 0.001931414686847897, "loss": 0.3228, "step": 12884 }, { "epoch": 0.02284794309113911, "grad_norm": 1.0859375, "learning_rate": 0.001931391895956735, "loss": 0.1632, "step": 12886 }, { "epoch": 0.022851489256448924, "grad_norm": 0.6484375, "learning_rate": 0.0019313691014295478, "loss": 0.2619, "step": 12888 }, { "epoch": 0.02285503542175874, "grad_norm": 0.4296875, "learning_rate": 0.0019313463032664352, "loss": 0.1789, "step": 12890 }, { "epoch": 0.022858581587068553, "grad_norm": 0.703125, "learning_rate": 0.0019313235014674966, "loss": 0.2279, "step": 12892 }, { "epoch": 0.022862127752378367, "grad_norm": 0.65234375, "learning_rate": 0.0019313006960328318, "loss": 0.2073, "step": 12894 }, { "epoch": 0.022865673917688185, "grad_norm": 0.62890625, "learning_rate": 0.0019312778869625404, "loss": 0.251, "step": 12896 }, { "epoch": 0.022869220082998, "grad_norm": 1.078125, "learning_rate": 0.001931255074256723, "loss": 0.3197, "step": 12898 }, { "epoch": 0.022872766248307815, "grad_norm": 1.5625, "learning_rate": 0.0019312322579154784, "loss": 0.2321, "step": 12900 }, { "epoch": 0.02287631241361763, "grad_norm": 0.44140625, "learning_rate": 0.0019312094379389066, "loss": 0.2021, "step": 12902 }, { "epoch": 0.022879858578927444, "grad_norm": 0.80859375, "learning_rate": 0.001931186614327108, "loss": 0.2541, "step": 12904 }, { "epoch": 0.022883404744237258, "grad_norm": 0.65625, "learning_rate": 0.0019311637870801815, "loss": 0.2278, "step": 12906 }, { "epoch": 0.022886950909547076, "grad_norm": 1.390625, "learning_rate": 0.0019311409561982276, "loss": 0.4176, "step": 12908 }, { "epoch": 0.02289049707485689, "grad_norm": 1.1484375, "learning_rate": 0.001931118121681346, "loss": 0.2303, "step": 12910 }, { "epoch": 0.022894043240166705, "grad_norm": 0.96875, "learning_rate": 0.0019310952835296365, "loss": 0.1868, "step": 12912 }, { "epoch": 0.02289758940547652, "grad_norm": 5.625, "learning_rate": 0.001931072441743199, "loss": 0.2724, "step": 12914 }, { "epoch": 0.022901135570786334, "grad_norm": 3.296875, "learning_rate": 0.0019310495963221334, "loss": 0.4192, "step": 12916 }, { "epoch": 0.022904681736096152, "grad_norm": 0.9375, "learning_rate": 0.0019310267472665392, "loss": 0.251, "step": 12918 }, { "epoch": 0.022908227901405967, "grad_norm": 0.490234375, "learning_rate": 0.0019310038945765172, "loss": 0.2704, "step": 12920 }, { "epoch": 0.02291177406671578, "grad_norm": 0.78515625, "learning_rate": 0.0019309810382521666, "loss": 0.2699, "step": 12922 }, { "epoch": 0.022915320232025596, "grad_norm": 1.1328125, "learning_rate": 0.0019309581782935876, "loss": 0.2919, "step": 12924 }, { "epoch": 0.02291886639733541, "grad_norm": 0.482421875, "learning_rate": 0.0019309353147008805, "loss": 0.1962, "step": 12926 }, { "epoch": 0.022922412562645225, "grad_norm": 4.84375, "learning_rate": 0.0019309124474741449, "loss": 0.2168, "step": 12928 }, { "epoch": 0.022925958727955043, "grad_norm": 1.015625, "learning_rate": 0.0019308895766134806, "loss": 0.208, "step": 12930 }, { "epoch": 0.022929504893264858, "grad_norm": 5.0, "learning_rate": 0.0019308667021189884, "loss": 0.2692, "step": 12932 }, { "epoch": 0.022933051058574672, "grad_norm": 0.64453125, "learning_rate": 0.0019308438239907676, "loss": 0.2011, "step": 12934 }, { "epoch": 0.022936597223884487, "grad_norm": 5.125, "learning_rate": 0.0019308209422289186, "loss": 0.2225, "step": 12936 }, { "epoch": 0.0229401433891943, "grad_norm": 2.0625, "learning_rate": 0.0019307980568335414, "loss": 0.3926, "step": 12938 }, { "epoch": 0.022943689554504116, "grad_norm": 1.8125, "learning_rate": 0.0019307751678047358, "loss": 0.3128, "step": 12940 }, { "epoch": 0.022947235719813934, "grad_norm": 0.5390625, "learning_rate": 0.0019307522751426027, "loss": 0.2158, "step": 12942 }, { "epoch": 0.02295078188512375, "grad_norm": 0.875, "learning_rate": 0.0019307293788472415, "loss": 0.2262, "step": 12944 }, { "epoch": 0.022954328050433563, "grad_norm": 1.0859375, "learning_rate": 0.0019307064789187522, "loss": 0.3075, "step": 12946 }, { "epoch": 0.022957874215743378, "grad_norm": 1.1953125, "learning_rate": 0.001930683575357236, "loss": 0.2198, "step": 12948 }, { "epoch": 0.022961420381053192, "grad_norm": 0.640625, "learning_rate": 0.0019306606681627916, "loss": 0.216, "step": 12950 }, { "epoch": 0.02296496654636301, "grad_norm": 0.91796875, "learning_rate": 0.0019306377573355203, "loss": 0.2853, "step": 12952 }, { "epoch": 0.022968512711672825, "grad_norm": 0.515625, "learning_rate": 0.0019306148428755222, "loss": 0.2414, "step": 12954 }, { "epoch": 0.02297205887698264, "grad_norm": 0.9375, "learning_rate": 0.001930591924782897, "loss": 0.2223, "step": 12956 }, { "epoch": 0.022975605042292454, "grad_norm": 0.328125, "learning_rate": 0.001930569003057745, "loss": 0.1855, "step": 12958 }, { "epoch": 0.02297915120760227, "grad_norm": 0.93359375, "learning_rate": 0.0019305460777001666, "loss": 0.2487, "step": 12960 }, { "epoch": 0.022982697372912083, "grad_norm": 0.396484375, "learning_rate": 0.0019305231487102624, "loss": 0.206, "step": 12962 }, { "epoch": 0.0229862435382219, "grad_norm": 0.373046875, "learning_rate": 0.0019305002160881323, "loss": 0.2376, "step": 12964 }, { "epoch": 0.022989789703531716, "grad_norm": 0.75, "learning_rate": 0.0019304772798338769, "loss": 0.2325, "step": 12966 }, { "epoch": 0.02299333586884153, "grad_norm": 0.39453125, "learning_rate": 0.0019304543399475957, "loss": 0.2152, "step": 12968 }, { "epoch": 0.022996882034151345, "grad_norm": 0.294921875, "learning_rate": 0.00193043139642939, "loss": 0.1903, "step": 12970 }, { "epoch": 0.02300042819946116, "grad_norm": 1.1875, "learning_rate": 0.0019304084492793596, "loss": 0.269, "step": 12972 }, { "epoch": 0.023003974364770974, "grad_norm": 1.1328125, "learning_rate": 0.0019303854984976053, "loss": 0.2906, "step": 12974 }, { "epoch": 0.023007520530080792, "grad_norm": 0.390625, "learning_rate": 0.0019303625440842268, "loss": 0.2932, "step": 12976 }, { "epoch": 0.023011066695390606, "grad_norm": 0.58984375, "learning_rate": 0.0019303395860393246, "loss": 0.1753, "step": 12978 }, { "epoch": 0.02301461286070042, "grad_norm": 1.4375, "learning_rate": 0.0019303166243630002, "loss": 0.2325, "step": 12980 }, { "epoch": 0.023018159026010235, "grad_norm": 0.55859375, "learning_rate": 0.0019302936590553526, "loss": 0.3821, "step": 12982 }, { "epoch": 0.02302170519132005, "grad_norm": 2.375, "learning_rate": 0.001930270690116483, "loss": 0.3146, "step": 12984 }, { "epoch": 0.023025251356629868, "grad_norm": 3.296875, "learning_rate": 0.0019302477175464916, "loss": 0.4533, "step": 12986 }, { "epoch": 0.023028797521939683, "grad_norm": 3.28125, "learning_rate": 0.0019302247413454793, "loss": 0.3294, "step": 12988 }, { "epoch": 0.023032343687249497, "grad_norm": 0.5078125, "learning_rate": 0.001930201761513546, "loss": 0.2068, "step": 12990 }, { "epoch": 0.02303588985255931, "grad_norm": 0.93359375, "learning_rate": 0.001930178778050792, "loss": 0.2309, "step": 12992 }, { "epoch": 0.023039436017869126, "grad_norm": 0.578125, "learning_rate": 0.0019301557909573185, "loss": 0.2445, "step": 12994 }, { "epoch": 0.02304298218317894, "grad_norm": 0.5703125, "learning_rate": 0.001930132800233226, "loss": 0.1791, "step": 12996 }, { "epoch": 0.02304652834848876, "grad_norm": 2.359375, "learning_rate": 0.0019301098058786149, "loss": 0.6387, "step": 12998 }, { "epoch": 0.023050074513798573, "grad_norm": 0.75390625, "learning_rate": 0.0019300868078935853, "loss": 0.2871, "step": 13000 }, { "epoch": 0.023053620679108388, "grad_norm": 0.400390625, "learning_rate": 0.0019300638062782385, "loss": 0.2078, "step": 13002 }, { "epoch": 0.023057166844418202, "grad_norm": 0.326171875, "learning_rate": 0.0019300408010326747, "loss": 0.2409, "step": 13004 }, { "epoch": 0.023060713009728017, "grad_norm": 0.65625, "learning_rate": 0.0019300177921569947, "loss": 0.2867, "step": 13006 }, { "epoch": 0.02306425917503783, "grad_norm": 0.97265625, "learning_rate": 0.0019299947796512988, "loss": 0.5307, "step": 13008 }, { "epoch": 0.02306780534034765, "grad_norm": 1.671875, "learning_rate": 0.001929971763515688, "loss": 0.2227, "step": 13010 }, { "epoch": 0.023071351505657464, "grad_norm": 1.2109375, "learning_rate": 0.0019299487437502627, "loss": 0.3788, "step": 13012 }, { "epoch": 0.02307489767096728, "grad_norm": 0.33203125, "learning_rate": 0.0019299257203551237, "loss": 0.2514, "step": 13014 }, { "epoch": 0.023078443836277093, "grad_norm": 3.046875, "learning_rate": 0.0019299026933303717, "loss": 0.2558, "step": 13016 }, { "epoch": 0.023081990001586908, "grad_norm": 1.28125, "learning_rate": 0.0019298796626761076, "loss": 0.2056, "step": 13018 }, { "epoch": 0.023085536166896726, "grad_norm": 0.6328125, "learning_rate": 0.0019298566283924314, "loss": 0.1677, "step": 13020 }, { "epoch": 0.02308908233220654, "grad_norm": 0.5625, "learning_rate": 0.0019298335904794446, "loss": 0.1941, "step": 13022 }, { "epoch": 0.023092628497516355, "grad_norm": 0.60546875, "learning_rate": 0.001929810548937248, "loss": 0.1634, "step": 13024 }, { "epoch": 0.02309617466282617, "grad_norm": 0.484375, "learning_rate": 0.0019297875037659416, "loss": 0.1903, "step": 13026 }, { "epoch": 0.023099720828135984, "grad_norm": 0.71875, "learning_rate": 0.0019297644549656268, "loss": 0.1982, "step": 13028 }, { "epoch": 0.0231032669934458, "grad_norm": 0.7578125, "learning_rate": 0.0019297414025364044, "loss": 0.1951, "step": 13030 }, { "epoch": 0.023106813158755617, "grad_norm": 2.359375, "learning_rate": 0.001929718346478375, "loss": 0.2912, "step": 13032 }, { "epoch": 0.02311035932406543, "grad_norm": 0.3046875, "learning_rate": 0.0019296952867916396, "loss": 0.2001, "step": 13034 }, { "epoch": 0.023113905489375246, "grad_norm": 1.7734375, "learning_rate": 0.001929672223476299, "loss": 0.2551, "step": 13036 }, { "epoch": 0.02311745165468506, "grad_norm": 0.36328125, "learning_rate": 0.001929649156532454, "loss": 0.2102, "step": 13038 }, { "epoch": 0.023120997819994875, "grad_norm": 0.65625, "learning_rate": 0.0019296260859602054, "loss": 0.2956, "step": 13040 }, { "epoch": 0.02312454398530469, "grad_norm": 0.58984375, "learning_rate": 0.001929603011759654, "loss": 0.2675, "step": 13042 }, { "epoch": 0.023128090150614507, "grad_norm": 1.1796875, "learning_rate": 0.001929579933930901, "loss": 0.2036, "step": 13044 }, { "epoch": 0.023131636315924322, "grad_norm": 0.50390625, "learning_rate": 0.0019295568524740475, "loss": 0.1797, "step": 13046 }, { "epoch": 0.023135182481234137, "grad_norm": 0.65234375, "learning_rate": 0.0019295337673891936, "loss": 0.2802, "step": 13048 }, { "epoch": 0.02313872864654395, "grad_norm": 0.734375, "learning_rate": 0.0019295106786764418, "loss": 0.2052, "step": 13050 }, { "epoch": 0.023142274811853766, "grad_norm": 0.6171875, "learning_rate": 0.0019294875863358916, "loss": 0.1848, "step": 13052 }, { "epoch": 0.023145820977163584, "grad_norm": 2.1875, "learning_rate": 0.0019294644903676444, "loss": 0.2691, "step": 13054 }, { "epoch": 0.023149367142473398, "grad_norm": 1.4765625, "learning_rate": 0.0019294413907718017, "loss": 0.2286, "step": 13056 }, { "epoch": 0.023152913307783213, "grad_norm": 1.234375, "learning_rate": 0.0019294182875484638, "loss": 0.1945, "step": 13058 }, { "epoch": 0.023156459473093027, "grad_norm": 1.109375, "learning_rate": 0.0019293951806977324, "loss": 0.3339, "step": 13060 }, { "epoch": 0.023160005638402842, "grad_norm": 0.5390625, "learning_rate": 0.0019293720702197078, "loss": 0.2257, "step": 13062 }, { "epoch": 0.023163551803712656, "grad_norm": 1.4375, "learning_rate": 0.0019293489561144918, "loss": 0.1969, "step": 13064 }, { "epoch": 0.023167097969022474, "grad_norm": 0.52734375, "learning_rate": 0.0019293258383821853, "loss": 0.1827, "step": 13066 }, { "epoch": 0.02317064413433229, "grad_norm": 0.31640625, "learning_rate": 0.0019293027170228891, "loss": 0.3714, "step": 13068 }, { "epoch": 0.023174190299642104, "grad_norm": 0.365234375, "learning_rate": 0.0019292795920367047, "loss": 0.2114, "step": 13070 }, { "epoch": 0.023177736464951918, "grad_norm": 0.466796875, "learning_rate": 0.0019292564634237333, "loss": 0.237, "step": 13072 }, { "epoch": 0.023181282630261733, "grad_norm": 0.90234375, "learning_rate": 0.0019292333311840754, "loss": 0.2124, "step": 13074 }, { "epoch": 0.023184828795571547, "grad_norm": 0.306640625, "learning_rate": 0.0019292101953178327, "loss": 0.236, "step": 13076 }, { "epoch": 0.023188374960881365, "grad_norm": 0.8125, "learning_rate": 0.0019291870558251064, "loss": 0.2994, "step": 13078 }, { "epoch": 0.02319192112619118, "grad_norm": 0.6015625, "learning_rate": 0.0019291639127059974, "loss": 0.2153, "step": 13080 }, { "epoch": 0.023195467291500994, "grad_norm": 2.09375, "learning_rate": 0.0019291407659606072, "loss": 0.2804, "step": 13082 }, { "epoch": 0.02319901345681081, "grad_norm": 0.255859375, "learning_rate": 0.0019291176155890367, "loss": 0.2051, "step": 13084 }, { "epoch": 0.023202559622120623, "grad_norm": 0.27734375, "learning_rate": 0.0019290944615913874, "loss": 0.2339, "step": 13086 }, { "epoch": 0.02320610578743044, "grad_norm": 4.1875, "learning_rate": 0.0019290713039677608, "loss": 0.2361, "step": 13088 }, { "epoch": 0.023209651952740256, "grad_norm": 0.84375, "learning_rate": 0.0019290481427182576, "loss": 0.2502, "step": 13090 }, { "epoch": 0.02321319811805007, "grad_norm": 0.7109375, "learning_rate": 0.0019290249778429797, "loss": 0.4189, "step": 13092 }, { "epoch": 0.023216744283359885, "grad_norm": 4.71875, "learning_rate": 0.001929001809342028, "loss": 0.3903, "step": 13094 }, { "epoch": 0.0232202904486697, "grad_norm": 0.765625, "learning_rate": 0.001928978637215504, "loss": 0.2088, "step": 13096 }, { "epoch": 0.023223836613979514, "grad_norm": 0.51171875, "learning_rate": 0.0019289554614635085, "loss": 0.2329, "step": 13098 }, { "epoch": 0.023227382779289332, "grad_norm": 0.8125, "learning_rate": 0.0019289322820861437, "loss": 0.2424, "step": 13100 }, { "epoch": 0.023230928944599147, "grad_norm": 1.4296875, "learning_rate": 0.0019289090990835106, "loss": 0.2687, "step": 13102 }, { "epoch": 0.02323447510990896, "grad_norm": 0.59375, "learning_rate": 0.0019288859124557106, "loss": 0.1787, "step": 13104 }, { "epoch": 0.023238021275218776, "grad_norm": 0.302734375, "learning_rate": 0.001928862722202845, "loss": 0.2024, "step": 13106 }, { "epoch": 0.02324156744052859, "grad_norm": 0.55078125, "learning_rate": 0.0019288395283250154, "loss": 0.1788, "step": 13108 }, { "epoch": 0.023245113605838405, "grad_norm": 1.5234375, "learning_rate": 0.001928816330822323, "loss": 0.2198, "step": 13110 }, { "epoch": 0.023248659771148223, "grad_norm": 0.859375, "learning_rate": 0.0019287931296948695, "loss": 0.1872, "step": 13112 }, { "epoch": 0.023252205936458038, "grad_norm": 0.734375, "learning_rate": 0.0019287699249427562, "loss": 0.2031, "step": 13114 }, { "epoch": 0.023255752101767852, "grad_norm": 0.435546875, "learning_rate": 0.001928746716566085, "loss": 0.2621, "step": 13116 }, { "epoch": 0.023259298267077667, "grad_norm": 0.76171875, "learning_rate": 0.0019287235045649565, "loss": 0.4895, "step": 13118 }, { "epoch": 0.02326284443238748, "grad_norm": 2.515625, "learning_rate": 0.0019287002889394735, "loss": 0.451, "step": 13120 }, { "epoch": 0.0232663905976973, "grad_norm": 1.171875, "learning_rate": 0.001928677069689736, "loss": 0.1918, "step": 13122 }, { "epoch": 0.023269936763007114, "grad_norm": 1.8046875, "learning_rate": 0.0019286538468158468, "loss": 0.2922, "step": 13124 }, { "epoch": 0.02327348292831693, "grad_norm": 0.23828125, "learning_rate": 0.0019286306203179066, "loss": 0.1963, "step": 13126 }, { "epoch": 0.023277029093626743, "grad_norm": 1.2265625, "learning_rate": 0.001928607390196018, "loss": 0.3841, "step": 13128 }, { "epoch": 0.023280575258936557, "grad_norm": 0.84765625, "learning_rate": 0.0019285841564502817, "loss": 0.2603, "step": 13130 }, { "epoch": 0.023284121424246372, "grad_norm": 0.7265625, "learning_rate": 0.0019285609190807995, "loss": 0.2258, "step": 13132 }, { "epoch": 0.02328766758955619, "grad_norm": 1.9296875, "learning_rate": 0.0019285376780876734, "loss": 0.309, "step": 13134 }, { "epoch": 0.023291213754866005, "grad_norm": 0.98828125, "learning_rate": 0.0019285144334710046, "loss": 0.2555, "step": 13136 }, { "epoch": 0.02329475992017582, "grad_norm": 5.0625, "learning_rate": 0.001928491185230895, "loss": 0.2059, "step": 13138 }, { "epoch": 0.023298306085485634, "grad_norm": 1.125, "learning_rate": 0.001928467933367446, "loss": 0.2046, "step": 13140 }, { "epoch": 0.02330185225079545, "grad_norm": 0.388671875, "learning_rate": 0.0019284446778807596, "loss": 0.2326, "step": 13142 }, { "epoch": 0.023305398416105263, "grad_norm": 0.41796875, "learning_rate": 0.0019284214187709375, "loss": 0.2009, "step": 13144 }, { "epoch": 0.02330894458141508, "grad_norm": 0.91796875, "learning_rate": 0.001928398156038081, "loss": 0.2004, "step": 13146 }, { "epoch": 0.023312490746724895, "grad_norm": 1.796875, "learning_rate": 0.0019283748896822923, "loss": 0.2838, "step": 13148 }, { "epoch": 0.02331603691203471, "grad_norm": 0.5703125, "learning_rate": 0.0019283516197036732, "loss": 0.237, "step": 13150 }, { "epoch": 0.023319583077344525, "grad_norm": 1.875, "learning_rate": 0.0019283283461023254, "loss": 0.2416, "step": 13152 }, { "epoch": 0.02332312924265434, "grad_norm": 1.796875, "learning_rate": 0.00192830506887835, "loss": 0.2245, "step": 13154 }, { "epoch": 0.023326675407964157, "grad_norm": 1.1796875, "learning_rate": 0.0019282817880318498, "loss": 0.2192, "step": 13156 }, { "epoch": 0.02333022157327397, "grad_norm": 1.40625, "learning_rate": 0.0019282585035629264, "loss": 0.2011, "step": 13158 }, { "epoch": 0.023333767738583786, "grad_norm": 0.435546875, "learning_rate": 0.001928235215471681, "loss": 0.2881, "step": 13160 }, { "epoch": 0.0233373139038936, "grad_norm": 0.40234375, "learning_rate": 0.0019282119237582157, "loss": 0.1893, "step": 13162 }, { "epoch": 0.023340860069203415, "grad_norm": 1.046875, "learning_rate": 0.0019281886284226327, "loss": 0.2091, "step": 13164 }, { "epoch": 0.02334440623451323, "grad_norm": 0.259765625, "learning_rate": 0.001928165329465034, "loss": 0.1681, "step": 13166 }, { "epoch": 0.023347952399823048, "grad_norm": 0.447265625, "learning_rate": 0.0019281420268855212, "loss": 0.2459, "step": 13168 }, { "epoch": 0.023351498565132862, "grad_norm": 0.8359375, "learning_rate": 0.0019281187206841958, "loss": 0.2593, "step": 13170 }, { "epoch": 0.023355044730442677, "grad_norm": 0.32421875, "learning_rate": 0.0019280954108611605, "loss": 0.1949, "step": 13172 }, { "epoch": 0.02335859089575249, "grad_norm": 0.66796875, "learning_rate": 0.0019280720974165166, "loss": 0.1931, "step": 13174 }, { "epoch": 0.023362137061062306, "grad_norm": 1.0703125, "learning_rate": 0.0019280487803503665, "loss": 0.2145, "step": 13176 }, { "epoch": 0.02336568322637212, "grad_norm": 0.62109375, "learning_rate": 0.001928025459662812, "loss": 0.2072, "step": 13178 }, { "epoch": 0.02336922939168194, "grad_norm": 0.6796875, "learning_rate": 0.001928002135353955, "loss": 0.2401, "step": 13180 }, { "epoch": 0.023372775556991753, "grad_norm": 0.6796875, "learning_rate": 0.001927978807423898, "loss": 0.2412, "step": 13182 }, { "epoch": 0.023376321722301568, "grad_norm": 0.70703125, "learning_rate": 0.0019279554758727423, "loss": 0.2462, "step": 13184 }, { "epoch": 0.023379867887611382, "grad_norm": 2.328125, "learning_rate": 0.0019279321407005903, "loss": 0.2282, "step": 13186 }, { "epoch": 0.023383414052921197, "grad_norm": 0.55859375, "learning_rate": 0.0019279088019075442, "loss": 0.344, "step": 13188 }, { "epoch": 0.023386960218231015, "grad_norm": 6.5, "learning_rate": 0.001927885459493706, "loss": 0.2536, "step": 13190 }, { "epoch": 0.02339050638354083, "grad_norm": 0.8359375, "learning_rate": 0.0019278621134591776, "loss": 0.1924, "step": 13192 }, { "epoch": 0.023394052548850644, "grad_norm": 1.75, "learning_rate": 0.0019278387638040609, "loss": 0.3228, "step": 13194 }, { "epoch": 0.02339759871416046, "grad_norm": 0.380859375, "learning_rate": 0.0019278154105284587, "loss": 0.2052, "step": 13196 }, { "epoch": 0.023401144879470273, "grad_norm": 1.0078125, "learning_rate": 0.0019277920536324722, "loss": 0.2303, "step": 13198 }, { "epoch": 0.023404691044780088, "grad_norm": 0.453125, "learning_rate": 0.0019277686931162047, "loss": 0.283, "step": 13200 }, { "epoch": 0.023408237210089906, "grad_norm": 0.75, "learning_rate": 0.0019277453289797575, "loss": 0.2098, "step": 13202 }, { "epoch": 0.02341178337539972, "grad_norm": 0.447265625, "learning_rate": 0.001927721961223233, "loss": 0.2524, "step": 13204 }, { "epoch": 0.023415329540709535, "grad_norm": 0.61328125, "learning_rate": 0.0019276985898467336, "loss": 0.2307, "step": 13206 }, { "epoch": 0.02341887570601935, "grad_norm": 0.32421875, "learning_rate": 0.0019276752148503612, "loss": 0.227, "step": 13208 }, { "epoch": 0.023422421871329164, "grad_norm": 0.640625, "learning_rate": 0.0019276518362342184, "loss": 0.3369, "step": 13210 }, { "epoch": 0.02342596803663898, "grad_norm": 0.5234375, "learning_rate": 0.001927628453998407, "loss": 0.3536, "step": 13212 }, { "epoch": 0.023429514201948796, "grad_norm": 8.25, "learning_rate": 0.0019276050681430294, "loss": 0.244, "step": 13214 }, { "epoch": 0.02343306036725861, "grad_norm": 0.890625, "learning_rate": 0.0019275816786681883, "loss": 0.1726, "step": 13216 }, { "epoch": 0.023436606532568426, "grad_norm": 1.2109375, "learning_rate": 0.0019275582855739855, "loss": 0.2719, "step": 13218 }, { "epoch": 0.02344015269787824, "grad_norm": 0.640625, "learning_rate": 0.0019275348888605232, "loss": 0.2649, "step": 13220 }, { "epoch": 0.023443698863188055, "grad_norm": 1.5, "learning_rate": 0.0019275114885279046, "loss": 0.2871, "step": 13222 }, { "epoch": 0.023447245028497873, "grad_norm": 0.384765625, "learning_rate": 0.001927488084576231, "loss": 0.3739, "step": 13224 }, { "epoch": 0.023450791193807687, "grad_norm": 1.5546875, "learning_rate": 0.0019274646770056053, "loss": 0.2474, "step": 13226 }, { "epoch": 0.023454337359117502, "grad_norm": 2.859375, "learning_rate": 0.0019274412658161293, "loss": 0.3691, "step": 13228 }, { "epoch": 0.023457883524427316, "grad_norm": 0.392578125, "learning_rate": 0.0019274178510079064, "loss": 0.256, "step": 13230 }, { "epoch": 0.02346142968973713, "grad_norm": 1.5078125, "learning_rate": 0.0019273944325810383, "loss": 0.4181, "step": 13232 }, { "epoch": 0.023464975855046945, "grad_norm": 0.5546875, "learning_rate": 0.0019273710105356275, "loss": 0.1921, "step": 13234 }, { "epoch": 0.023468522020356763, "grad_norm": 0.62890625, "learning_rate": 0.0019273475848717765, "loss": 0.2186, "step": 13236 }, { "epoch": 0.023472068185666578, "grad_norm": 0.5625, "learning_rate": 0.0019273241555895877, "loss": 0.2411, "step": 13238 }, { "epoch": 0.023475614350976393, "grad_norm": 0.49609375, "learning_rate": 0.001927300722689164, "loss": 0.2295, "step": 13240 }, { "epoch": 0.023479160516286207, "grad_norm": 0.4921875, "learning_rate": 0.001927277286170607, "loss": 0.2778, "step": 13242 }, { "epoch": 0.02348270668159602, "grad_norm": 0.703125, "learning_rate": 0.0019272538460340198, "loss": 0.2496, "step": 13244 }, { "epoch": 0.023486252846905836, "grad_norm": 0.27734375, "learning_rate": 0.001927230402279505, "loss": 0.2786, "step": 13246 }, { "epoch": 0.023489799012215654, "grad_norm": 0.53125, "learning_rate": 0.0019272069549071645, "loss": 0.2335, "step": 13248 }, { "epoch": 0.02349334517752547, "grad_norm": 0.49609375, "learning_rate": 0.0019271835039171017, "loss": 0.2211, "step": 13250 }, { "epoch": 0.023496891342835283, "grad_norm": 0.5546875, "learning_rate": 0.0019271600493094186, "loss": 0.234, "step": 13252 }, { "epoch": 0.023500437508145098, "grad_norm": 1.0390625, "learning_rate": 0.001927136591084218, "loss": 0.2217, "step": 13254 }, { "epoch": 0.023503983673454912, "grad_norm": 1.1171875, "learning_rate": 0.0019271131292416025, "loss": 0.2276, "step": 13256 }, { "epoch": 0.023507529838764727, "grad_norm": 0.439453125, "learning_rate": 0.0019270896637816743, "loss": 0.1903, "step": 13258 }, { "epoch": 0.023511076004074545, "grad_norm": 0.859375, "learning_rate": 0.0019270661947045362, "loss": 0.1778, "step": 13260 }, { "epoch": 0.02351462216938436, "grad_norm": 0.71484375, "learning_rate": 0.0019270427220102915, "loss": 0.2327, "step": 13262 }, { "epoch": 0.023518168334694174, "grad_norm": 0.609375, "learning_rate": 0.0019270192456990421, "loss": 0.21, "step": 13264 }, { "epoch": 0.02352171450000399, "grad_norm": 0.40625, "learning_rate": 0.001926995765770891, "loss": 0.2364, "step": 13266 }, { "epoch": 0.023525260665313803, "grad_norm": 0.93359375, "learning_rate": 0.0019269722822259406, "loss": 0.2732, "step": 13268 }, { "epoch": 0.02352880683062362, "grad_norm": 0.72265625, "learning_rate": 0.0019269487950642939, "loss": 0.2324, "step": 13270 }, { "epoch": 0.023532352995933436, "grad_norm": 0.59765625, "learning_rate": 0.0019269253042860537, "loss": 0.1911, "step": 13272 }, { "epoch": 0.02353589916124325, "grad_norm": 2.015625, "learning_rate": 0.0019269018098913224, "loss": 0.2773, "step": 13274 }, { "epoch": 0.023539445326553065, "grad_norm": 0.56640625, "learning_rate": 0.0019268783118802026, "loss": 0.1592, "step": 13276 }, { "epoch": 0.02354299149186288, "grad_norm": 1.9609375, "learning_rate": 0.0019268548102527976, "loss": 0.3859, "step": 13278 }, { "epoch": 0.023546537657172694, "grad_norm": 0.291015625, "learning_rate": 0.00192683130500921, "loss": 0.2417, "step": 13280 }, { "epoch": 0.023550083822482512, "grad_norm": 0.55078125, "learning_rate": 0.001926807796149543, "loss": 0.2277, "step": 13282 }, { "epoch": 0.023553629987792327, "grad_norm": 0.55078125, "learning_rate": 0.0019267842836738986, "loss": 0.2861, "step": 13284 }, { "epoch": 0.02355717615310214, "grad_norm": 0.875, "learning_rate": 0.0019267607675823796, "loss": 0.2087, "step": 13286 }, { "epoch": 0.023560722318411956, "grad_norm": 0.56640625, "learning_rate": 0.0019267372478750898, "loss": 0.2332, "step": 13288 }, { "epoch": 0.02356426848372177, "grad_norm": 0.953125, "learning_rate": 0.0019267137245521312, "loss": 0.1913, "step": 13290 }, { "epoch": 0.023567814649031585, "grad_norm": 0.349609375, "learning_rate": 0.001926690197613607, "loss": 0.2295, "step": 13292 }, { "epoch": 0.023571360814341403, "grad_norm": 0.390625, "learning_rate": 0.0019266666670596203, "loss": 0.2056, "step": 13294 }, { "epoch": 0.023574906979651217, "grad_norm": 0.2890625, "learning_rate": 0.0019266431328902735, "loss": 0.1952, "step": 13296 }, { "epoch": 0.023578453144961032, "grad_norm": 0.83984375, "learning_rate": 0.0019266195951056697, "loss": 0.2857, "step": 13298 }, { "epoch": 0.023581999310270847, "grad_norm": 1.09375, "learning_rate": 0.0019265960537059124, "loss": 0.2769, "step": 13300 }, { "epoch": 0.02358554547558066, "grad_norm": 0.515625, "learning_rate": 0.0019265725086911039, "loss": 0.2274, "step": 13302 }, { "epoch": 0.02358909164089048, "grad_norm": 0.6328125, "learning_rate": 0.0019265489600613472, "loss": 0.2535, "step": 13304 }, { "epoch": 0.023592637806200294, "grad_norm": 1.171875, "learning_rate": 0.0019265254078167458, "loss": 0.2675, "step": 13306 }, { "epoch": 0.023596183971510108, "grad_norm": 0.953125, "learning_rate": 0.0019265018519574021, "loss": 0.2195, "step": 13308 }, { "epoch": 0.023599730136819923, "grad_norm": 0.5546875, "learning_rate": 0.0019264782924834193, "loss": 0.2241, "step": 13310 }, { "epoch": 0.023603276302129737, "grad_norm": 0.41015625, "learning_rate": 0.0019264547293949008, "loss": 0.2474, "step": 13312 }, { "epoch": 0.023606822467439552, "grad_norm": 0.302734375, "learning_rate": 0.0019264311626919493, "loss": 0.1809, "step": 13314 }, { "epoch": 0.02361036863274937, "grad_norm": 1.1015625, "learning_rate": 0.001926407592374668, "loss": 0.1892, "step": 13316 }, { "epoch": 0.023613914798059184, "grad_norm": 0.7265625, "learning_rate": 0.00192638401844316, "loss": 0.2106, "step": 13318 }, { "epoch": 0.023617460963369, "grad_norm": 0.330078125, "learning_rate": 0.0019263604408975281, "loss": 0.1831, "step": 13320 }, { "epoch": 0.023621007128678814, "grad_norm": 0.423828125, "learning_rate": 0.0019263368597378756, "loss": 0.1624, "step": 13322 }, { "epoch": 0.023624553293988628, "grad_norm": 0.396484375, "learning_rate": 0.0019263132749643057, "loss": 0.6868, "step": 13324 }, { "epoch": 0.023628099459298443, "grad_norm": 0.353515625, "learning_rate": 0.0019262896865769217, "loss": 0.2172, "step": 13326 }, { "epoch": 0.02363164562460826, "grad_norm": 0.392578125, "learning_rate": 0.0019262660945758266, "loss": 0.1942, "step": 13328 }, { "epoch": 0.023635191789918075, "grad_norm": 0.26953125, "learning_rate": 0.0019262424989611234, "loss": 0.2103, "step": 13330 }, { "epoch": 0.02363873795522789, "grad_norm": 0.82421875, "learning_rate": 0.0019262188997329155, "loss": 0.2087, "step": 13332 }, { "epoch": 0.023642284120537704, "grad_norm": 0.470703125, "learning_rate": 0.0019261952968913061, "loss": 0.187, "step": 13334 }, { "epoch": 0.02364583028584752, "grad_norm": 6.0, "learning_rate": 0.0019261716904363981, "loss": 0.3445, "step": 13336 }, { "epoch": 0.023649376451157337, "grad_norm": 0.5, "learning_rate": 0.0019261480803682953, "loss": 0.3638, "step": 13338 }, { "epoch": 0.02365292261646715, "grad_norm": 0.296875, "learning_rate": 0.0019261244666871008, "loss": 0.2022, "step": 13340 }, { "epoch": 0.023656468781776966, "grad_norm": 0.91015625, "learning_rate": 0.0019261008493929176, "loss": 0.2352, "step": 13342 }, { "epoch": 0.02366001494708678, "grad_norm": 0.86328125, "learning_rate": 0.001926077228485849, "loss": 0.1972, "step": 13344 }, { "epoch": 0.023663561112396595, "grad_norm": 0.30078125, "learning_rate": 0.0019260536039659986, "loss": 0.1952, "step": 13346 }, { "epoch": 0.02366710727770641, "grad_norm": 3.546875, "learning_rate": 0.0019260299758334695, "loss": 0.2752, "step": 13348 }, { "epoch": 0.023670653443016228, "grad_norm": 0.89453125, "learning_rate": 0.001926006344088365, "loss": 0.2709, "step": 13350 }, { "epoch": 0.023674199608326042, "grad_norm": 1.765625, "learning_rate": 0.0019259827087307888, "loss": 0.2542, "step": 13352 }, { "epoch": 0.023677745773635857, "grad_norm": 0.31640625, "learning_rate": 0.0019259590697608437, "loss": 0.219, "step": 13354 }, { "epoch": 0.02368129193894567, "grad_norm": 0.765625, "learning_rate": 0.0019259354271786338, "loss": 0.2112, "step": 13356 }, { "epoch": 0.023684838104255486, "grad_norm": 0.56640625, "learning_rate": 0.0019259117809842618, "loss": 0.237, "step": 13358 }, { "epoch": 0.0236883842695653, "grad_norm": 4.875, "learning_rate": 0.0019258881311778318, "loss": 0.2161, "step": 13360 }, { "epoch": 0.02369193043487512, "grad_norm": 0.64453125, "learning_rate": 0.0019258644777594464, "loss": 0.2566, "step": 13362 }, { "epoch": 0.023695476600184933, "grad_norm": 0.99609375, "learning_rate": 0.0019258408207292095, "loss": 0.3512, "step": 13364 }, { "epoch": 0.023699022765494748, "grad_norm": 0.365234375, "learning_rate": 0.001925817160087225, "loss": 0.2509, "step": 13366 }, { "epoch": 0.023702568930804562, "grad_norm": 4.25, "learning_rate": 0.0019257934958335956, "loss": 0.4774, "step": 13368 }, { "epoch": 0.023706115096114377, "grad_norm": 0.349609375, "learning_rate": 0.0019257698279684253, "loss": 0.2623, "step": 13370 }, { "epoch": 0.023709661261424195, "grad_norm": 0.81640625, "learning_rate": 0.001925746156491817, "loss": 0.234, "step": 13372 }, { "epoch": 0.02371320742673401, "grad_norm": 10.0, "learning_rate": 0.0019257224814038753, "loss": 0.2755, "step": 13374 }, { "epoch": 0.023716753592043824, "grad_norm": 0.671875, "learning_rate": 0.0019256988027047028, "loss": 0.3044, "step": 13376 }, { "epoch": 0.02372029975735364, "grad_norm": 0.90234375, "learning_rate": 0.0019256751203944032, "loss": 0.2837, "step": 13378 }, { "epoch": 0.023723845922663453, "grad_norm": 0.69140625, "learning_rate": 0.0019256514344730804, "loss": 0.2667, "step": 13380 }, { "epoch": 0.023727392087973267, "grad_norm": 0.625, "learning_rate": 0.0019256277449408378, "loss": 0.2156, "step": 13382 }, { "epoch": 0.023730938253283086, "grad_norm": 1.203125, "learning_rate": 0.001925604051797779, "loss": 0.2001, "step": 13384 }, { "epoch": 0.0237344844185929, "grad_norm": 0.47265625, "learning_rate": 0.0019255803550440077, "loss": 0.5126, "step": 13386 }, { "epoch": 0.023738030583902715, "grad_norm": 0.494140625, "learning_rate": 0.0019255566546796275, "loss": 0.2626, "step": 13388 }, { "epoch": 0.02374157674921253, "grad_norm": 0.7578125, "learning_rate": 0.001925532950704742, "loss": 0.2491, "step": 13390 }, { "epoch": 0.023745122914522344, "grad_norm": 0.416015625, "learning_rate": 0.0019255092431194549, "loss": 0.1858, "step": 13392 }, { "epoch": 0.02374866907983216, "grad_norm": 1.0703125, "learning_rate": 0.0019254855319238695, "loss": 0.2308, "step": 13394 }, { "epoch": 0.023752215245141976, "grad_norm": 0.578125, "learning_rate": 0.0019254618171180905, "loss": 0.1833, "step": 13396 }, { "epoch": 0.02375576141045179, "grad_norm": 0.380859375, "learning_rate": 0.001925438098702221, "loss": 0.1903, "step": 13398 }, { "epoch": 0.023759307575761605, "grad_norm": 1.0703125, "learning_rate": 0.0019254143766763642, "loss": 0.2423, "step": 13400 }, { "epoch": 0.02376285374107142, "grad_norm": 0.3359375, "learning_rate": 0.0019253906510406248, "loss": 0.2154, "step": 13402 }, { "epoch": 0.023766399906381235, "grad_norm": 0.490234375, "learning_rate": 0.0019253669217951057, "loss": 0.4263, "step": 13404 }, { "epoch": 0.023769946071691053, "grad_norm": 0.59765625, "learning_rate": 0.0019253431889399116, "loss": 0.286, "step": 13406 }, { "epoch": 0.023773492237000867, "grad_norm": 3.421875, "learning_rate": 0.0019253194524751457, "loss": 0.3845, "step": 13408 }, { "epoch": 0.02377703840231068, "grad_norm": 1.0859375, "learning_rate": 0.0019252957124009115, "loss": 0.1887, "step": 13410 }, { "epoch": 0.023780584567620496, "grad_norm": 0.515625, "learning_rate": 0.0019252719687173137, "loss": 0.196, "step": 13412 }, { "epoch": 0.02378413073293031, "grad_norm": 0.51171875, "learning_rate": 0.0019252482214244554, "loss": 0.2476, "step": 13414 }, { "epoch": 0.023787676898240125, "grad_norm": 0.78125, "learning_rate": 0.001925224470522441, "loss": 0.2232, "step": 13416 }, { "epoch": 0.023791223063549943, "grad_norm": 0.416015625, "learning_rate": 0.0019252007160113743, "loss": 0.2808, "step": 13418 }, { "epoch": 0.023794769228859758, "grad_norm": 0.55859375, "learning_rate": 0.0019251769578913585, "loss": 0.253, "step": 13420 }, { "epoch": 0.023798315394169572, "grad_norm": 0.55859375, "learning_rate": 0.0019251531961624981, "loss": 0.3107, "step": 13422 }, { "epoch": 0.023801861559479387, "grad_norm": 0.298828125, "learning_rate": 0.001925129430824897, "loss": 0.2207, "step": 13424 }, { "epoch": 0.0238054077247892, "grad_norm": 0.76953125, "learning_rate": 0.0019251056618786594, "loss": 0.2049, "step": 13426 }, { "epoch": 0.023808953890099016, "grad_norm": 1.0234375, "learning_rate": 0.0019250818893238885, "loss": 0.2018, "step": 13428 }, { "epoch": 0.023812500055408834, "grad_norm": 0.3203125, "learning_rate": 0.0019250581131606888, "loss": 0.2755, "step": 13430 }, { "epoch": 0.02381604622071865, "grad_norm": 0.427734375, "learning_rate": 0.001925034333389164, "loss": 0.2266, "step": 13432 }, { "epoch": 0.023819592386028463, "grad_norm": 1.421875, "learning_rate": 0.0019250105500094184, "loss": 0.222, "step": 13434 }, { "epoch": 0.023823138551338278, "grad_norm": 1.140625, "learning_rate": 0.0019249867630215562, "loss": 0.188, "step": 13436 }, { "epoch": 0.023826684716648092, "grad_norm": 0.6640625, "learning_rate": 0.0019249629724256805, "loss": 0.2158, "step": 13438 }, { "epoch": 0.02383023088195791, "grad_norm": 1.1875, "learning_rate": 0.0019249391782218963, "loss": 0.192, "step": 13440 }, { "epoch": 0.023833777047267725, "grad_norm": 0.703125, "learning_rate": 0.0019249153804103076, "loss": 0.1922, "step": 13442 }, { "epoch": 0.02383732321257754, "grad_norm": 0.62890625, "learning_rate": 0.0019248915789910174, "loss": 0.2622, "step": 13444 }, { "epoch": 0.023840869377887354, "grad_norm": 0.451171875, "learning_rate": 0.0019248677739641313, "loss": 0.2145, "step": 13446 }, { "epoch": 0.02384441554319717, "grad_norm": 1.0703125, "learning_rate": 0.0019248439653297524, "loss": 0.2629, "step": 13448 }, { "epoch": 0.023847961708506983, "grad_norm": 3.296875, "learning_rate": 0.0019248201530879853, "loss": 0.4618, "step": 13450 }, { "epoch": 0.0238515078738168, "grad_norm": 1.890625, "learning_rate": 0.0019247963372389337, "loss": 0.2324, "step": 13452 }, { "epoch": 0.023855054039126616, "grad_norm": 0.5234375, "learning_rate": 0.0019247725177827023, "loss": 0.238, "step": 13454 }, { "epoch": 0.02385860020443643, "grad_norm": 0.31640625, "learning_rate": 0.0019247486947193946, "loss": 0.1965, "step": 13456 }, { "epoch": 0.023862146369746245, "grad_norm": 0.765625, "learning_rate": 0.0019247248680491155, "loss": 0.4042, "step": 13458 }, { "epoch": 0.02386569253505606, "grad_norm": 0.8984375, "learning_rate": 0.001924701037771969, "loss": 0.2071, "step": 13460 }, { "epoch": 0.023869238700365874, "grad_norm": 0.828125, "learning_rate": 0.001924677203888059, "loss": 0.2464, "step": 13462 }, { "epoch": 0.023872784865675692, "grad_norm": 0.21484375, "learning_rate": 0.00192465336639749, "loss": 0.1901, "step": 13464 }, { "epoch": 0.023876331030985506, "grad_norm": 0.298828125, "learning_rate": 0.001924629525300366, "loss": 0.1725, "step": 13466 }, { "epoch": 0.02387987719629532, "grad_norm": 1.6015625, "learning_rate": 0.0019246056805967916, "loss": 0.2647, "step": 13468 }, { "epoch": 0.023883423361605136, "grad_norm": 0.84375, "learning_rate": 0.0019245818322868708, "loss": 0.2205, "step": 13470 }, { "epoch": 0.02388696952691495, "grad_norm": 0.75, "learning_rate": 0.0019245579803707086, "loss": 0.3807, "step": 13472 }, { "epoch": 0.023890515692224768, "grad_norm": 1.234375, "learning_rate": 0.001924534124848408, "loss": 0.3317, "step": 13474 }, { "epoch": 0.023894061857534583, "grad_norm": 1.0078125, "learning_rate": 0.0019245102657200746, "loss": 0.2225, "step": 13476 }, { "epoch": 0.023897608022844397, "grad_norm": 0.41015625, "learning_rate": 0.001924486402985812, "loss": 0.173, "step": 13478 }, { "epoch": 0.023901154188154212, "grad_norm": 0.62109375, "learning_rate": 0.0019244625366457249, "loss": 0.2371, "step": 13480 }, { "epoch": 0.023904700353464026, "grad_norm": 0.7265625, "learning_rate": 0.0019244386666999174, "loss": 0.22, "step": 13482 }, { "epoch": 0.02390824651877384, "grad_norm": 0.625, "learning_rate": 0.001924414793148494, "loss": 0.4865, "step": 13484 }, { "epoch": 0.02391179268408366, "grad_norm": 0.29296875, "learning_rate": 0.0019243909159915597, "loss": 0.2268, "step": 13486 }, { "epoch": 0.023915338849393473, "grad_norm": 0.6796875, "learning_rate": 0.0019243670352292182, "loss": 0.2388, "step": 13488 }, { "epoch": 0.023918885014703288, "grad_norm": 0.515625, "learning_rate": 0.0019243431508615737, "loss": 0.193, "step": 13490 }, { "epoch": 0.023922431180013103, "grad_norm": 1.625, "learning_rate": 0.0019243192628887315, "loss": 0.2962, "step": 13492 }, { "epoch": 0.023925977345322917, "grad_norm": 1.546875, "learning_rate": 0.0019242953713107954, "loss": 0.2897, "step": 13494 }, { "epoch": 0.02392952351063273, "grad_norm": 0.283203125, "learning_rate": 0.0019242714761278704, "loss": 0.2944, "step": 13496 }, { "epoch": 0.02393306967594255, "grad_norm": 0.69921875, "learning_rate": 0.0019242475773400608, "loss": 0.1719, "step": 13498 }, { "epoch": 0.023936615841252364, "grad_norm": 0.478515625, "learning_rate": 0.0019242236749474706, "loss": 0.2781, "step": 13500 }, { "epoch": 0.02394016200656218, "grad_norm": 0.400390625, "learning_rate": 0.0019241997689502053, "loss": 0.2557, "step": 13502 }, { "epoch": 0.023943708171871993, "grad_norm": 0.400390625, "learning_rate": 0.001924175859348369, "loss": 0.2425, "step": 13504 }, { "epoch": 0.023947254337181808, "grad_norm": 0.77734375, "learning_rate": 0.0019241519461420658, "loss": 0.4107, "step": 13506 }, { "epoch": 0.023950800502491626, "grad_norm": 0.98828125, "learning_rate": 0.0019241280293314012, "loss": 0.2165, "step": 13508 }, { "epoch": 0.02395434666780144, "grad_norm": 0.423828125, "learning_rate": 0.0019241041089164787, "loss": 0.2631, "step": 13510 }, { "epoch": 0.023957892833111255, "grad_norm": 0.75, "learning_rate": 0.0019240801848974041, "loss": 0.2622, "step": 13512 }, { "epoch": 0.02396143899842107, "grad_norm": 0.265625, "learning_rate": 0.0019240562572742812, "loss": 0.2273, "step": 13514 }, { "epoch": 0.023964985163730884, "grad_norm": 5.40625, "learning_rate": 0.0019240323260472148, "loss": 0.3926, "step": 13516 }, { "epoch": 0.0239685313290407, "grad_norm": 1.7890625, "learning_rate": 0.00192400839121631, "loss": 0.4296, "step": 13518 }, { "epoch": 0.023972077494350517, "grad_norm": 0.46875, "learning_rate": 0.001923984452781671, "loss": 0.3169, "step": 13520 }, { "epoch": 0.02397562365966033, "grad_norm": 2.71875, "learning_rate": 0.0019239605107434024, "loss": 0.2553, "step": 13522 }, { "epoch": 0.023979169824970146, "grad_norm": 0.69140625, "learning_rate": 0.0019239365651016092, "loss": 0.1952, "step": 13524 }, { "epoch": 0.02398271599027996, "grad_norm": 0.75, "learning_rate": 0.001923912615856396, "loss": 0.253, "step": 13526 }, { "epoch": 0.023986262155589775, "grad_norm": 0.55859375, "learning_rate": 0.0019238886630078681, "loss": 0.179, "step": 13528 }, { "epoch": 0.02398980832089959, "grad_norm": 0.326171875, "learning_rate": 0.0019238647065561295, "loss": 0.2136, "step": 13530 }, { "epoch": 0.023993354486209408, "grad_norm": 0.2060546875, "learning_rate": 0.001923840746501285, "loss": 0.184, "step": 13532 }, { "epoch": 0.023996900651519222, "grad_norm": 20.5, "learning_rate": 0.0019238167828434399, "loss": 0.2101, "step": 13534 }, { "epoch": 0.024000446816829037, "grad_norm": 0.71484375, "learning_rate": 0.0019237928155826985, "loss": 0.1452, "step": 13536 }, { "epoch": 0.02400399298213885, "grad_norm": 2.53125, "learning_rate": 0.001923768844719166, "loss": 0.2051, "step": 13538 }, { "epoch": 0.024007539147448666, "grad_norm": 2.671875, "learning_rate": 0.001923744870252947, "loss": 0.3055, "step": 13540 }, { "epoch": 0.024011085312758484, "grad_norm": 0.49609375, "learning_rate": 0.0019237208921841465, "loss": 0.2392, "step": 13542 }, { "epoch": 0.0240146314780683, "grad_norm": 0.51171875, "learning_rate": 0.0019236969105128692, "loss": 0.6113, "step": 13544 }, { "epoch": 0.024018177643378113, "grad_norm": 0.64453125, "learning_rate": 0.00192367292523922, "loss": 0.2406, "step": 13546 }, { "epoch": 0.024021723808687927, "grad_norm": 0.5, "learning_rate": 0.001923648936363304, "loss": 0.2404, "step": 13548 }, { "epoch": 0.024025269973997742, "grad_norm": 0.41796875, "learning_rate": 0.001923624943885226, "loss": 0.2664, "step": 13550 }, { "epoch": 0.024028816139307557, "grad_norm": 3.453125, "learning_rate": 0.001923600947805091, "loss": 0.2874, "step": 13552 }, { "epoch": 0.024032362304617375, "grad_norm": 0.88671875, "learning_rate": 0.0019235769481230037, "loss": 0.244, "step": 13554 }, { "epoch": 0.02403590846992719, "grad_norm": 0.1953125, "learning_rate": 0.0019235529448390694, "loss": 0.1692, "step": 13556 }, { "epoch": 0.024039454635237004, "grad_norm": 0.365234375, "learning_rate": 0.0019235289379533928, "loss": 0.2704, "step": 13558 }, { "epoch": 0.024043000800546818, "grad_norm": 0.2470703125, "learning_rate": 0.0019235049274660787, "loss": 0.181, "step": 13560 }, { "epoch": 0.024046546965856633, "grad_norm": 0.296875, "learning_rate": 0.0019234809133772326, "loss": 0.1587, "step": 13562 }, { "epoch": 0.024050093131166447, "grad_norm": 0.48046875, "learning_rate": 0.0019234568956869595, "loss": 0.3816, "step": 13564 }, { "epoch": 0.024053639296476265, "grad_norm": 0.984375, "learning_rate": 0.0019234328743953642, "loss": 0.3782, "step": 13566 }, { "epoch": 0.02405718546178608, "grad_norm": 0.443359375, "learning_rate": 0.0019234088495025518, "loss": 0.2962, "step": 13568 }, { "epoch": 0.024060731627095894, "grad_norm": 0.490234375, "learning_rate": 0.0019233848210086272, "loss": 0.1817, "step": 13570 }, { "epoch": 0.02406427779240571, "grad_norm": 1.15625, "learning_rate": 0.0019233607889136957, "loss": 0.2801, "step": 13572 }, { "epoch": 0.024067823957715524, "grad_norm": 1.0, "learning_rate": 0.0019233367532178625, "loss": 0.3423, "step": 13574 }, { "epoch": 0.02407137012302534, "grad_norm": 0.36328125, "learning_rate": 0.0019233127139212326, "loss": 0.2034, "step": 13576 }, { "epoch": 0.024074916288335156, "grad_norm": 4.71875, "learning_rate": 0.0019232886710239111, "loss": 0.4523, "step": 13578 }, { "epoch": 0.02407846245364497, "grad_norm": 0.40234375, "learning_rate": 0.001923264624526003, "loss": 0.2173, "step": 13580 }, { "epoch": 0.024082008618954785, "grad_norm": 0.34765625, "learning_rate": 0.0019232405744276139, "loss": 0.2106, "step": 13582 }, { "epoch": 0.0240855547842646, "grad_norm": 1.5390625, "learning_rate": 0.0019232165207288487, "loss": 0.2558, "step": 13584 }, { "epoch": 0.024089100949574414, "grad_norm": 0.98046875, "learning_rate": 0.0019231924634298123, "loss": 0.2368, "step": 13586 }, { "epoch": 0.024092647114884232, "grad_norm": 0.3359375, "learning_rate": 0.0019231684025306107, "loss": 0.3319, "step": 13588 }, { "epoch": 0.024096193280194047, "grad_norm": 0.7109375, "learning_rate": 0.0019231443380313483, "loss": 0.3243, "step": 13590 }, { "epoch": 0.02409973944550386, "grad_norm": 1.4375, "learning_rate": 0.0019231202699321309, "loss": 0.2435, "step": 13592 }, { "epoch": 0.024103285610813676, "grad_norm": 0.57421875, "learning_rate": 0.0019230961982330634, "loss": 0.2668, "step": 13594 }, { "epoch": 0.02410683177612349, "grad_norm": 0.470703125, "learning_rate": 0.0019230721229342512, "loss": 0.212, "step": 13596 }, { "epoch": 0.024110377941433305, "grad_norm": 0.34375, "learning_rate": 0.0019230480440358, "loss": 0.2123, "step": 13598 }, { "epoch": 0.024113924106743123, "grad_norm": 0.470703125, "learning_rate": 0.0019230239615378145, "loss": 0.2428, "step": 13600 }, { "epoch": 0.024117470272052938, "grad_norm": 0.2734375, "learning_rate": 0.0019229998754404002, "loss": 0.466, "step": 13602 }, { "epoch": 0.024121016437362752, "grad_norm": 0.53125, "learning_rate": 0.0019229757857436628, "loss": 0.2363, "step": 13604 }, { "epoch": 0.024124562602672567, "grad_norm": 0.357421875, "learning_rate": 0.0019229516924477068, "loss": 0.2132, "step": 13606 }, { "epoch": 0.02412810876798238, "grad_norm": 0.73046875, "learning_rate": 0.0019229275955526387, "loss": 0.2369, "step": 13608 }, { "epoch": 0.0241316549332922, "grad_norm": 0.52734375, "learning_rate": 0.001922903495058563, "loss": 0.1827, "step": 13610 }, { "epoch": 0.024135201098602014, "grad_norm": 0.66015625, "learning_rate": 0.0019228793909655856, "loss": 0.2831, "step": 13612 }, { "epoch": 0.02413874726391183, "grad_norm": 0.4453125, "learning_rate": 0.0019228552832738115, "loss": 0.2283, "step": 13614 }, { "epoch": 0.024142293429221643, "grad_norm": 0.326171875, "learning_rate": 0.0019228311719833465, "loss": 0.1682, "step": 13616 }, { "epoch": 0.024145839594531458, "grad_norm": 0.3984375, "learning_rate": 0.0019228070570942958, "loss": 0.1725, "step": 13618 }, { "epoch": 0.024149385759841272, "grad_norm": 0.90234375, "learning_rate": 0.001922782938606765, "loss": 0.1719, "step": 13620 }, { "epoch": 0.02415293192515109, "grad_norm": 0.64453125, "learning_rate": 0.00192275881652086, "loss": 0.2025, "step": 13622 }, { "epoch": 0.024156478090460905, "grad_norm": 1.2109375, "learning_rate": 0.0019227346908366851, "loss": 0.2257, "step": 13624 }, { "epoch": 0.02416002425577072, "grad_norm": 0.55859375, "learning_rate": 0.001922710561554347, "loss": 0.2611, "step": 13626 }, { "epoch": 0.024163570421080534, "grad_norm": 0.94921875, "learning_rate": 0.0019226864286739508, "loss": 0.2491, "step": 13628 }, { "epoch": 0.02416711658639035, "grad_norm": 1.6484375, "learning_rate": 0.0019226622921956018, "loss": 0.1742, "step": 13630 }, { "epoch": 0.024170662751700163, "grad_norm": 0.640625, "learning_rate": 0.0019226381521194058, "loss": 0.2892, "step": 13632 }, { "epoch": 0.02417420891700998, "grad_norm": 0.87890625, "learning_rate": 0.0019226140084454687, "loss": 0.2418, "step": 13634 }, { "epoch": 0.024177755082319796, "grad_norm": 0.330078125, "learning_rate": 0.0019225898611738954, "loss": 0.1863, "step": 13636 }, { "epoch": 0.02418130124762961, "grad_norm": 0.408203125, "learning_rate": 0.0019225657103047922, "loss": 0.1864, "step": 13638 }, { "epoch": 0.024184847412939425, "grad_norm": 0.6484375, "learning_rate": 0.0019225415558382641, "loss": 0.2033, "step": 13640 }, { "epoch": 0.02418839357824924, "grad_norm": 0.8828125, "learning_rate": 0.0019225173977744172, "loss": 0.2221, "step": 13642 }, { "epoch": 0.024191939743559057, "grad_norm": 0.75, "learning_rate": 0.001922493236113357, "loss": 0.2803, "step": 13644 }, { "epoch": 0.024195485908868872, "grad_norm": 0.498046875, "learning_rate": 0.0019224690708551891, "loss": 0.2529, "step": 13646 }, { "epoch": 0.024199032074178686, "grad_norm": 0.59375, "learning_rate": 0.0019224449020000189, "loss": 0.2156, "step": 13648 }, { "epoch": 0.0242025782394885, "grad_norm": 2.078125, "learning_rate": 0.001922420729547953, "loss": 0.2818, "step": 13650 }, { "epoch": 0.024206124404798315, "grad_norm": 0.2578125, "learning_rate": 0.0019223965534990962, "loss": 0.3445, "step": 13652 }, { "epoch": 0.02420967057010813, "grad_norm": 1.9296875, "learning_rate": 0.0019223723738535546, "loss": 0.2911, "step": 13654 }, { "epoch": 0.024213216735417948, "grad_norm": 1.9765625, "learning_rate": 0.001922348190611434, "loss": 0.4146, "step": 13656 }, { "epoch": 0.024216762900727763, "grad_norm": 0.6875, "learning_rate": 0.00192232400377284, "loss": 0.2885, "step": 13658 }, { "epoch": 0.024220309066037577, "grad_norm": 0.77734375, "learning_rate": 0.0019222998133378785, "loss": 0.2697, "step": 13660 }, { "epoch": 0.02422385523134739, "grad_norm": 3.90625, "learning_rate": 0.0019222756193066552, "loss": 0.1908, "step": 13662 }, { "epoch": 0.024227401396657206, "grad_norm": 0.439453125, "learning_rate": 0.0019222514216792762, "loss": 0.3814, "step": 13664 }, { "epoch": 0.02423094756196702, "grad_norm": 2.34375, "learning_rate": 0.001922227220455847, "loss": 0.3804, "step": 13666 }, { "epoch": 0.02423449372727684, "grad_norm": 0.275390625, "learning_rate": 0.0019222030156364736, "loss": 0.158, "step": 13668 }, { "epoch": 0.024238039892586653, "grad_norm": 1.40625, "learning_rate": 0.0019221788072212615, "loss": 0.2145, "step": 13670 }, { "epoch": 0.024241586057896468, "grad_norm": 0.6171875, "learning_rate": 0.0019221545952103174, "loss": 0.2379, "step": 13672 }, { "epoch": 0.024245132223206282, "grad_norm": 0.5703125, "learning_rate": 0.0019221303796037464, "loss": 0.2329, "step": 13674 }, { "epoch": 0.024248678388516097, "grad_norm": 0.34765625, "learning_rate": 0.0019221061604016544, "loss": 0.2512, "step": 13676 }, { "epoch": 0.024252224553825915, "grad_norm": 1.4375, "learning_rate": 0.001922081937604148, "loss": 0.1933, "step": 13678 }, { "epoch": 0.02425577071913573, "grad_norm": 0.404296875, "learning_rate": 0.0019220577112113325, "loss": 0.2023, "step": 13680 }, { "epoch": 0.024259316884445544, "grad_norm": 1.21875, "learning_rate": 0.001922033481223314, "loss": 0.277, "step": 13682 }, { "epoch": 0.02426286304975536, "grad_norm": 2.71875, "learning_rate": 0.0019220092476401988, "loss": 0.2647, "step": 13684 }, { "epoch": 0.024266409215065173, "grad_norm": 1.1015625, "learning_rate": 0.0019219850104620925, "loss": 0.281, "step": 13686 }, { "epoch": 0.024269955380374988, "grad_norm": 1.7890625, "learning_rate": 0.0019219607696891012, "loss": 0.3359, "step": 13688 }, { "epoch": 0.024273501545684806, "grad_norm": 0.74609375, "learning_rate": 0.001921936525321331, "loss": 0.3584, "step": 13690 }, { "epoch": 0.02427704771099462, "grad_norm": 0.1513671875, "learning_rate": 0.001921912277358888, "loss": 0.1822, "step": 13692 }, { "epoch": 0.024280593876304435, "grad_norm": 0.5, "learning_rate": 0.0019218880258018778, "loss": 0.2041, "step": 13694 }, { "epoch": 0.02428414004161425, "grad_norm": 0.70703125, "learning_rate": 0.0019218637706504072, "loss": 0.2228, "step": 13696 }, { "epoch": 0.024287686206924064, "grad_norm": 0.30859375, "learning_rate": 0.0019218395119045815, "loss": 0.1759, "step": 13698 }, { "epoch": 0.02429123237223388, "grad_norm": 0.828125, "learning_rate": 0.0019218152495645074, "loss": 0.2733, "step": 13700 }, { "epoch": 0.024294778537543697, "grad_norm": 1.0859375, "learning_rate": 0.0019217909836302905, "loss": 0.1805, "step": 13702 }, { "epoch": 0.02429832470285351, "grad_norm": 0.3046875, "learning_rate": 0.0019217667141020374, "loss": 0.2562, "step": 13704 }, { "epoch": 0.024301870868163326, "grad_norm": 0.359375, "learning_rate": 0.0019217424409798541, "loss": 0.247, "step": 13706 }, { "epoch": 0.02430541703347314, "grad_norm": 0.251953125, "learning_rate": 0.0019217181642638467, "loss": 0.2947, "step": 13708 }, { "epoch": 0.024308963198782955, "grad_norm": 0.7890625, "learning_rate": 0.001921693883954121, "loss": 0.2822, "step": 13710 }, { "epoch": 0.024312509364092773, "grad_norm": 1.5234375, "learning_rate": 0.0019216696000507838, "loss": 0.2057, "step": 13712 }, { "epoch": 0.024316055529402587, "grad_norm": 0.515625, "learning_rate": 0.001921645312553941, "loss": 0.2415, "step": 13714 }, { "epoch": 0.024319601694712402, "grad_norm": 0.228515625, "learning_rate": 0.001921621021463699, "loss": 0.1979, "step": 13716 }, { "epoch": 0.024323147860022216, "grad_norm": 0.5546875, "learning_rate": 0.0019215967267801636, "loss": 0.2427, "step": 13718 }, { "epoch": 0.02432669402533203, "grad_norm": 3.890625, "learning_rate": 0.0019215724285034418, "loss": 0.6283, "step": 13720 }, { "epoch": 0.024330240190641846, "grad_norm": 0.326171875, "learning_rate": 0.001921548126633639, "loss": 0.2413, "step": 13722 }, { "epoch": 0.024333786355951664, "grad_norm": 0.76171875, "learning_rate": 0.001921523821170862, "loss": 0.283, "step": 13724 }, { "epoch": 0.024337332521261478, "grad_norm": 0.40625, "learning_rate": 0.0019214995121152173, "loss": 0.2244, "step": 13726 }, { "epoch": 0.024340878686571293, "grad_norm": 1.09375, "learning_rate": 0.0019214751994668107, "loss": 0.207, "step": 13728 }, { "epoch": 0.024344424851881107, "grad_norm": 0.6015625, "learning_rate": 0.0019214508832257487, "loss": 0.2805, "step": 13730 }, { "epoch": 0.024347971017190922, "grad_norm": 0.73828125, "learning_rate": 0.0019214265633921378, "loss": 0.2206, "step": 13732 }, { "epoch": 0.024351517182500736, "grad_norm": 0.47265625, "learning_rate": 0.0019214022399660841, "loss": 0.3517, "step": 13734 }, { "epoch": 0.024355063347810554, "grad_norm": 0.453125, "learning_rate": 0.0019213779129476944, "loss": 0.2401, "step": 13736 }, { "epoch": 0.02435860951312037, "grad_norm": 0.8515625, "learning_rate": 0.0019213535823370744, "loss": 0.2, "step": 13738 }, { "epoch": 0.024362155678430183, "grad_norm": 0.478515625, "learning_rate": 0.0019213292481343315, "loss": 0.2087, "step": 13740 }, { "epoch": 0.024365701843739998, "grad_norm": 0.8671875, "learning_rate": 0.001921304910339571, "loss": 0.2439, "step": 13742 }, { "epoch": 0.024369248009049813, "grad_norm": 0.515625, "learning_rate": 0.0019212805689529002, "loss": 0.2096, "step": 13744 }, { "epoch": 0.02437279417435963, "grad_norm": 0.9296875, "learning_rate": 0.001921256223974425, "loss": 0.2542, "step": 13746 }, { "epoch": 0.024376340339669445, "grad_norm": 0.63671875, "learning_rate": 0.0019212318754042524, "loss": 0.3706, "step": 13748 }, { "epoch": 0.02437988650497926, "grad_norm": 1.4453125, "learning_rate": 0.0019212075232424887, "loss": 0.3073, "step": 13750 }, { "epoch": 0.024383432670289074, "grad_norm": 1.2890625, "learning_rate": 0.00192118316748924, "loss": 0.2408, "step": 13752 }, { "epoch": 0.02438697883559889, "grad_norm": 0.68359375, "learning_rate": 0.0019211588081446131, "loss": 0.2567, "step": 13754 }, { "epoch": 0.024390525000908703, "grad_norm": 0.474609375, "learning_rate": 0.0019211344452087149, "loss": 0.2415, "step": 13756 }, { "epoch": 0.02439407116621852, "grad_norm": 1.53125, "learning_rate": 0.0019211100786816514, "loss": 0.3627, "step": 13758 }, { "epoch": 0.024397617331528336, "grad_norm": 0.65625, "learning_rate": 0.0019210857085635294, "loss": 0.2069, "step": 13760 }, { "epoch": 0.02440116349683815, "grad_norm": 0.60546875, "learning_rate": 0.0019210613348544555, "loss": 0.1754, "step": 13762 }, { "epoch": 0.024404709662147965, "grad_norm": 1.6640625, "learning_rate": 0.001921036957554536, "loss": 0.254, "step": 13764 }, { "epoch": 0.02440825582745778, "grad_norm": 1.4765625, "learning_rate": 0.001921012576663878, "loss": 0.4525, "step": 13766 }, { "epoch": 0.024411801992767594, "grad_norm": 0.46875, "learning_rate": 0.0019209881921825878, "loss": 0.2479, "step": 13768 }, { "epoch": 0.024415348158077412, "grad_norm": 0.384765625, "learning_rate": 0.0019209638041107722, "loss": 0.2775, "step": 13770 }, { "epoch": 0.024418894323387227, "grad_norm": 0.5625, "learning_rate": 0.0019209394124485378, "loss": 0.3512, "step": 13772 }, { "epoch": 0.02442244048869704, "grad_norm": 0.58203125, "learning_rate": 0.001920915017195991, "loss": 0.2219, "step": 13774 }, { "epoch": 0.024425986654006856, "grad_norm": 0.251953125, "learning_rate": 0.001920890618353239, "loss": 0.2009, "step": 13776 }, { "epoch": 0.02442953281931667, "grad_norm": 0.56640625, "learning_rate": 0.0019208662159203884, "loss": 0.2186, "step": 13778 }, { "epoch": 0.02443307898462649, "grad_norm": 0.82421875, "learning_rate": 0.0019208418098975457, "loss": 0.2147, "step": 13780 }, { "epoch": 0.024436625149936303, "grad_norm": 0.57421875, "learning_rate": 0.0019208174002848177, "loss": 0.2648, "step": 13782 }, { "epoch": 0.024440171315246118, "grad_norm": 0.380859375, "learning_rate": 0.001920792987082311, "loss": 0.1901, "step": 13784 }, { "epoch": 0.024443717480555932, "grad_norm": 0.63671875, "learning_rate": 0.001920768570290133, "loss": 0.1862, "step": 13786 }, { "epoch": 0.024447263645865747, "grad_norm": 0.5078125, "learning_rate": 0.0019207441499083896, "loss": 0.1555, "step": 13788 }, { "epoch": 0.02445080981117556, "grad_norm": 0.470703125, "learning_rate": 0.0019207197259371882, "loss": 0.1781, "step": 13790 }, { "epoch": 0.02445435597648538, "grad_norm": 1.0859375, "learning_rate": 0.0019206952983766353, "loss": 0.2094, "step": 13792 }, { "epoch": 0.024457902141795194, "grad_norm": 1.484375, "learning_rate": 0.0019206708672268378, "loss": 0.3294, "step": 13794 }, { "epoch": 0.02446144830710501, "grad_norm": 0.3359375, "learning_rate": 0.001920646432487903, "loss": 0.3318, "step": 13796 }, { "epoch": 0.024464994472414823, "grad_norm": 0.365234375, "learning_rate": 0.0019206219941599373, "loss": 0.2351, "step": 13798 }, { "epoch": 0.024468540637724637, "grad_norm": 0.69140625, "learning_rate": 0.0019205975522430475, "loss": 0.2617, "step": 13800 }, { "epoch": 0.024472086803034452, "grad_norm": 0.306640625, "learning_rate": 0.0019205731067373406, "loss": 0.231, "step": 13802 }, { "epoch": 0.02447563296834427, "grad_norm": 5.03125, "learning_rate": 0.001920548657642924, "loss": 0.2353, "step": 13804 }, { "epoch": 0.024479179133654085, "grad_norm": 0.8671875, "learning_rate": 0.001920524204959904, "loss": 0.2762, "step": 13806 }, { "epoch": 0.0244827252989639, "grad_norm": 0.267578125, "learning_rate": 0.0019204997486883875, "loss": 0.1902, "step": 13808 }, { "epoch": 0.024486271464273714, "grad_norm": 0.6171875, "learning_rate": 0.0019204752888284818, "loss": 0.296, "step": 13810 }, { "epoch": 0.024489817629583528, "grad_norm": 0.345703125, "learning_rate": 0.0019204508253802936, "loss": 0.1988, "step": 13812 }, { "epoch": 0.024493363794893346, "grad_norm": 0.58984375, "learning_rate": 0.0019204263583439305, "loss": 0.2514, "step": 13814 }, { "epoch": 0.02449690996020316, "grad_norm": 0.341796875, "learning_rate": 0.001920401887719499, "loss": 0.2111, "step": 13816 }, { "epoch": 0.024500456125512975, "grad_norm": 0.40625, "learning_rate": 0.0019203774135071062, "loss": 0.2105, "step": 13818 }, { "epoch": 0.02450400229082279, "grad_norm": 1.09375, "learning_rate": 0.001920352935706859, "loss": 0.3845, "step": 13820 }, { "epoch": 0.024507548456132604, "grad_norm": 0.486328125, "learning_rate": 0.0019203284543188647, "loss": 0.2376, "step": 13822 }, { "epoch": 0.02451109462144242, "grad_norm": 0.455078125, "learning_rate": 0.00192030396934323, "loss": 0.1809, "step": 13824 }, { "epoch": 0.024514640786752237, "grad_norm": 0.29296875, "learning_rate": 0.0019202794807800622, "loss": 0.2303, "step": 13826 }, { "epoch": 0.02451818695206205, "grad_norm": 1.3046875, "learning_rate": 0.0019202549886294686, "loss": 0.2005, "step": 13828 }, { "epoch": 0.024521733117371866, "grad_norm": 0.90625, "learning_rate": 0.0019202304928915561, "loss": 0.1902, "step": 13830 }, { "epoch": 0.02452527928268168, "grad_norm": 0.341796875, "learning_rate": 0.001920205993566432, "loss": 0.1995, "step": 13832 }, { "epoch": 0.024528825447991495, "grad_norm": 0.53515625, "learning_rate": 0.0019201814906542032, "loss": 0.1956, "step": 13834 }, { "epoch": 0.02453237161330131, "grad_norm": 0.33203125, "learning_rate": 0.0019201569841549768, "loss": 0.2358, "step": 13836 }, { "epoch": 0.024535917778611128, "grad_norm": 1.3984375, "learning_rate": 0.0019201324740688605, "loss": 0.3039, "step": 13838 }, { "epoch": 0.024539463943920942, "grad_norm": 0.40234375, "learning_rate": 0.001920107960395961, "loss": 0.1798, "step": 13840 }, { "epoch": 0.024543010109230757, "grad_norm": 0.86328125, "learning_rate": 0.0019200834431363856, "loss": 0.3259, "step": 13842 }, { "epoch": 0.02454655627454057, "grad_norm": 0.2109375, "learning_rate": 0.0019200589222902417, "loss": 0.2328, "step": 13844 }, { "epoch": 0.024550102439850386, "grad_norm": 0.1435546875, "learning_rate": 0.0019200343978576363, "loss": 0.3027, "step": 13846 }, { "epoch": 0.024553648605160204, "grad_norm": 0.5703125, "learning_rate": 0.0019200098698386769, "loss": 0.2139, "step": 13848 }, { "epoch": 0.02455719477047002, "grad_norm": 1.2578125, "learning_rate": 0.0019199853382334702, "loss": 0.338, "step": 13850 }, { "epoch": 0.024560740935779833, "grad_norm": 0.51953125, "learning_rate": 0.0019199608030421244, "loss": 0.2449, "step": 13852 }, { "epoch": 0.024564287101089648, "grad_norm": 0.58984375, "learning_rate": 0.0019199362642647465, "loss": 0.1955, "step": 13854 }, { "epoch": 0.024567833266399462, "grad_norm": 0.9453125, "learning_rate": 0.0019199117219014432, "loss": 0.2068, "step": 13856 }, { "epoch": 0.024571379431709277, "grad_norm": 0.26953125, "learning_rate": 0.0019198871759523225, "loss": 0.2273, "step": 13858 }, { "epoch": 0.024574925597019095, "grad_norm": 0.703125, "learning_rate": 0.0019198626264174915, "loss": 0.235, "step": 13860 }, { "epoch": 0.02457847176232891, "grad_norm": 0.46484375, "learning_rate": 0.0019198380732970578, "loss": 0.2033, "step": 13862 }, { "epoch": 0.024582017927638724, "grad_norm": 1.1015625, "learning_rate": 0.0019198135165911282, "loss": 0.2702, "step": 13864 }, { "epoch": 0.02458556409294854, "grad_norm": 0.443359375, "learning_rate": 0.0019197889562998108, "loss": 0.1868, "step": 13866 }, { "epoch": 0.024589110258258353, "grad_norm": 0.97265625, "learning_rate": 0.0019197643924232127, "loss": 0.2834, "step": 13868 }, { "epoch": 0.024592656423568168, "grad_norm": 0.341796875, "learning_rate": 0.0019197398249614411, "loss": 0.3313, "step": 13870 }, { "epoch": 0.024596202588877986, "grad_norm": 0.474609375, "learning_rate": 0.001919715253914604, "loss": 0.2846, "step": 13872 }, { "epoch": 0.0245997487541878, "grad_norm": 0.59375, "learning_rate": 0.0019196906792828081, "loss": 0.2002, "step": 13874 }, { "epoch": 0.024603294919497615, "grad_norm": 0.5703125, "learning_rate": 0.0019196661010661615, "loss": 0.2345, "step": 13876 }, { "epoch": 0.02460684108480743, "grad_norm": 0.9765625, "learning_rate": 0.0019196415192647715, "loss": 0.3253, "step": 13878 }, { "epoch": 0.024610387250117244, "grad_norm": 0.283203125, "learning_rate": 0.0019196169338787457, "loss": 0.1902, "step": 13880 }, { "epoch": 0.024613933415427062, "grad_norm": 0.6171875, "learning_rate": 0.0019195923449081913, "loss": 0.2404, "step": 13882 }, { "epoch": 0.024617479580736876, "grad_norm": 1.0234375, "learning_rate": 0.0019195677523532162, "loss": 0.4937, "step": 13884 }, { "epoch": 0.02462102574604669, "grad_norm": 0.41796875, "learning_rate": 0.0019195431562139278, "loss": 0.1941, "step": 13886 }, { "epoch": 0.024624571911356505, "grad_norm": 0.314453125, "learning_rate": 0.0019195185564904336, "loss": 0.2128, "step": 13888 }, { "epoch": 0.02462811807666632, "grad_norm": 0.341796875, "learning_rate": 0.0019194939531828414, "loss": 0.2343, "step": 13890 }, { "epoch": 0.024631664241976135, "grad_norm": 0.2734375, "learning_rate": 0.0019194693462912585, "loss": 0.2266, "step": 13892 }, { "epoch": 0.024635210407285953, "grad_norm": 0.53515625, "learning_rate": 0.001919444735815793, "loss": 0.2116, "step": 13894 }, { "epoch": 0.024638756572595767, "grad_norm": 0.470703125, "learning_rate": 0.001919420121756552, "loss": 0.2475, "step": 13896 }, { "epoch": 0.02464230273790558, "grad_norm": 0.263671875, "learning_rate": 0.0019193955041136434, "loss": 0.2494, "step": 13898 }, { "epoch": 0.024645848903215396, "grad_norm": 0.96484375, "learning_rate": 0.0019193708828871747, "loss": 0.2323, "step": 13900 }, { "epoch": 0.02464939506852521, "grad_norm": 0.8671875, "learning_rate": 0.0019193462580772537, "loss": 0.1967, "step": 13902 }, { "epoch": 0.024652941233835025, "grad_norm": 0.52734375, "learning_rate": 0.0019193216296839884, "loss": 0.2507, "step": 13904 }, { "epoch": 0.024656487399144843, "grad_norm": 1.0625, "learning_rate": 0.0019192969977074859, "loss": 0.221, "step": 13906 }, { "epoch": 0.024660033564454658, "grad_norm": 1.4765625, "learning_rate": 0.0019192723621478545, "loss": 0.2676, "step": 13908 }, { "epoch": 0.024663579729764473, "grad_norm": 0.345703125, "learning_rate": 0.0019192477230052015, "loss": 0.2427, "step": 13910 }, { "epoch": 0.024667125895074287, "grad_norm": 0.85546875, "learning_rate": 0.0019192230802796348, "loss": 0.2562, "step": 13912 }, { "epoch": 0.0246706720603841, "grad_norm": 0.5078125, "learning_rate": 0.0019191984339712626, "loss": 0.2307, "step": 13914 }, { "epoch": 0.02467421822569392, "grad_norm": 0.6328125, "learning_rate": 0.0019191737840801918, "loss": 0.3262, "step": 13916 }, { "epoch": 0.024677764391003734, "grad_norm": 0.30078125, "learning_rate": 0.0019191491306065308, "loss": 0.2705, "step": 13918 }, { "epoch": 0.02468131055631355, "grad_norm": 0.251953125, "learning_rate": 0.0019191244735503872, "loss": 0.1889, "step": 13920 }, { "epoch": 0.024684856721623363, "grad_norm": 0.375, "learning_rate": 0.0019190998129118694, "loss": 0.1917, "step": 13922 }, { "epoch": 0.024688402886933178, "grad_norm": 0.7734375, "learning_rate": 0.0019190751486910845, "loss": 0.2384, "step": 13924 }, { "epoch": 0.024691949052242992, "grad_norm": 0.3671875, "learning_rate": 0.0019190504808881406, "loss": 0.2819, "step": 13926 }, { "epoch": 0.02469549521755281, "grad_norm": 1.03125, "learning_rate": 0.001919025809503146, "loss": 0.3895, "step": 13928 }, { "epoch": 0.024699041382862625, "grad_norm": 0.21875, "learning_rate": 0.001919001134536208, "loss": 0.2031, "step": 13930 }, { "epoch": 0.02470258754817244, "grad_norm": 1.359375, "learning_rate": 0.0019189764559874346, "loss": 0.3032, "step": 13932 }, { "epoch": 0.024706133713482254, "grad_norm": 0.52734375, "learning_rate": 0.0019189517738569344, "loss": 0.2921, "step": 13934 }, { "epoch": 0.02470967987879207, "grad_norm": 0.51953125, "learning_rate": 0.0019189270881448143, "loss": 0.2276, "step": 13936 }, { "epoch": 0.024713226044101883, "grad_norm": 0.2470703125, "learning_rate": 0.001918902398851183, "loss": 0.1609, "step": 13938 }, { "epoch": 0.0247167722094117, "grad_norm": 0.376953125, "learning_rate": 0.0019188777059761485, "loss": 0.2039, "step": 13940 }, { "epoch": 0.024720318374721516, "grad_norm": 0.380859375, "learning_rate": 0.0019188530095198184, "loss": 0.2135, "step": 13942 }, { "epoch": 0.02472386454003133, "grad_norm": 0.6484375, "learning_rate": 0.001918828309482301, "loss": 0.2362, "step": 13944 }, { "epoch": 0.024727410705341145, "grad_norm": 0.85546875, "learning_rate": 0.0019188036058637042, "loss": 0.2801, "step": 13946 }, { "epoch": 0.02473095687065096, "grad_norm": 0.34765625, "learning_rate": 0.0019187788986641358, "loss": 0.2272, "step": 13948 }, { "epoch": 0.024734503035960777, "grad_norm": 2.171875, "learning_rate": 0.0019187541878837042, "loss": 0.4132, "step": 13950 }, { "epoch": 0.024738049201270592, "grad_norm": 0.2392578125, "learning_rate": 0.0019187294735225175, "loss": 0.162, "step": 13952 }, { "epoch": 0.024741595366580407, "grad_norm": 0.208984375, "learning_rate": 0.0019187047555806835, "loss": 0.1601, "step": 13954 }, { "epoch": 0.02474514153189022, "grad_norm": 0.7578125, "learning_rate": 0.0019186800340583105, "loss": 0.3078, "step": 13956 }, { "epoch": 0.024748687697200036, "grad_norm": 0.703125, "learning_rate": 0.0019186553089555068, "loss": 0.2143, "step": 13958 }, { "epoch": 0.02475223386250985, "grad_norm": 0.6171875, "learning_rate": 0.0019186305802723802, "loss": 0.2658, "step": 13960 }, { "epoch": 0.024755780027819668, "grad_norm": 0.412109375, "learning_rate": 0.001918605848009039, "loss": 0.3714, "step": 13962 }, { "epoch": 0.024759326193129483, "grad_norm": 0.63671875, "learning_rate": 0.001918581112165591, "loss": 0.181, "step": 13964 }, { "epoch": 0.024762872358439297, "grad_norm": 0.9765625, "learning_rate": 0.001918556372742145, "loss": 0.2423, "step": 13966 }, { "epoch": 0.024766418523749112, "grad_norm": 0.546875, "learning_rate": 0.0019185316297388086, "loss": 0.3429, "step": 13968 }, { "epoch": 0.024769964689058926, "grad_norm": 0.45703125, "learning_rate": 0.0019185068831556907, "loss": 0.3132, "step": 13970 }, { "epoch": 0.02477351085436874, "grad_norm": 0.3984375, "learning_rate": 0.001918482132992899, "loss": 0.2819, "step": 13972 }, { "epoch": 0.02477705701967856, "grad_norm": 0.7734375, "learning_rate": 0.001918457379250542, "loss": 0.2544, "step": 13974 }, { "epoch": 0.024780603184988374, "grad_norm": 1.6953125, "learning_rate": 0.0019184326219287276, "loss": 0.401, "step": 13976 }, { "epoch": 0.024784149350298188, "grad_norm": 1.09375, "learning_rate": 0.0019184078610275644, "loss": 0.1855, "step": 13978 }, { "epoch": 0.024787695515608003, "grad_norm": 0.5390625, "learning_rate": 0.0019183830965471605, "loss": 0.2471, "step": 13980 }, { "epoch": 0.024791241680917817, "grad_norm": 0.400390625, "learning_rate": 0.0019183583284876247, "loss": 0.2138, "step": 13982 }, { "epoch": 0.024794787846227635, "grad_norm": 0.61328125, "learning_rate": 0.0019183335568490646, "loss": 0.229, "step": 13984 }, { "epoch": 0.02479833401153745, "grad_norm": 0.33984375, "learning_rate": 0.0019183087816315887, "loss": 0.1648, "step": 13986 }, { "epoch": 0.024801880176847264, "grad_norm": 1.7890625, "learning_rate": 0.001918284002835306, "loss": 0.2413, "step": 13988 }, { "epoch": 0.02480542634215708, "grad_norm": 0.37890625, "learning_rate": 0.0019182592204603239, "loss": 0.1872, "step": 13990 }, { "epoch": 0.024808972507466893, "grad_norm": 0.33984375, "learning_rate": 0.0019182344345067515, "loss": 0.2338, "step": 13992 }, { "epoch": 0.024812518672776708, "grad_norm": 0.453125, "learning_rate": 0.001918209644974697, "loss": 0.2351, "step": 13994 }, { "epoch": 0.024816064838086526, "grad_norm": 0.62109375, "learning_rate": 0.0019181848518642689, "loss": 0.2204, "step": 13996 }, { "epoch": 0.02481961100339634, "grad_norm": 0.48046875, "learning_rate": 0.0019181600551755752, "loss": 0.1808, "step": 13998 }, { "epoch": 0.024823157168706155, "grad_norm": 1.390625, "learning_rate": 0.0019181352549087248, "loss": 0.2362, "step": 14000 }, { "epoch": 0.02482670333401597, "grad_norm": 0.267578125, "learning_rate": 0.0019181104510638263, "loss": 0.1877, "step": 14002 }, { "epoch": 0.024830249499325784, "grad_norm": 0.3828125, "learning_rate": 0.0019180856436409874, "loss": 0.2181, "step": 14004 }, { "epoch": 0.0248337956646356, "grad_norm": 0.482421875, "learning_rate": 0.0019180608326403174, "loss": 0.2647, "step": 14006 }, { "epoch": 0.024837341829945417, "grad_norm": 0.72265625, "learning_rate": 0.0019180360180619243, "loss": 0.2128, "step": 14008 }, { "epoch": 0.02484088799525523, "grad_norm": 0.400390625, "learning_rate": 0.0019180111999059172, "loss": 0.2545, "step": 14010 }, { "epoch": 0.024844434160565046, "grad_norm": 1.2265625, "learning_rate": 0.001917986378172404, "loss": 0.2386, "step": 14012 }, { "epoch": 0.02484798032587486, "grad_norm": 0.81640625, "learning_rate": 0.0019179615528614935, "loss": 0.2461, "step": 14014 }, { "epoch": 0.024851526491184675, "grad_norm": 0.232421875, "learning_rate": 0.0019179367239732946, "loss": 0.1619, "step": 14016 }, { "epoch": 0.024855072656494493, "grad_norm": 0.54296875, "learning_rate": 0.0019179118915079152, "loss": 0.5902, "step": 14018 }, { "epoch": 0.024858618821804308, "grad_norm": 0.9609375, "learning_rate": 0.0019178870554654643, "loss": 0.4067, "step": 14020 }, { "epoch": 0.024862164987114122, "grad_norm": 0.38671875, "learning_rate": 0.001917862215846051, "loss": 0.4312, "step": 14022 }, { "epoch": 0.024865711152423937, "grad_norm": 0.46875, "learning_rate": 0.001917837372649783, "loss": 0.1862, "step": 14024 }, { "epoch": 0.02486925731773375, "grad_norm": 2.984375, "learning_rate": 0.0019178125258767693, "loss": 0.5017, "step": 14026 }, { "epoch": 0.024872803483043566, "grad_norm": 0.296875, "learning_rate": 0.0019177876755271188, "loss": 0.1826, "step": 14028 }, { "epoch": 0.024876349648353384, "grad_norm": 0.79296875, "learning_rate": 0.0019177628216009402, "loss": 0.2232, "step": 14030 }, { "epoch": 0.0248798958136632, "grad_norm": 0.51171875, "learning_rate": 0.0019177379640983418, "loss": 0.2982, "step": 14032 }, { "epoch": 0.024883441978973013, "grad_norm": 0.57421875, "learning_rate": 0.0019177131030194328, "loss": 0.2894, "step": 14034 }, { "epoch": 0.024886988144282828, "grad_norm": 0.6015625, "learning_rate": 0.0019176882383643214, "loss": 0.3256, "step": 14036 }, { "epoch": 0.024890534309592642, "grad_norm": 1.484375, "learning_rate": 0.0019176633701331167, "loss": 0.2414, "step": 14038 }, { "epoch": 0.024894080474902457, "grad_norm": 0.30078125, "learning_rate": 0.0019176384983259273, "loss": 0.3114, "step": 14040 }, { "epoch": 0.024897626640212275, "grad_norm": 1.28125, "learning_rate": 0.0019176136229428625, "loss": 0.2717, "step": 14042 }, { "epoch": 0.02490117280552209, "grad_norm": 0.6171875, "learning_rate": 0.00191758874398403, "loss": 0.1346, "step": 14044 }, { "epoch": 0.024904718970831904, "grad_norm": 0.703125, "learning_rate": 0.0019175638614495397, "loss": 0.3922, "step": 14046 }, { "epoch": 0.02490826513614172, "grad_norm": 1.34375, "learning_rate": 0.0019175389753394998, "loss": 0.3447, "step": 14048 }, { "epoch": 0.024911811301451533, "grad_norm": 0.345703125, "learning_rate": 0.0019175140856540192, "loss": 0.1935, "step": 14050 }, { "epoch": 0.02491535746676135, "grad_norm": 0.57421875, "learning_rate": 0.0019174891923932071, "loss": 0.2704, "step": 14052 }, { "epoch": 0.024918903632071165, "grad_norm": 0.365234375, "learning_rate": 0.0019174642955571719, "loss": 0.2024, "step": 14054 }, { "epoch": 0.02492244979738098, "grad_norm": 0.41015625, "learning_rate": 0.0019174393951460226, "loss": 0.181, "step": 14056 }, { "epoch": 0.024925995962690795, "grad_norm": 3.6875, "learning_rate": 0.0019174144911598687, "loss": 0.2252, "step": 14058 }, { "epoch": 0.02492954212800061, "grad_norm": 1.2890625, "learning_rate": 0.0019173895835988184, "loss": 0.2413, "step": 14060 }, { "epoch": 0.024933088293310424, "grad_norm": 1.0, "learning_rate": 0.0019173646724629807, "loss": 0.3171, "step": 14062 }, { "epoch": 0.02493663445862024, "grad_norm": 0.57421875, "learning_rate": 0.0019173397577524648, "loss": 0.2648, "step": 14064 }, { "epoch": 0.024940180623930056, "grad_norm": 2.234375, "learning_rate": 0.0019173148394673795, "loss": 0.3527, "step": 14066 }, { "epoch": 0.02494372678923987, "grad_norm": 3.765625, "learning_rate": 0.001917289917607834, "loss": 0.3515, "step": 14068 }, { "epoch": 0.024947272954549685, "grad_norm": 0.47265625, "learning_rate": 0.0019172649921739369, "loss": 0.192, "step": 14070 }, { "epoch": 0.0249508191198595, "grad_norm": 0.439453125, "learning_rate": 0.0019172400631657979, "loss": 0.2277, "step": 14072 }, { "epoch": 0.024954365285169314, "grad_norm": 0.361328125, "learning_rate": 0.0019172151305835252, "loss": 0.198, "step": 14074 }, { "epoch": 0.024957911450479132, "grad_norm": 0.353515625, "learning_rate": 0.0019171901944272283, "loss": 0.1811, "step": 14076 }, { "epoch": 0.024961457615788947, "grad_norm": 0.43359375, "learning_rate": 0.001917165254697016, "loss": 0.2093, "step": 14078 }, { "epoch": 0.02496500378109876, "grad_norm": 0.6875, "learning_rate": 0.0019171403113929977, "loss": 0.2266, "step": 14080 }, { "epoch": 0.024968549946408576, "grad_norm": 0.1953125, "learning_rate": 0.0019171153645152826, "loss": 0.1848, "step": 14082 }, { "epoch": 0.02497209611171839, "grad_norm": 1.1484375, "learning_rate": 0.001917090414063979, "loss": 0.2748, "step": 14084 }, { "epoch": 0.02497564227702821, "grad_norm": 0.33984375, "learning_rate": 0.001917065460039197, "loss": 0.1671, "step": 14086 }, { "epoch": 0.024979188442338023, "grad_norm": 1.0234375, "learning_rate": 0.0019170405024410452, "loss": 0.3008, "step": 14088 }, { "epoch": 0.024982734607647838, "grad_norm": 0.59375, "learning_rate": 0.0019170155412696326, "loss": 0.252, "step": 14090 }, { "epoch": 0.024986280772957652, "grad_norm": 0.3671875, "learning_rate": 0.0019169905765250685, "loss": 0.3065, "step": 14092 }, { "epoch": 0.024989826938267467, "grad_norm": 0.70703125, "learning_rate": 0.0019169656082074628, "loss": 0.2121, "step": 14094 }, { "epoch": 0.02499337310357728, "grad_norm": 1.1796875, "learning_rate": 0.0019169406363169234, "loss": 0.3891, "step": 14096 }, { "epoch": 0.0249969192688871, "grad_norm": 1.0859375, "learning_rate": 0.0019169156608535608, "loss": 0.2253, "step": 14098 }, { "epoch": 0.025000465434196914, "grad_norm": 0.50390625, "learning_rate": 0.0019168906818174832, "loss": 0.2216, "step": 14100 }, { "epoch": 0.02500401159950673, "grad_norm": 0.31640625, "learning_rate": 0.0019168656992088003, "loss": 0.1991, "step": 14102 }, { "epoch": 0.025007557764816543, "grad_norm": 1.4375, "learning_rate": 0.0019168407130276213, "loss": 0.2448, "step": 14104 }, { "epoch": 0.025011103930126358, "grad_norm": 0.369140625, "learning_rate": 0.0019168157232740558, "loss": 0.2675, "step": 14106 }, { "epoch": 0.025014650095436172, "grad_norm": 9.375, "learning_rate": 0.0019167907299482126, "loss": 0.2919, "step": 14108 }, { "epoch": 0.02501819626074599, "grad_norm": 1.8125, "learning_rate": 0.0019167657330502012, "loss": 0.2866, "step": 14110 }, { "epoch": 0.025021742426055805, "grad_norm": 0.5625, "learning_rate": 0.001916740732580131, "loss": 0.2347, "step": 14112 }, { "epoch": 0.02502528859136562, "grad_norm": 0.5546875, "learning_rate": 0.001916715728538111, "loss": 0.2305, "step": 14114 }, { "epoch": 0.025028834756675434, "grad_norm": 0.380859375, "learning_rate": 0.001916690720924251, "loss": 0.2971, "step": 14116 }, { "epoch": 0.02503238092198525, "grad_norm": 0.341796875, "learning_rate": 0.0019166657097386603, "loss": 0.2291, "step": 14118 }, { "epoch": 0.025035927087295066, "grad_norm": 0.35546875, "learning_rate": 0.0019166406949814479, "loss": 0.2293, "step": 14120 }, { "epoch": 0.02503947325260488, "grad_norm": 1.2890625, "learning_rate": 0.0019166156766527236, "loss": 0.2367, "step": 14122 }, { "epoch": 0.025043019417914696, "grad_norm": 0.392578125, "learning_rate": 0.0019165906547525965, "loss": 0.1856, "step": 14124 }, { "epoch": 0.02504656558322451, "grad_norm": 4.21875, "learning_rate": 0.0019165656292811767, "loss": 0.3748, "step": 14126 }, { "epoch": 0.025050111748534325, "grad_norm": 2.546875, "learning_rate": 0.0019165406002385726, "loss": 0.2931, "step": 14128 }, { "epoch": 0.02505365791384414, "grad_norm": 0.361328125, "learning_rate": 0.0019165155676248945, "loss": 0.1808, "step": 14130 }, { "epoch": 0.025057204079153957, "grad_norm": 0.63671875, "learning_rate": 0.0019164905314402515, "loss": 0.2894, "step": 14132 }, { "epoch": 0.025060750244463772, "grad_norm": 0.35546875, "learning_rate": 0.0019164654916847533, "loss": 0.1998, "step": 14134 }, { "epoch": 0.025064296409773586, "grad_norm": 0.66796875, "learning_rate": 0.001916440448358509, "loss": 0.2136, "step": 14136 }, { "epoch": 0.0250678425750834, "grad_norm": 0.365234375, "learning_rate": 0.0019164154014616288, "loss": 0.2767, "step": 14138 }, { "epoch": 0.025071388740393215, "grad_norm": 2.8125, "learning_rate": 0.0019163903509942216, "loss": 0.3425, "step": 14140 }, { "epoch": 0.02507493490570303, "grad_norm": 0.490234375, "learning_rate": 0.0019163652969563971, "loss": 0.2439, "step": 14142 }, { "epoch": 0.025078481071012848, "grad_norm": 0.2578125, "learning_rate": 0.0019163402393482657, "loss": 0.1865, "step": 14144 }, { "epoch": 0.025082027236322663, "grad_norm": 0.306640625, "learning_rate": 0.0019163151781699354, "loss": 0.2019, "step": 14146 }, { "epoch": 0.025085573401632477, "grad_norm": 0.796875, "learning_rate": 0.0019162901134215172, "loss": 0.2086, "step": 14148 }, { "epoch": 0.02508911956694229, "grad_norm": 0.78515625, "learning_rate": 0.0019162650451031201, "loss": 0.2189, "step": 14150 }, { "epoch": 0.025092665732252106, "grad_norm": 1.3046875, "learning_rate": 0.0019162399732148537, "loss": 0.3601, "step": 14152 }, { "epoch": 0.025096211897561924, "grad_norm": 0.51953125, "learning_rate": 0.001916214897756828, "loss": 0.2762, "step": 14154 }, { "epoch": 0.02509975806287174, "grad_norm": 1.0546875, "learning_rate": 0.0019161898187291522, "loss": 0.1849, "step": 14156 }, { "epoch": 0.025103304228181553, "grad_norm": 1.265625, "learning_rate": 0.0019161647361319366, "loss": 0.2427, "step": 14158 }, { "epoch": 0.025106850393491368, "grad_norm": 2.1875, "learning_rate": 0.00191613964996529, "loss": 0.326, "step": 14160 }, { "epoch": 0.025110396558801183, "grad_norm": 0.466796875, "learning_rate": 0.001916114560229323, "loss": 0.3915, "step": 14162 }, { "epoch": 0.025113942724110997, "grad_norm": 1.5546875, "learning_rate": 0.0019160894669241453, "loss": 0.2227, "step": 14164 }, { "epoch": 0.025117488889420815, "grad_norm": 0.94921875, "learning_rate": 0.0019160643700498656, "loss": 0.2625, "step": 14166 }, { "epoch": 0.02512103505473063, "grad_norm": 1.515625, "learning_rate": 0.0019160392696065949, "loss": 0.2359, "step": 14168 }, { "epoch": 0.025124581220040444, "grad_norm": 0.3203125, "learning_rate": 0.0019160141655944424, "loss": 0.205, "step": 14170 }, { "epoch": 0.02512812738535026, "grad_norm": 0.34375, "learning_rate": 0.0019159890580135175, "loss": 0.1611, "step": 14172 }, { "epoch": 0.025131673550660073, "grad_norm": 0.3359375, "learning_rate": 0.0019159639468639307, "loss": 0.2296, "step": 14174 }, { "epoch": 0.025135219715969888, "grad_norm": 0.95703125, "learning_rate": 0.0019159388321457918, "loss": 0.2817, "step": 14176 }, { "epoch": 0.025138765881279706, "grad_norm": 0.71484375, "learning_rate": 0.00191591371385921, "loss": 0.178, "step": 14178 }, { "epoch": 0.02514231204658952, "grad_norm": 0.376953125, "learning_rate": 0.001915888592004296, "loss": 0.1538, "step": 14180 }, { "epoch": 0.025145858211899335, "grad_norm": 0.6015625, "learning_rate": 0.001915863466581159, "loss": 0.1971, "step": 14182 }, { "epoch": 0.02514940437720915, "grad_norm": 0.921875, "learning_rate": 0.0019158383375899092, "loss": 0.2822, "step": 14184 }, { "epoch": 0.025152950542518964, "grad_norm": 0.36328125, "learning_rate": 0.001915813205030656, "loss": 0.1973, "step": 14186 }, { "epoch": 0.025156496707828782, "grad_norm": 0.3203125, "learning_rate": 0.0019157880689035102, "loss": 0.2598, "step": 14188 }, { "epoch": 0.025160042873138597, "grad_norm": 0.31640625, "learning_rate": 0.001915762929208581, "loss": 0.2675, "step": 14190 }, { "epoch": 0.02516358903844841, "grad_norm": 0.431640625, "learning_rate": 0.0019157377859459785, "loss": 0.1687, "step": 14192 }, { "epoch": 0.025167135203758226, "grad_norm": 0.71875, "learning_rate": 0.001915712639115813, "loss": 0.2638, "step": 14194 }, { "epoch": 0.02517068136906804, "grad_norm": 0.61328125, "learning_rate": 0.0019156874887181943, "loss": 0.2039, "step": 14196 }, { "epoch": 0.025174227534377855, "grad_norm": 0.447265625, "learning_rate": 0.0019156623347532323, "loss": 0.2796, "step": 14198 }, { "epoch": 0.025177773699687673, "grad_norm": 0.67578125, "learning_rate": 0.0019156371772210369, "loss": 0.2175, "step": 14200 }, { "epoch": 0.025181319864997487, "grad_norm": 0.30859375, "learning_rate": 0.0019156120161217182, "loss": 0.2446, "step": 14202 }, { "epoch": 0.025184866030307302, "grad_norm": 0.330078125, "learning_rate": 0.0019155868514553864, "loss": 0.2009, "step": 14204 }, { "epoch": 0.025188412195617117, "grad_norm": 0.486328125, "learning_rate": 0.0019155616832221515, "loss": 0.2326, "step": 14206 }, { "epoch": 0.02519195836092693, "grad_norm": 0.3125, "learning_rate": 0.0019155365114221234, "loss": 0.2416, "step": 14208 }, { "epoch": 0.025195504526236746, "grad_norm": 0.44140625, "learning_rate": 0.0019155113360554124, "loss": 0.1995, "step": 14210 }, { "epoch": 0.025199050691546564, "grad_norm": 2.21875, "learning_rate": 0.0019154861571221288, "loss": 0.5416, "step": 14212 }, { "epoch": 0.025202596856856378, "grad_norm": 0.28125, "learning_rate": 0.001915460974622382, "loss": 0.1937, "step": 14214 }, { "epoch": 0.025206143022166193, "grad_norm": 3.65625, "learning_rate": 0.0019154357885562828, "loss": 0.2904, "step": 14216 }, { "epoch": 0.025209689187476007, "grad_norm": 0.390625, "learning_rate": 0.001915410598923941, "loss": 0.1829, "step": 14218 }, { "epoch": 0.025213235352785822, "grad_norm": 0.39453125, "learning_rate": 0.0019153854057254672, "loss": 0.2214, "step": 14220 }, { "epoch": 0.02521678151809564, "grad_norm": 1.3515625, "learning_rate": 0.0019153602089609708, "loss": 0.2305, "step": 14222 }, { "epoch": 0.025220327683405454, "grad_norm": 1.65625, "learning_rate": 0.0019153350086305628, "loss": 0.2849, "step": 14224 }, { "epoch": 0.02522387384871527, "grad_norm": 0.306640625, "learning_rate": 0.001915309804734353, "loss": 0.161, "step": 14226 }, { "epoch": 0.025227420014025084, "grad_norm": 2.015625, "learning_rate": 0.0019152845972724517, "loss": 0.2227, "step": 14228 }, { "epoch": 0.025230966179334898, "grad_norm": 0.76171875, "learning_rate": 0.001915259386244969, "loss": 0.2329, "step": 14230 }, { "epoch": 0.025234512344644713, "grad_norm": 5.9375, "learning_rate": 0.0019152341716520153, "loss": 0.3652, "step": 14232 }, { "epoch": 0.02523805850995453, "grad_norm": 0.36328125, "learning_rate": 0.0019152089534937014, "loss": 0.2174, "step": 14234 }, { "epoch": 0.025241604675264345, "grad_norm": 0.61328125, "learning_rate": 0.0019151837317701364, "loss": 0.1968, "step": 14236 }, { "epoch": 0.02524515084057416, "grad_norm": 0.94921875, "learning_rate": 0.0019151585064814316, "loss": 0.216, "step": 14238 }, { "epoch": 0.025248697005883974, "grad_norm": 0.279296875, "learning_rate": 0.001915133277627697, "loss": 0.2583, "step": 14240 }, { "epoch": 0.02525224317119379, "grad_norm": 0.30078125, "learning_rate": 0.0019151080452090429, "loss": 0.1993, "step": 14242 }, { "epoch": 0.025255789336503603, "grad_norm": 0.8359375, "learning_rate": 0.0019150828092255794, "loss": 0.2012, "step": 14244 }, { "epoch": 0.02525933550181342, "grad_norm": 0.671875, "learning_rate": 0.0019150575696774176, "loss": 0.2384, "step": 14246 }, { "epoch": 0.025262881667123236, "grad_norm": 0.431640625, "learning_rate": 0.001915032326564667, "loss": 0.2082, "step": 14248 }, { "epoch": 0.02526642783243305, "grad_norm": 0.453125, "learning_rate": 0.001915007079887439, "loss": 0.2235, "step": 14250 }, { "epoch": 0.025269973997742865, "grad_norm": 0.482421875, "learning_rate": 0.0019149818296458428, "loss": 0.3525, "step": 14252 }, { "epoch": 0.02527352016305268, "grad_norm": 0.8203125, "learning_rate": 0.0019149565758399898, "loss": 0.2393, "step": 14254 }, { "epoch": 0.025277066328362498, "grad_norm": 0.7421875, "learning_rate": 0.0019149313184699903, "loss": 0.305, "step": 14256 }, { "epoch": 0.025280612493672312, "grad_norm": 1.546875, "learning_rate": 0.001914906057535954, "loss": 0.31, "step": 14258 }, { "epoch": 0.025284158658982127, "grad_norm": 0.376953125, "learning_rate": 0.0019148807930379924, "loss": 0.2632, "step": 14260 }, { "epoch": 0.02528770482429194, "grad_norm": 0.73046875, "learning_rate": 0.0019148555249762155, "loss": 0.247, "step": 14262 }, { "epoch": 0.025291250989601756, "grad_norm": 0.40234375, "learning_rate": 0.001914830253350734, "loss": 0.227, "step": 14264 }, { "epoch": 0.02529479715491157, "grad_norm": 0.45703125, "learning_rate": 0.0019148049781616582, "loss": 0.1652, "step": 14266 }, { "epoch": 0.02529834332022139, "grad_norm": 1.8515625, "learning_rate": 0.0019147796994090987, "loss": 0.2517, "step": 14268 }, { "epoch": 0.025301889485531203, "grad_norm": 0.67578125, "learning_rate": 0.0019147544170931658, "loss": 0.2481, "step": 14270 }, { "epoch": 0.025305435650841018, "grad_norm": 0.8515625, "learning_rate": 0.0019147291312139707, "loss": 0.2564, "step": 14272 }, { "epoch": 0.025308981816150832, "grad_norm": 0.9375, "learning_rate": 0.0019147038417716236, "loss": 0.234, "step": 14274 }, { "epoch": 0.025312527981460647, "grad_norm": 0.38671875, "learning_rate": 0.0019146785487662349, "loss": 0.2343, "step": 14276 }, { "epoch": 0.02531607414677046, "grad_norm": 0.205078125, "learning_rate": 0.0019146532521979156, "loss": 0.2061, "step": 14278 }, { "epoch": 0.02531962031208028, "grad_norm": 0.359375, "learning_rate": 0.0019146279520667762, "loss": 0.1956, "step": 14280 }, { "epoch": 0.025323166477390094, "grad_norm": 0.984375, "learning_rate": 0.0019146026483729272, "loss": 0.3258, "step": 14282 }, { "epoch": 0.02532671264269991, "grad_norm": 0.63671875, "learning_rate": 0.0019145773411164795, "loss": 0.2872, "step": 14284 }, { "epoch": 0.025330258808009723, "grad_norm": 0.40625, "learning_rate": 0.0019145520302975437, "loss": 0.1759, "step": 14286 }, { "epoch": 0.025333804973319538, "grad_norm": 0.390625, "learning_rate": 0.0019145267159162303, "loss": 0.211, "step": 14288 }, { "epoch": 0.025337351138629356, "grad_norm": 1.171875, "learning_rate": 0.0019145013979726502, "loss": 0.2766, "step": 14290 }, { "epoch": 0.02534089730393917, "grad_norm": 0.671875, "learning_rate": 0.0019144760764669144, "loss": 0.2262, "step": 14292 }, { "epoch": 0.025344443469248985, "grad_norm": 0.4765625, "learning_rate": 0.001914450751399133, "loss": 0.2265, "step": 14294 }, { "epoch": 0.0253479896345588, "grad_norm": 0.466796875, "learning_rate": 0.0019144254227694173, "loss": 0.2133, "step": 14296 }, { "epoch": 0.025351535799868614, "grad_norm": 0.416015625, "learning_rate": 0.001914400090577878, "loss": 0.2247, "step": 14298 }, { "epoch": 0.02535508196517843, "grad_norm": 0.46484375, "learning_rate": 0.0019143747548246255, "loss": 0.1978, "step": 14300 }, { "epoch": 0.025358628130488246, "grad_norm": 0.408203125, "learning_rate": 0.0019143494155097712, "loss": 0.2178, "step": 14302 }, { "epoch": 0.02536217429579806, "grad_norm": 0.42578125, "learning_rate": 0.0019143240726334253, "loss": 0.216, "step": 14304 }, { "epoch": 0.025365720461107875, "grad_norm": 0.244140625, "learning_rate": 0.001914298726195699, "loss": 0.2115, "step": 14306 }, { "epoch": 0.02536926662641769, "grad_norm": 0.984375, "learning_rate": 0.001914273376196703, "loss": 0.3008, "step": 14308 }, { "epoch": 0.025372812791727505, "grad_norm": 0.53125, "learning_rate": 0.001914248022636548, "loss": 0.2185, "step": 14310 }, { "epoch": 0.02537635895703732, "grad_norm": 1.0859375, "learning_rate": 0.0019142226655153457, "loss": 0.2335, "step": 14312 }, { "epoch": 0.025379905122347137, "grad_norm": 1.2578125, "learning_rate": 0.0019141973048332063, "loss": 0.2434, "step": 14314 }, { "epoch": 0.02538345128765695, "grad_norm": 0.41796875, "learning_rate": 0.0019141719405902406, "loss": 0.1836, "step": 14316 }, { "epoch": 0.025386997452966766, "grad_norm": 4.0625, "learning_rate": 0.0019141465727865595, "loss": 0.3423, "step": 14318 }, { "epoch": 0.02539054361827658, "grad_norm": 0.234375, "learning_rate": 0.0019141212014222747, "loss": 0.1716, "step": 14320 }, { "epoch": 0.025394089783586395, "grad_norm": 0.4140625, "learning_rate": 0.0019140958264974963, "loss": 0.2725, "step": 14322 }, { "epoch": 0.025397635948896213, "grad_norm": 0.59375, "learning_rate": 0.001914070448012336, "loss": 0.2923, "step": 14324 }, { "epoch": 0.025401182114206028, "grad_norm": 0.76953125, "learning_rate": 0.0019140450659669039, "loss": 0.2703, "step": 14326 }, { "epoch": 0.025404728279515842, "grad_norm": 0.404296875, "learning_rate": 0.0019140196803613119, "loss": 0.2744, "step": 14328 }, { "epoch": 0.025408274444825657, "grad_norm": 0.5703125, "learning_rate": 0.001913994291195671, "loss": 0.2087, "step": 14330 }, { "epoch": 0.02541182061013547, "grad_norm": 1.1328125, "learning_rate": 0.0019139688984700916, "loss": 0.3415, "step": 14332 }, { "epoch": 0.025415366775445286, "grad_norm": 1.0234375, "learning_rate": 0.0019139435021846849, "loss": 0.1995, "step": 14334 }, { "epoch": 0.025418912940755104, "grad_norm": 0.62890625, "learning_rate": 0.001913918102339562, "loss": 0.2535, "step": 14336 }, { "epoch": 0.02542245910606492, "grad_norm": 0.515625, "learning_rate": 0.0019138926989348342, "loss": 0.4705, "step": 14338 }, { "epoch": 0.025426005271374733, "grad_norm": 0.59765625, "learning_rate": 0.0019138672919706127, "loss": 0.2064, "step": 14340 }, { "epoch": 0.025429551436684548, "grad_norm": 1.0234375, "learning_rate": 0.0019138418814470082, "loss": 0.2218, "step": 14342 }, { "epoch": 0.025433097601994362, "grad_norm": 1.109375, "learning_rate": 0.0019138164673641322, "loss": 0.279, "step": 14344 }, { "epoch": 0.025436643767304177, "grad_norm": 0.287109375, "learning_rate": 0.0019137910497220956, "loss": 0.2018, "step": 14346 }, { "epoch": 0.025440189932613995, "grad_norm": 0.314453125, "learning_rate": 0.00191376562852101, "loss": 0.2745, "step": 14348 }, { "epoch": 0.02544373609792381, "grad_norm": 0.640625, "learning_rate": 0.0019137402037609857, "loss": 0.2219, "step": 14350 }, { "epoch": 0.025447282263233624, "grad_norm": 0.447265625, "learning_rate": 0.001913714775442135, "loss": 0.234, "step": 14352 }, { "epoch": 0.02545082842854344, "grad_norm": 0.470703125, "learning_rate": 0.001913689343564568, "loss": 0.2991, "step": 14354 }, { "epoch": 0.025454374593853253, "grad_norm": 0.66796875, "learning_rate": 0.0019136639081283964, "loss": 0.2025, "step": 14356 }, { "epoch": 0.02545792075916307, "grad_norm": 0.45703125, "learning_rate": 0.0019136384691337321, "loss": 0.2744, "step": 14358 }, { "epoch": 0.025461466924472886, "grad_norm": 1.0390625, "learning_rate": 0.0019136130265806853, "loss": 0.1906, "step": 14360 }, { "epoch": 0.0254650130897827, "grad_norm": 3.71875, "learning_rate": 0.0019135875804693677, "loss": 0.3135, "step": 14362 }, { "epoch": 0.025468559255092515, "grad_norm": 0.38671875, "learning_rate": 0.0019135621307998908, "loss": 0.2412, "step": 14364 }, { "epoch": 0.02547210542040233, "grad_norm": 0.388671875, "learning_rate": 0.0019135366775723658, "loss": 0.1991, "step": 14366 }, { "epoch": 0.025475651585712144, "grad_norm": 0.39453125, "learning_rate": 0.0019135112207869036, "loss": 0.3452, "step": 14368 }, { "epoch": 0.025479197751021962, "grad_norm": 0.37890625, "learning_rate": 0.001913485760443616, "loss": 0.2382, "step": 14370 }, { "epoch": 0.025482743916331776, "grad_norm": 1.734375, "learning_rate": 0.0019134602965426143, "loss": 0.2199, "step": 14372 }, { "epoch": 0.02548629008164159, "grad_norm": 0.46484375, "learning_rate": 0.00191343482908401, "loss": 0.2345, "step": 14374 }, { "epoch": 0.025489836246951406, "grad_norm": 0.70703125, "learning_rate": 0.0019134093580679137, "loss": 0.2167, "step": 14376 }, { "epoch": 0.02549338241226122, "grad_norm": 0.61328125, "learning_rate": 0.0019133838834944376, "loss": 0.1742, "step": 14378 }, { "epoch": 0.025496928577571035, "grad_norm": 1.96875, "learning_rate": 0.001913358405363693, "loss": 0.2832, "step": 14380 }, { "epoch": 0.025500474742880853, "grad_norm": 1.03125, "learning_rate": 0.0019133329236757912, "loss": 0.2628, "step": 14382 }, { "epoch": 0.025504020908190667, "grad_norm": 0.259765625, "learning_rate": 0.0019133074384308434, "loss": 0.2282, "step": 14384 }, { "epoch": 0.025507567073500482, "grad_norm": 3.046875, "learning_rate": 0.0019132819496289616, "loss": 0.3004, "step": 14386 }, { "epoch": 0.025511113238810296, "grad_norm": 0.384765625, "learning_rate": 0.0019132564572702567, "loss": 0.2918, "step": 14388 }, { "epoch": 0.02551465940412011, "grad_norm": 0.578125, "learning_rate": 0.0019132309613548405, "loss": 0.2561, "step": 14390 }, { "epoch": 0.02551820556942993, "grad_norm": 1.0625, "learning_rate": 0.0019132054618828246, "loss": 0.3287, "step": 14392 }, { "epoch": 0.025521751734739744, "grad_norm": 0.36328125, "learning_rate": 0.0019131799588543203, "loss": 0.2029, "step": 14394 }, { "epoch": 0.025525297900049558, "grad_norm": 0.310546875, "learning_rate": 0.001913154452269439, "loss": 0.1439, "step": 14396 }, { "epoch": 0.025528844065359373, "grad_norm": 1.0234375, "learning_rate": 0.0019131289421282927, "loss": 0.2456, "step": 14398 }, { "epoch": 0.025532390230669187, "grad_norm": 0.263671875, "learning_rate": 0.0019131034284309925, "loss": 0.1774, "step": 14400 }, { "epoch": 0.025535936395979, "grad_norm": 0.251953125, "learning_rate": 0.0019130779111776507, "loss": 0.2214, "step": 14402 }, { "epoch": 0.02553948256128882, "grad_norm": 0.4453125, "learning_rate": 0.0019130523903683782, "loss": 0.2823, "step": 14404 }, { "epoch": 0.025543028726598634, "grad_norm": 0.435546875, "learning_rate": 0.0019130268660032866, "loss": 0.2331, "step": 14406 }, { "epoch": 0.02554657489190845, "grad_norm": 0.62890625, "learning_rate": 0.0019130013380824875, "loss": 0.2679, "step": 14408 }, { "epoch": 0.025550121057218263, "grad_norm": 0.6328125, "learning_rate": 0.0019129758066060935, "loss": 0.2774, "step": 14410 }, { "epoch": 0.025553667222528078, "grad_norm": 0.9453125, "learning_rate": 0.001912950271574215, "loss": 0.172, "step": 14412 }, { "epoch": 0.025557213387837893, "grad_norm": 1.9765625, "learning_rate": 0.0019129247329869644, "loss": 0.2489, "step": 14414 }, { "epoch": 0.02556075955314771, "grad_norm": 0.46484375, "learning_rate": 0.0019128991908444533, "loss": 0.1763, "step": 14416 }, { "epoch": 0.025564305718457525, "grad_norm": 0.25390625, "learning_rate": 0.0019128736451467935, "loss": 0.2252, "step": 14418 }, { "epoch": 0.02556785188376734, "grad_norm": 0.3671875, "learning_rate": 0.0019128480958940962, "loss": 0.2655, "step": 14420 }, { "epoch": 0.025571398049077154, "grad_norm": 0.62109375, "learning_rate": 0.0019128225430864734, "loss": 0.4226, "step": 14422 }, { "epoch": 0.02557494421438697, "grad_norm": 1.0234375, "learning_rate": 0.0019127969867240371, "loss": 0.2347, "step": 14424 }, { "epoch": 0.025578490379696787, "grad_norm": 1.5625, "learning_rate": 0.0019127714268068992, "loss": 0.2773, "step": 14426 }, { "epoch": 0.0255820365450066, "grad_norm": 0.65625, "learning_rate": 0.001912745863335171, "loss": 0.2183, "step": 14428 }, { "epoch": 0.025585582710316416, "grad_norm": 1.1640625, "learning_rate": 0.0019127202963089643, "loss": 0.2333, "step": 14430 }, { "epoch": 0.02558912887562623, "grad_norm": 0.9296875, "learning_rate": 0.0019126947257283914, "loss": 0.2336, "step": 14432 }, { "epoch": 0.025592675040936045, "grad_norm": 0.84375, "learning_rate": 0.0019126691515935635, "loss": 0.2532, "step": 14434 }, { "epoch": 0.02559622120624586, "grad_norm": 0.53515625, "learning_rate": 0.001912643573904593, "loss": 0.3615, "step": 14436 }, { "epoch": 0.025599767371555678, "grad_norm": 0.87890625, "learning_rate": 0.0019126179926615917, "loss": 0.1932, "step": 14438 }, { "epoch": 0.025603313536865492, "grad_norm": 1.2109375, "learning_rate": 0.0019125924078646714, "loss": 0.2298, "step": 14440 }, { "epoch": 0.025606859702175307, "grad_norm": 0.5390625, "learning_rate": 0.0019125668195139433, "loss": 0.2653, "step": 14442 }, { "epoch": 0.02561040586748512, "grad_norm": 0.376953125, "learning_rate": 0.0019125412276095204, "loss": 0.2084, "step": 14444 }, { "epoch": 0.025613952032794936, "grad_norm": 0.32421875, "learning_rate": 0.0019125156321515141, "loss": 0.174, "step": 14446 }, { "epoch": 0.02561749819810475, "grad_norm": 0.3671875, "learning_rate": 0.0019124900331400363, "loss": 0.2152, "step": 14448 }, { "epoch": 0.02562104436341457, "grad_norm": 0.3359375, "learning_rate": 0.0019124644305751992, "loss": 0.2205, "step": 14450 }, { "epoch": 0.025624590528724383, "grad_norm": 8.1875, "learning_rate": 0.0019124388244571146, "loss": 0.2619, "step": 14452 }, { "epoch": 0.025628136694034197, "grad_norm": 2.453125, "learning_rate": 0.0019124132147858941, "loss": 0.2973, "step": 14454 }, { "epoch": 0.025631682859344012, "grad_norm": 6.1875, "learning_rate": 0.0019123876015616505, "loss": 0.509, "step": 14456 }, { "epoch": 0.025635229024653827, "grad_norm": 0.494140625, "learning_rate": 0.0019123619847844953, "loss": 0.3097, "step": 14458 }, { "epoch": 0.025638775189963645, "grad_norm": 1.4765625, "learning_rate": 0.001912336364454541, "loss": 0.2535, "step": 14460 }, { "epoch": 0.02564232135527346, "grad_norm": 0.490234375, "learning_rate": 0.001912310740571899, "loss": 0.1607, "step": 14462 }, { "epoch": 0.025645867520583274, "grad_norm": 0.447265625, "learning_rate": 0.0019122851131366813, "loss": 0.2405, "step": 14464 }, { "epoch": 0.025649413685893088, "grad_norm": 3.046875, "learning_rate": 0.001912259482149001, "loss": 0.3025, "step": 14466 }, { "epoch": 0.025652959851202903, "grad_norm": 0.5546875, "learning_rate": 0.001912233847608969, "loss": 0.2178, "step": 14468 }, { "epoch": 0.025656506016512717, "grad_norm": 0.93359375, "learning_rate": 0.0019122082095166984, "loss": 0.2059, "step": 14470 }, { "epoch": 0.025660052181822535, "grad_norm": 2.828125, "learning_rate": 0.0019121825678723005, "loss": 0.3008, "step": 14472 }, { "epoch": 0.02566359834713235, "grad_norm": 0.400390625, "learning_rate": 0.001912156922675888, "loss": 0.3689, "step": 14474 }, { "epoch": 0.025667144512442164, "grad_norm": 0.416015625, "learning_rate": 0.001912131273927573, "loss": 0.4026, "step": 14476 }, { "epoch": 0.02567069067775198, "grad_norm": 0.302734375, "learning_rate": 0.0019121056216274673, "loss": 0.1851, "step": 14478 }, { "epoch": 0.025674236843061794, "grad_norm": 0.353515625, "learning_rate": 0.0019120799657756833, "loss": 0.2009, "step": 14480 }, { "epoch": 0.025677783008371608, "grad_norm": 0.416015625, "learning_rate": 0.0019120543063723336, "loss": 0.2027, "step": 14482 }, { "epoch": 0.025681329173681426, "grad_norm": 0.251953125, "learning_rate": 0.0019120286434175298, "loss": 0.1606, "step": 14484 }, { "epoch": 0.02568487533899124, "grad_norm": 0.63671875, "learning_rate": 0.0019120029769113846, "loss": 0.1858, "step": 14486 }, { "epoch": 0.025688421504301055, "grad_norm": 0.34765625, "learning_rate": 0.00191197730685401, "loss": 0.2374, "step": 14488 }, { "epoch": 0.02569196766961087, "grad_norm": 0.6640625, "learning_rate": 0.001911951633245518, "loss": 0.2286, "step": 14490 }, { "epoch": 0.025695513834920684, "grad_norm": 1.2265625, "learning_rate": 0.0019119259560860216, "loss": 0.2667, "step": 14492 }, { "epoch": 0.025699060000230502, "grad_norm": 0.4765625, "learning_rate": 0.0019119002753756323, "loss": 0.2498, "step": 14494 }, { "epoch": 0.025702606165540317, "grad_norm": 1.1484375, "learning_rate": 0.0019118745911144632, "loss": 0.2345, "step": 14496 }, { "epoch": 0.02570615233085013, "grad_norm": 2.03125, "learning_rate": 0.0019118489033026261, "loss": 0.3078, "step": 14498 }, { "epoch": 0.025709698496159946, "grad_norm": 0.365234375, "learning_rate": 0.0019118232119402332, "loss": 0.2554, "step": 14500 }, { "epoch": 0.02571324466146976, "grad_norm": 0.4765625, "learning_rate": 0.0019117975170273973, "loss": 0.2464, "step": 14502 }, { "epoch": 0.025716790826779575, "grad_norm": 0.45703125, "learning_rate": 0.0019117718185642308, "loss": 0.2298, "step": 14504 }, { "epoch": 0.025720336992089393, "grad_norm": 0.58203125, "learning_rate": 0.0019117461165508458, "loss": 0.2165, "step": 14506 }, { "epoch": 0.025723883157399208, "grad_norm": 0.419921875, "learning_rate": 0.0019117204109873544, "loss": 0.1989, "step": 14508 }, { "epoch": 0.025727429322709022, "grad_norm": 0.39453125, "learning_rate": 0.0019116947018738702, "loss": 0.2287, "step": 14510 }, { "epoch": 0.025730975488018837, "grad_norm": 0.412109375, "learning_rate": 0.0019116689892105044, "loss": 0.2387, "step": 14512 }, { "epoch": 0.02573452165332865, "grad_norm": 0.56640625, "learning_rate": 0.00191164327299737, "loss": 0.2285, "step": 14514 }, { "epoch": 0.025738067818638466, "grad_norm": 0.388671875, "learning_rate": 0.0019116175532345792, "loss": 0.2518, "step": 14516 }, { "epoch": 0.025741613983948284, "grad_norm": 1.9453125, "learning_rate": 0.001911591829922245, "loss": 0.1906, "step": 14518 }, { "epoch": 0.0257451601492581, "grad_norm": 3.3125, "learning_rate": 0.0019115661030604792, "loss": 0.2865, "step": 14520 }, { "epoch": 0.025748706314567913, "grad_norm": 0.828125, "learning_rate": 0.001911540372649395, "loss": 0.1852, "step": 14522 }, { "epoch": 0.025752252479877728, "grad_norm": 2.15625, "learning_rate": 0.0019115146386891042, "loss": 0.5389, "step": 14524 }, { "epoch": 0.025755798645187542, "grad_norm": 0.4921875, "learning_rate": 0.00191148890117972, "loss": 0.3038, "step": 14526 }, { "epoch": 0.02575934481049736, "grad_norm": 0.32421875, "learning_rate": 0.001911463160121355, "loss": 0.1592, "step": 14528 }, { "epoch": 0.025762890975807175, "grad_norm": 0.9609375, "learning_rate": 0.001911437415514121, "loss": 0.29, "step": 14530 }, { "epoch": 0.02576643714111699, "grad_norm": 0.322265625, "learning_rate": 0.0019114116673581317, "loss": 0.1901, "step": 14532 }, { "epoch": 0.025769983306426804, "grad_norm": 4.03125, "learning_rate": 0.0019113859156534986, "loss": 0.3074, "step": 14534 }, { "epoch": 0.02577352947173662, "grad_norm": 1.15625, "learning_rate": 0.0019113601604003347, "loss": 0.1938, "step": 14536 }, { "epoch": 0.025777075637046433, "grad_norm": 0.244140625, "learning_rate": 0.001911334401598753, "loss": 0.2131, "step": 14538 }, { "epoch": 0.02578062180235625, "grad_norm": 0.67578125, "learning_rate": 0.0019113086392488659, "loss": 0.222, "step": 14540 }, { "epoch": 0.025784167967666066, "grad_norm": 0.578125, "learning_rate": 0.001911282873350786, "loss": 0.2063, "step": 14542 }, { "epoch": 0.02578771413297588, "grad_norm": 1.3125, "learning_rate": 0.0019112571039046262, "loss": 0.4433, "step": 14544 }, { "epoch": 0.025791260298285695, "grad_norm": 0.85546875, "learning_rate": 0.0019112313309104989, "loss": 0.2687, "step": 14546 }, { "epoch": 0.02579480646359551, "grad_norm": 0.65625, "learning_rate": 0.001911205554368517, "loss": 0.2466, "step": 14548 }, { "epoch": 0.025798352628905324, "grad_norm": 0.28515625, "learning_rate": 0.0019111797742787933, "loss": 0.2469, "step": 14550 }, { "epoch": 0.025801898794215142, "grad_norm": 1.4140625, "learning_rate": 0.0019111539906414403, "loss": 0.2228, "step": 14552 }, { "epoch": 0.025805444959524956, "grad_norm": 0.8671875, "learning_rate": 0.001911128203456571, "loss": 0.3096, "step": 14554 }, { "epoch": 0.02580899112483477, "grad_norm": 0.92578125, "learning_rate": 0.0019111024127242983, "loss": 0.2732, "step": 14556 }, { "epoch": 0.025812537290144585, "grad_norm": 0.69140625, "learning_rate": 0.0019110766184447345, "loss": 0.2657, "step": 14558 }, { "epoch": 0.0258160834554544, "grad_norm": 1.03125, "learning_rate": 0.0019110508206179931, "loss": 0.2445, "step": 14560 }, { "epoch": 0.025819629620764218, "grad_norm": 0.2294921875, "learning_rate": 0.0019110250192441861, "loss": 0.1686, "step": 14562 }, { "epoch": 0.025823175786074033, "grad_norm": 0.26953125, "learning_rate": 0.0019109992143234269, "loss": 0.1855, "step": 14564 }, { "epoch": 0.025826721951383847, "grad_norm": 0.72265625, "learning_rate": 0.0019109734058558284, "loss": 0.2585, "step": 14566 }, { "epoch": 0.02583026811669366, "grad_norm": 0.625, "learning_rate": 0.0019109475938415027, "loss": 0.3157, "step": 14568 }, { "epoch": 0.025833814282003476, "grad_norm": 1.78125, "learning_rate": 0.0019109217782805638, "loss": 0.4324, "step": 14570 }, { "epoch": 0.02583736044731329, "grad_norm": 0.443359375, "learning_rate": 0.001910895959173124, "loss": 0.2578, "step": 14572 }, { "epoch": 0.02584090661262311, "grad_norm": 0.498046875, "learning_rate": 0.0019108701365192964, "loss": 0.1843, "step": 14574 }, { "epoch": 0.025844452777932923, "grad_norm": 0.462890625, "learning_rate": 0.0019108443103191938, "loss": 0.1911, "step": 14576 }, { "epoch": 0.025847998943242738, "grad_norm": 0.392578125, "learning_rate": 0.0019108184805729292, "loss": 0.2157, "step": 14578 }, { "epoch": 0.025851545108552552, "grad_norm": 0.59765625, "learning_rate": 0.0019107926472806153, "loss": 0.3567, "step": 14580 }, { "epoch": 0.025855091273862367, "grad_norm": 0.56640625, "learning_rate": 0.0019107668104423655, "loss": 0.2774, "step": 14582 }, { "epoch": 0.02585863743917218, "grad_norm": 0.380859375, "learning_rate": 0.0019107409700582926, "loss": 0.2489, "step": 14584 }, { "epoch": 0.025862183604482, "grad_norm": 0.546875, "learning_rate": 0.0019107151261285094, "loss": 0.2831, "step": 14586 }, { "epoch": 0.025865729769791814, "grad_norm": 0.390625, "learning_rate": 0.0019106892786531294, "loss": 0.1998, "step": 14588 }, { "epoch": 0.02586927593510163, "grad_norm": 0.3125, "learning_rate": 0.0019106634276322655, "loss": 0.2331, "step": 14590 }, { "epoch": 0.025872822100411443, "grad_norm": 0.365234375, "learning_rate": 0.0019106375730660306, "loss": 0.2044, "step": 14592 }, { "epoch": 0.025876368265721258, "grad_norm": 0.453125, "learning_rate": 0.0019106117149545376, "loss": 0.3099, "step": 14594 }, { "epoch": 0.025879914431031076, "grad_norm": 0.5703125, "learning_rate": 0.0019105858532979002, "loss": 0.1866, "step": 14596 }, { "epoch": 0.02588346059634089, "grad_norm": 0.6875, "learning_rate": 0.0019105599880962307, "loss": 0.2265, "step": 14598 }, { "epoch": 0.025887006761650705, "grad_norm": 0.42578125, "learning_rate": 0.0019105341193496428, "loss": 0.2651, "step": 14600 }, { "epoch": 0.02589055292696052, "grad_norm": 1.359375, "learning_rate": 0.0019105082470582495, "loss": 0.2419, "step": 14602 }, { "epoch": 0.025894099092270334, "grad_norm": 0.546875, "learning_rate": 0.0019104823712221641, "loss": 0.2241, "step": 14604 }, { "epoch": 0.02589764525758015, "grad_norm": 0.27734375, "learning_rate": 0.0019104564918414994, "loss": 0.1834, "step": 14606 }, { "epoch": 0.025901191422889967, "grad_norm": 1.578125, "learning_rate": 0.001910430608916369, "loss": 0.5238, "step": 14608 }, { "epoch": 0.02590473758819978, "grad_norm": 0.25390625, "learning_rate": 0.0019104047224468856, "loss": 0.2118, "step": 14610 }, { "epoch": 0.025908283753509596, "grad_norm": 1.3984375, "learning_rate": 0.0019103788324331629, "loss": 0.2461, "step": 14612 }, { "epoch": 0.02591182991881941, "grad_norm": 0.52734375, "learning_rate": 0.0019103529388753138, "loss": 0.2201, "step": 14614 }, { "epoch": 0.025915376084129225, "grad_norm": 0.283203125, "learning_rate": 0.0019103270417734515, "loss": 0.169, "step": 14616 }, { "epoch": 0.02591892224943904, "grad_norm": 0.30859375, "learning_rate": 0.0019103011411276893, "loss": 0.1584, "step": 14618 }, { "epoch": 0.025922468414748857, "grad_norm": 0.412109375, "learning_rate": 0.001910275236938141, "loss": 0.2714, "step": 14620 }, { "epoch": 0.025926014580058672, "grad_norm": 0.5546875, "learning_rate": 0.0019102493292049193, "loss": 0.2209, "step": 14622 }, { "epoch": 0.025929560745368486, "grad_norm": 0.37890625, "learning_rate": 0.0019102234179281378, "loss": 0.217, "step": 14624 }, { "epoch": 0.0259331069106783, "grad_norm": 1.4296875, "learning_rate": 0.0019101975031079095, "loss": 0.2033, "step": 14626 }, { "epoch": 0.025936653075988116, "grad_norm": 1.7578125, "learning_rate": 0.001910171584744348, "loss": 0.3697, "step": 14628 }, { "epoch": 0.025940199241297934, "grad_norm": 0.392578125, "learning_rate": 0.0019101456628375665, "loss": 0.1862, "step": 14630 }, { "epoch": 0.025943745406607748, "grad_norm": 0.5859375, "learning_rate": 0.001910119737387679, "loss": 0.3081, "step": 14632 }, { "epoch": 0.025947291571917563, "grad_norm": 0.65625, "learning_rate": 0.0019100938083947976, "loss": 0.2581, "step": 14634 }, { "epoch": 0.025950837737227377, "grad_norm": 18.75, "learning_rate": 0.0019100678758590369, "loss": 0.2683, "step": 14636 }, { "epoch": 0.025954383902537192, "grad_norm": 1.0625, "learning_rate": 0.0019100419397805096, "loss": 0.2786, "step": 14638 }, { "epoch": 0.025957930067847006, "grad_norm": 0.3984375, "learning_rate": 0.0019100160001593299, "loss": 0.1847, "step": 14640 }, { "epoch": 0.025961476233156824, "grad_norm": 0.322265625, "learning_rate": 0.0019099900569956101, "loss": 0.385, "step": 14642 }, { "epoch": 0.02596502239846664, "grad_norm": 1.7109375, "learning_rate": 0.0019099641102894645, "loss": 0.2356, "step": 14644 }, { "epoch": 0.025968568563776454, "grad_norm": 0.6875, "learning_rate": 0.0019099381600410064, "loss": 0.2214, "step": 14646 }, { "epoch": 0.025972114729086268, "grad_norm": 0.326171875, "learning_rate": 0.0019099122062503494, "loss": 0.2698, "step": 14648 }, { "epoch": 0.025975660894396083, "grad_norm": 0.55859375, "learning_rate": 0.0019098862489176065, "loss": 0.2492, "step": 14650 }, { "epoch": 0.025979207059705897, "grad_norm": 0.4296875, "learning_rate": 0.0019098602880428916, "loss": 0.2228, "step": 14652 }, { "epoch": 0.025982753225015715, "grad_norm": 1.5703125, "learning_rate": 0.0019098343236263187, "loss": 0.2568, "step": 14654 }, { "epoch": 0.02598629939032553, "grad_norm": 2.234375, "learning_rate": 0.0019098083556680005, "loss": 0.4226, "step": 14656 }, { "epoch": 0.025989845555635344, "grad_norm": 0.255859375, "learning_rate": 0.0019097823841680512, "loss": 0.2181, "step": 14658 }, { "epoch": 0.02599339172094516, "grad_norm": 0.2578125, "learning_rate": 0.001909756409126584, "loss": 0.1788, "step": 14660 }, { "epoch": 0.025996937886254973, "grad_norm": 0.484375, "learning_rate": 0.0019097304305437123, "loss": 0.2516, "step": 14662 }, { "epoch": 0.02600048405156479, "grad_norm": 0.65625, "learning_rate": 0.0019097044484195508, "loss": 0.2304, "step": 14664 }, { "epoch": 0.026004030216874606, "grad_norm": 0.2099609375, "learning_rate": 0.0019096784627542116, "loss": 0.183, "step": 14666 }, { "epoch": 0.02600757638218442, "grad_norm": 7.3125, "learning_rate": 0.0019096524735478094, "loss": 0.5231, "step": 14668 }, { "epoch": 0.026011122547494235, "grad_norm": 0.359375, "learning_rate": 0.0019096264808004578, "loss": 0.2108, "step": 14670 }, { "epoch": 0.02601466871280405, "grad_norm": 0.3515625, "learning_rate": 0.0019096004845122702, "loss": 0.2743, "step": 14672 }, { "epoch": 0.026018214878113864, "grad_norm": 0.2734375, "learning_rate": 0.00190957448468336, "loss": 0.1845, "step": 14674 }, { "epoch": 0.026021761043423682, "grad_norm": 0.53125, "learning_rate": 0.0019095484813138415, "loss": 0.2807, "step": 14676 }, { "epoch": 0.026025307208733497, "grad_norm": 0.71484375, "learning_rate": 0.0019095224744038282, "loss": 0.194, "step": 14678 }, { "epoch": 0.02602885337404331, "grad_norm": 0.5546875, "learning_rate": 0.001909496463953434, "loss": 0.1754, "step": 14680 }, { "epoch": 0.026032399539353126, "grad_norm": 0.400390625, "learning_rate": 0.0019094704499627723, "loss": 0.2059, "step": 14682 }, { "epoch": 0.02603594570466294, "grad_norm": 0.28515625, "learning_rate": 0.001909444432431957, "loss": 0.2378, "step": 14684 }, { "epoch": 0.026039491869972755, "grad_norm": 0.2578125, "learning_rate": 0.0019094184113611018, "loss": 0.2093, "step": 14686 }, { "epoch": 0.026043038035282573, "grad_norm": 1.2421875, "learning_rate": 0.001909392386750321, "loss": 0.2315, "step": 14688 }, { "epoch": 0.026046584200592388, "grad_norm": 0.67578125, "learning_rate": 0.0019093663585997276, "loss": 0.2437, "step": 14690 }, { "epoch": 0.026050130365902202, "grad_norm": 0.357421875, "learning_rate": 0.0019093403269094362, "loss": 0.2245, "step": 14692 }, { "epoch": 0.026053676531212017, "grad_norm": 0.94140625, "learning_rate": 0.0019093142916795603, "loss": 0.2478, "step": 14694 }, { "epoch": 0.02605722269652183, "grad_norm": 2.953125, "learning_rate": 0.0019092882529102135, "loss": 0.2911, "step": 14696 }, { "epoch": 0.02606076886183165, "grad_norm": 0.63671875, "learning_rate": 0.0019092622106015105, "loss": 0.1966, "step": 14698 }, { "epoch": 0.026064315027141464, "grad_norm": 0.578125, "learning_rate": 0.0019092361647535642, "loss": 0.2857, "step": 14700 }, { "epoch": 0.02606786119245128, "grad_norm": 1.5390625, "learning_rate": 0.0019092101153664893, "loss": 0.2704, "step": 14702 }, { "epoch": 0.026071407357761093, "grad_norm": 0.439453125, "learning_rate": 0.0019091840624403994, "loss": 0.2451, "step": 14704 }, { "epoch": 0.026074953523070907, "grad_norm": 0.298828125, "learning_rate": 0.001909158005975408, "loss": 0.2542, "step": 14706 }, { "epoch": 0.026078499688380722, "grad_norm": 0.5859375, "learning_rate": 0.0019091319459716301, "loss": 0.1703, "step": 14708 }, { "epoch": 0.02608204585369054, "grad_norm": 0.1962890625, "learning_rate": 0.0019091058824291787, "loss": 0.2259, "step": 14710 }, { "epoch": 0.026085592019000355, "grad_norm": 0.333984375, "learning_rate": 0.0019090798153481683, "loss": 0.2256, "step": 14712 }, { "epoch": 0.02608913818431017, "grad_norm": 0.408203125, "learning_rate": 0.0019090537447287127, "loss": 0.1817, "step": 14714 }, { "epoch": 0.026092684349619984, "grad_norm": 0.65234375, "learning_rate": 0.001909027670570926, "loss": 0.2133, "step": 14716 }, { "epoch": 0.026096230514929798, "grad_norm": 0.7421875, "learning_rate": 0.0019090015928749223, "loss": 0.2392, "step": 14718 }, { "epoch": 0.026099776680239613, "grad_norm": 1.0859375, "learning_rate": 0.0019089755116408155, "loss": 0.2425, "step": 14720 }, { "epoch": 0.02610332284554943, "grad_norm": 1.3359375, "learning_rate": 0.0019089494268687195, "loss": 0.6104, "step": 14722 }, { "epoch": 0.026106869010859245, "grad_norm": 0.37109375, "learning_rate": 0.0019089233385587488, "loss": 0.2047, "step": 14724 }, { "epoch": 0.02611041517616906, "grad_norm": 1.1171875, "learning_rate": 0.001908897246711017, "loss": 0.2606, "step": 14726 }, { "epoch": 0.026113961341478874, "grad_norm": 0.84375, "learning_rate": 0.001908871151325639, "loss": 0.2366, "step": 14728 }, { "epoch": 0.02611750750678869, "grad_norm": 0.8203125, "learning_rate": 0.0019088450524027281, "loss": 0.2255, "step": 14730 }, { "epoch": 0.026121053672098507, "grad_norm": 0.76953125, "learning_rate": 0.0019088189499423987, "loss": 0.22, "step": 14732 }, { "epoch": 0.02612459983740832, "grad_norm": 0.6953125, "learning_rate": 0.0019087928439447653, "loss": 0.1648, "step": 14734 }, { "epoch": 0.026128146002718136, "grad_norm": 0.59765625, "learning_rate": 0.0019087667344099416, "loss": 0.1796, "step": 14736 }, { "epoch": 0.02613169216802795, "grad_norm": 0.4921875, "learning_rate": 0.0019087406213380421, "loss": 0.2897, "step": 14738 }, { "epoch": 0.026135238333337765, "grad_norm": 0.97265625, "learning_rate": 0.001908714504729181, "loss": 0.2544, "step": 14740 }, { "epoch": 0.02613878449864758, "grad_norm": 0.470703125, "learning_rate": 0.0019086883845834722, "loss": 0.1746, "step": 14742 }, { "epoch": 0.026142330663957398, "grad_norm": 0.333984375, "learning_rate": 0.0019086622609010304, "loss": 0.2445, "step": 14744 }, { "epoch": 0.026145876829267212, "grad_norm": 0.291015625, "learning_rate": 0.0019086361336819694, "loss": 0.2983, "step": 14746 }, { "epoch": 0.026149422994577027, "grad_norm": 0.82421875, "learning_rate": 0.0019086100029264035, "loss": 0.2083, "step": 14748 }, { "epoch": 0.02615296915988684, "grad_norm": 10.5625, "learning_rate": 0.0019085838686344475, "loss": 0.3305, "step": 14750 }, { "epoch": 0.026156515325196656, "grad_norm": 1.859375, "learning_rate": 0.0019085577308062152, "loss": 0.264, "step": 14752 }, { "epoch": 0.02616006149050647, "grad_norm": 0.4921875, "learning_rate": 0.0019085315894418209, "loss": 0.1988, "step": 14754 }, { "epoch": 0.02616360765581629, "grad_norm": 1.390625, "learning_rate": 0.0019085054445413792, "loss": 0.201, "step": 14756 }, { "epoch": 0.026167153821126103, "grad_norm": 0.56640625, "learning_rate": 0.0019084792961050047, "loss": 0.2224, "step": 14758 }, { "epoch": 0.026170699986435918, "grad_norm": 0.6953125, "learning_rate": 0.0019084531441328108, "loss": 0.2322, "step": 14760 }, { "epoch": 0.026174246151745732, "grad_norm": 0.54296875, "learning_rate": 0.0019084269886249128, "loss": 0.2054, "step": 14762 }, { "epoch": 0.026177792317055547, "grad_norm": 0.423828125, "learning_rate": 0.0019084008295814244, "loss": 0.3416, "step": 14764 }, { "epoch": 0.026181338482365365, "grad_norm": 0.32421875, "learning_rate": 0.0019083746670024606, "loss": 0.2573, "step": 14766 }, { "epoch": 0.02618488464767518, "grad_norm": 0.9375, "learning_rate": 0.0019083485008881355, "loss": 0.2149, "step": 14768 }, { "epoch": 0.026188430812984994, "grad_norm": 0.37109375, "learning_rate": 0.0019083223312385635, "loss": 0.2298, "step": 14770 }, { "epoch": 0.02619197697829481, "grad_norm": 1.234375, "learning_rate": 0.0019082961580538596, "loss": 0.2629, "step": 14772 }, { "epoch": 0.026195523143604623, "grad_norm": 0.330078125, "learning_rate": 0.0019082699813341374, "loss": 0.2463, "step": 14774 }, { "epoch": 0.026199069308914438, "grad_norm": 0.77734375, "learning_rate": 0.001908243801079512, "loss": 0.3223, "step": 14776 }, { "epoch": 0.026202615474224256, "grad_norm": 1.2734375, "learning_rate": 0.0019082176172900973, "loss": 0.4925, "step": 14778 }, { "epoch": 0.02620616163953407, "grad_norm": 0.55859375, "learning_rate": 0.0019081914299660086, "loss": 0.259, "step": 14780 }, { "epoch": 0.026209707804843885, "grad_norm": 1.9921875, "learning_rate": 0.00190816523910736, "loss": 0.3621, "step": 14782 }, { "epoch": 0.0262132539701537, "grad_norm": 0.41796875, "learning_rate": 0.001908139044714266, "loss": 0.3189, "step": 14784 }, { "epoch": 0.026216800135463514, "grad_norm": 0.353515625, "learning_rate": 0.0019081128467868414, "loss": 0.1443, "step": 14786 }, { "epoch": 0.02622034630077333, "grad_norm": 0.70703125, "learning_rate": 0.0019080866453252004, "loss": 0.185, "step": 14788 }, { "epoch": 0.026223892466083146, "grad_norm": 1.0390625, "learning_rate": 0.0019080604403294578, "loss": 0.241, "step": 14790 }, { "epoch": 0.02622743863139296, "grad_norm": 0.51171875, "learning_rate": 0.0019080342317997286, "loss": 0.2265, "step": 14792 }, { "epoch": 0.026230984796702776, "grad_norm": 0.765625, "learning_rate": 0.0019080080197361265, "loss": 0.4541, "step": 14794 }, { "epoch": 0.02623453096201259, "grad_norm": 0.43359375, "learning_rate": 0.0019079818041387669, "loss": 0.1784, "step": 14796 }, { "epoch": 0.026238077127322405, "grad_norm": 0.3515625, "learning_rate": 0.0019079555850077643, "loss": 0.1908, "step": 14798 }, { "epoch": 0.026241623292632223, "grad_norm": 0.35546875, "learning_rate": 0.0019079293623432332, "loss": 0.3, "step": 14800 }, { "epoch": 0.026245169457942037, "grad_norm": 0.49609375, "learning_rate": 0.0019079031361452885, "loss": 0.167, "step": 14802 }, { "epoch": 0.026248715623251852, "grad_norm": 0.8203125, "learning_rate": 0.0019078769064140447, "loss": 0.2549, "step": 14804 }, { "epoch": 0.026252261788561666, "grad_norm": 0.46484375, "learning_rate": 0.0019078506731496163, "loss": 0.2131, "step": 14806 }, { "epoch": 0.02625580795387148, "grad_norm": 0.474609375, "learning_rate": 0.0019078244363521185, "loss": 0.2364, "step": 14808 }, { "epoch": 0.026259354119181295, "grad_norm": 2.96875, "learning_rate": 0.001907798196021666, "loss": 0.2523, "step": 14810 }, { "epoch": 0.026262900284491113, "grad_norm": 0.349609375, "learning_rate": 0.0019077719521583731, "loss": 0.2722, "step": 14812 }, { "epoch": 0.026266446449800928, "grad_norm": 0.4296875, "learning_rate": 0.0019077457047623552, "loss": 0.3053, "step": 14814 }, { "epoch": 0.026269992615110743, "grad_norm": 0.400390625, "learning_rate": 0.0019077194538337269, "loss": 0.2543, "step": 14816 }, { "epoch": 0.026273538780420557, "grad_norm": 0.474609375, "learning_rate": 0.0019076931993726026, "loss": 0.2537, "step": 14818 }, { "epoch": 0.02627708494573037, "grad_norm": 3.375, "learning_rate": 0.0019076669413790974, "loss": 0.3939, "step": 14820 }, { "epoch": 0.026280631111040186, "grad_norm": 0.82421875, "learning_rate": 0.0019076406798533264, "loss": 0.2492, "step": 14822 }, { "epoch": 0.026284177276350004, "grad_norm": 0.5625, "learning_rate": 0.001907614414795404, "loss": 0.2189, "step": 14824 }, { "epoch": 0.02628772344165982, "grad_norm": 2.53125, "learning_rate": 0.001907588146205445, "loss": 0.1692, "step": 14826 }, { "epoch": 0.026291269606969633, "grad_norm": 0.578125, "learning_rate": 0.0019075618740835652, "loss": 0.2354, "step": 14828 }, { "epoch": 0.026294815772279448, "grad_norm": 0.625, "learning_rate": 0.0019075355984298783, "loss": 0.1972, "step": 14830 }, { "epoch": 0.026298361937589262, "grad_norm": 0.625, "learning_rate": 0.0019075093192445001, "loss": 0.1916, "step": 14832 }, { "epoch": 0.02630190810289908, "grad_norm": 0.353515625, "learning_rate": 0.001907483036527545, "loss": 0.2462, "step": 14834 }, { "epoch": 0.026305454268208895, "grad_norm": 1.078125, "learning_rate": 0.0019074567502791282, "loss": 0.3043, "step": 14836 }, { "epoch": 0.02630900043351871, "grad_norm": 4.40625, "learning_rate": 0.0019074304604993647, "loss": 0.3007, "step": 14838 }, { "epoch": 0.026312546598828524, "grad_norm": 0.314453125, "learning_rate": 0.0019074041671883692, "loss": 0.1707, "step": 14840 }, { "epoch": 0.02631609276413834, "grad_norm": 0.52734375, "learning_rate": 0.001907377870346257, "loss": 0.3716, "step": 14842 }, { "epoch": 0.026319638929448153, "grad_norm": 0.7265625, "learning_rate": 0.001907351569973143, "loss": 0.1962, "step": 14844 }, { "epoch": 0.02632318509475797, "grad_norm": 0.390625, "learning_rate": 0.001907325266069142, "loss": 0.2329, "step": 14846 }, { "epoch": 0.026326731260067786, "grad_norm": 1.359375, "learning_rate": 0.0019072989586343693, "loss": 0.2513, "step": 14848 }, { "epoch": 0.0263302774253776, "grad_norm": 1.609375, "learning_rate": 0.0019072726476689402, "loss": 0.6849, "step": 14850 }, { "epoch": 0.026333823590687415, "grad_norm": 0.7578125, "learning_rate": 0.001907246333172969, "loss": 0.2384, "step": 14852 }, { "epoch": 0.02633736975599723, "grad_norm": 0.55859375, "learning_rate": 0.0019072200151465715, "loss": 0.181, "step": 14854 }, { "epoch": 0.026340915921307044, "grad_norm": 1.2578125, "learning_rate": 0.0019071936935898624, "loss": 0.5526, "step": 14856 }, { "epoch": 0.026344462086616862, "grad_norm": 0.4296875, "learning_rate": 0.001907167368502957, "loss": 0.2215, "step": 14858 }, { "epoch": 0.026348008251926677, "grad_norm": 0.5234375, "learning_rate": 0.0019071410398859706, "loss": 0.2278, "step": 14860 }, { "epoch": 0.02635155441723649, "grad_norm": 0.5546875, "learning_rate": 0.0019071147077390181, "loss": 0.2034, "step": 14862 }, { "epoch": 0.026355100582546306, "grad_norm": 0.60546875, "learning_rate": 0.0019070883720622145, "loss": 0.2976, "step": 14864 }, { "epoch": 0.02635864674785612, "grad_norm": 4.53125, "learning_rate": 0.0019070620328556754, "loss": 0.2985, "step": 14866 }, { "epoch": 0.02636219291316594, "grad_norm": 0.609375, "learning_rate": 0.0019070356901195154, "loss": 0.2863, "step": 14868 }, { "epoch": 0.026365739078475753, "grad_norm": 0.2734375, "learning_rate": 0.0019070093438538502, "loss": 0.2001, "step": 14870 }, { "epoch": 0.026369285243785567, "grad_norm": 0.416015625, "learning_rate": 0.0019069829940587951, "loss": 0.3035, "step": 14872 }, { "epoch": 0.026372831409095382, "grad_norm": 0.443359375, "learning_rate": 0.0019069566407344653, "loss": 0.222, "step": 14874 }, { "epoch": 0.026376377574405196, "grad_norm": 0.396484375, "learning_rate": 0.0019069302838809754, "loss": 0.222, "step": 14876 }, { "epoch": 0.02637992373971501, "grad_norm": 6.21875, "learning_rate": 0.0019069039234984413, "loss": 0.3695, "step": 14878 }, { "epoch": 0.02638346990502483, "grad_norm": 0.62890625, "learning_rate": 0.0019068775595869783, "loss": 0.2371, "step": 14880 }, { "epoch": 0.026387016070334644, "grad_norm": 0.384765625, "learning_rate": 0.0019068511921467015, "loss": 0.2246, "step": 14882 }, { "epoch": 0.026390562235644458, "grad_norm": 2.984375, "learning_rate": 0.0019068248211777262, "loss": 0.2463, "step": 14884 }, { "epoch": 0.026394108400954273, "grad_norm": 0.345703125, "learning_rate": 0.0019067984466801678, "loss": 0.1926, "step": 14886 }, { "epoch": 0.026397654566264087, "grad_norm": 0.64453125, "learning_rate": 0.0019067720686541416, "loss": 0.1887, "step": 14888 }, { "epoch": 0.026401200731573902, "grad_norm": 0.435546875, "learning_rate": 0.0019067456870997628, "loss": 0.1676, "step": 14890 }, { "epoch": 0.02640474689688372, "grad_norm": 0.53515625, "learning_rate": 0.0019067193020171475, "loss": 0.242, "step": 14892 }, { "epoch": 0.026408293062193534, "grad_norm": 0.4453125, "learning_rate": 0.0019066929134064104, "loss": 0.2627, "step": 14894 }, { "epoch": 0.02641183922750335, "grad_norm": 0.306640625, "learning_rate": 0.0019066665212676668, "loss": 0.1893, "step": 14896 }, { "epoch": 0.026415385392813164, "grad_norm": 0.251953125, "learning_rate": 0.0019066401256010326, "loss": 0.2064, "step": 14898 }, { "epoch": 0.026418931558122978, "grad_norm": 0.4140625, "learning_rate": 0.001906613726406623, "loss": 0.2684, "step": 14900 }, { "epoch": 0.026422477723432796, "grad_norm": 0.66015625, "learning_rate": 0.0019065873236845535, "loss": 0.2618, "step": 14902 }, { "epoch": 0.02642602388874261, "grad_norm": 0.32421875, "learning_rate": 0.00190656091743494, "loss": 0.1992, "step": 14904 }, { "epoch": 0.026429570054052425, "grad_norm": 0.83984375, "learning_rate": 0.0019065345076578971, "loss": 0.3719, "step": 14906 }, { "epoch": 0.02643311621936224, "grad_norm": 0.5703125, "learning_rate": 0.0019065080943535405, "loss": 0.2528, "step": 14908 }, { "epoch": 0.026436662384672054, "grad_norm": 1.34375, "learning_rate": 0.0019064816775219862, "loss": 0.2649, "step": 14910 }, { "epoch": 0.02644020854998187, "grad_norm": 0.5859375, "learning_rate": 0.0019064552571633497, "loss": 0.1754, "step": 14912 }, { "epoch": 0.026443754715291687, "grad_norm": 0.7890625, "learning_rate": 0.0019064288332777462, "loss": 0.2324, "step": 14914 }, { "epoch": 0.0264473008806015, "grad_norm": 0.78125, "learning_rate": 0.0019064024058652914, "loss": 0.2982, "step": 14916 }, { "epoch": 0.026450847045911316, "grad_norm": 0.34375, "learning_rate": 0.0019063759749261009, "loss": 0.2612, "step": 14918 }, { "epoch": 0.02645439321122113, "grad_norm": 0.64453125, "learning_rate": 0.0019063495404602903, "loss": 0.225, "step": 14920 }, { "epoch": 0.026457939376530945, "grad_norm": 0.76953125, "learning_rate": 0.0019063231024679752, "loss": 0.2348, "step": 14922 }, { "epoch": 0.02646148554184076, "grad_norm": 0.310546875, "learning_rate": 0.0019062966609492713, "loss": 0.2207, "step": 14924 }, { "epoch": 0.026465031707150578, "grad_norm": 0.6953125, "learning_rate": 0.0019062702159042938, "loss": 0.297, "step": 14926 }, { "epoch": 0.026468577872460392, "grad_norm": 2.1875, "learning_rate": 0.0019062437673331592, "loss": 0.3689, "step": 14928 }, { "epoch": 0.026472124037770207, "grad_norm": 0.89453125, "learning_rate": 0.0019062173152359825, "loss": 0.2048, "step": 14930 }, { "epoch": 0.02647567020308002, "grad_norm": 0.40625, "learning_rate": 0.0019061908596128795, "loss": 0.2138, "step": 14932 }, { "epoch": 0.026479216368389836, "grad_norm": 0.515625, "learning_rate": 0.001906164400463966, "loss": 0.2556, "step": 14934 }, { "epoch": 0.026482762533699654, "grad_norm": 0.259765625, "learning_rate": 0.0019061379377893577, "loss": 0.1824, "step": 14936 }, { "epoch": 0.02648630869900947, "grad_norm": 0.5703125, "learning_rate": 0.0019061114715891705, "loss": 0.2387, "step": 14938 }, { "epoch": 0.026489854864319283, "grad_norm": 0.279296875, "learning_rate": 0.0019060850018635196, "loss": 0.1963, "step": 14940 }, { "epoch": 0.026493401029629098, "grad_norm": 0.5078125, "learning_rate": 0.0019060585286125213, "loss": 0.2328, "step": 14942 }, { "epoch": 0.026496947194938912, "grad_norm": 0.80078125, "learning_rate": 0.0019060320518362915, "loss": 0.315, "step": 14944 }, { "epoch": 0.026500493360248727, "grad_norm": 1.5859375, "learning_rate": 0.0019060055715349454, "loss": 0.3271, "step": 14946 }, { "epoch": 0.026504039525558545, "grad_norm": 0.345703125, "learning_rate": 0.0019059790877085992, "loss": 0.204, "step": 14948 }, { "epoch": 0.02650758569086836, "grad_norm": 0.41796875, "learning_rate": 0.0019059526003573685, "loss": 0.3085, "step": 14950 }, { "epoch": 0.026511131856178174, "grad_norm": 1.390625, "learning_rate": 0.0019059261094813693, "loss": 0.242, "step": 14952 }, { "epoch": 0.02651467802148799, "grad_norm": 0.640625, "learning_rate": 0.0019058996150807174, "loss": 0.3998, "step": 14954 }, { "epoch": 0.026518224186797803, "grad_norm": 1.75, "learning_rate": 0.001905873117155529, "loss": 0.3869, "step": 14956 }, { "epoch": 0.026521770352107617, "grad_norm": 1.234375, "learning_rate": 0.0019058466157059193, "loss": 0.33, "step": 14958 }, { "epoch": 0.026525316517417435, "grad_norm": 0.7578125, "learning_rate": 0.001905820110732005, "loss": 0.3124, "step": 14960 }, { "epoch": 0.02652886268272725, "grad_norm": 0.8125, "learning_rate": 0.0019057936022339014, "loss": 0.2348, "step": 14962 }, { "epoch": 0.026532408848037065, "grad_norm": 1.984375, "learning_rate": 0.0019057670902117249, "loss": 0.3382, "step": 14964 }, { "epoch": 0.02653595501334688, "grad_norm": 0.9375, "learning_rate": 0.0019057405746655907, "loss": 0.2266, "step": 14966 }, { "epoch": 0.026539501178656694, "grad_norm": 0.39453125, "learning_rate": 0.0019057140555956155, "loss": 0.2307, "step": 14968 }, { "epoch": 0.02654304734396651, "grad_norm": 0.255859375, "learning_rate": 0.001905687533001915, "loss": 0.3062, "step": 14970 }, { "epoch": 0.026546593509276326, "grad_norm": 0.515625, "learning_rate": 0.0019056610068846054, "loss": 0.2401, "step": 14972 }, { "epoch": 0.02655013967458614, "grad_norm": 0.373046875, "learning_rate": 0.0019056344772438024, "loss": 0.1732, "step": 14974 }, { "epoch": 0.026553685839895955, "grad_norm": 1.015625, "learning_rate": 0.0019056079440796221, "loss": 0.3149, "step": 14976 }, { "epoch": 0.02655723200520577, "grad_norm": 1.2890625, "learning_rate": 0.0019055814073921807, "loss": 0.1881, "step": 14978 }, { "epoch": 0.026560778170515584, "grad_norm": 0.388671875, "learning_rate": 0.0019055548671815942, "loss": 0.2802, "step": 14980 }, { "epoch": 0.026564324335825402, "grad_norm": 0.93359375, "learning_rate": 0.0019055283234479786, "loss": 0.2396, "step": 14982 }, { "epoch": 0.026567870501135217, "grad_norm": 0.62890625, "learning_rate": 0.0019055017761914499, "loss": 0.1673, "step": 14984 }, { "epoch": 0.02657141666644503, "grad_norm": 1.046875, "learning_rate": 0.0019054752254121243, "loss": 0.3285, "step": 14986 }, { "epoch": 0.026574962831754846, "grad_norm": 0.326171875, "learning_rate": 0.0019054486711101183, "loss": 0.1991, "step": 14988 }, { "epoch": 0.02657850899706466, "grad_norm": 1.0078125, "learning_rate": 0.0019054221132855473, "loss": 0.1926, "step": 14990 }, { "epoch": 0.026582055162374475, "grad_norm": 0.7734375, "learning_rate": 0.001905395551938528, "loss": 0.3694, "step": 14992 }, { "epoch": 0.026585601327684293, "grad_norm": 0.451171875, "learning_rate": 0.0019053689870691763, "loss": 0.1863, "step": 14994 }, { "epoch": 0.026589147492994108, "grad_norm": 0.396484375, "learning_rate": 0.0019053424186776084, "loss": 0.233, "step": 14996 }, { "epoch": 0.026592693658303922, "grad_norm": 0.66015625, "learning_rate": 0.001905315846763941, "loss": 0.2588, "step": 14998 }, { "epoch": 0.026596239823613737, "grad_norm": 0.306640625, "learning_rate": 0.0019052892713282892, "loss": 0.2344, "step": 15000 }, { "epoch": 0.02659978598892355, "grad_norm": 0.376953125, "learning_rate": 0.0019052626923707703, "loss": 0.1813, "step": 15002 }, { "epoch": 0.02660333215423337, "grad_norm": 0.3671875, "learning_rate": 0.0019052361098915002, "loss": 0.2422, "step": 15004 }, { "epoch": 0.026606878319543184, "grad_norm": 0.59375, "learning_rate": 0.001905209523890595, "loss": 0.2422, "step": 15006 }, { "epoch": 0.026610424484853, "grad_norm": 0.71875, "learning_rate": 0.0019051829343681708, "loss": 0.2918, "step": 15008 }, { "epoch": 0.026613970650162813, "grad_norm": 0.357421875, "learning_rate": 0.0019051563413243447, "loss": 0.168, "step": 15010 }, { "epoch": 0.026617516815472628, "grad_norm": 0.75, "learning_rate": 0.001905129744759232, "loss": 0.2462, "step": 15012 }, { "epoch": 0.026621062980782442, "grad_norm": 0.60546875, "learning_rate": 0.0019051031446729494, "loss": 0.2822, "step": 15014 }, { "epoch": 0.02662460914609226, "grad_norm": 0.3515625, "learning_rate": 0.0019050765410656137, "loss": 0.2151, "step": 15016 }, { "epoch": 0.026628155311402075, "grad_norm": 0.4765625, "learning_rate": 0.0019050499339373407, "loss": 0.1869, "step": 15018 }, { "epoch": 0.02663170147671189, "grad_norm": 0.49609375, "learning_rate": 0.0019050233232882467, "loss": 0.2133, "step": 15020 }, { "epoch": 0.026635247642021704, "grad_norm": 2.671875, "learning_rate": 0.0019049967091184484, "loss": 0.2579, "step": 15022 }, { "epoch": 0.02663879380733152, "grad_norm": 2.484375, "learning_rate": 0.0019049700914280621, "loss": 0.2855, "step": 15024 }, { "epoch": 0.026642339972641333, "grad_norm": 0.80078125, "learning_rate": 0.0019049434702172045, "loss": 0.232, "step": 15026 }, { "epoch": 0.02664588613795115, "grad_norm": 0.419921875, "learning_rate": 0.0019049168454859915, "loss": 0.2858, "step": 15028 }, { "epoch": 0.026649432303260966, "grad_norm": 0.578125, "learning_rate": 0.0019048902172345396, "loss": 0.363, "step": 15030 }, { "epoch": 0.02665297846857078, "grad_norm": 1.0390625, "learning_rate": 0.0019048635854629658, "loss": 0.2173, "step": 15032 }, { "epoch": 0.026656524633880595, "grad_norm": 0.9296875, "learning_rate": 0.0019048369501713856, "loss": 0.175, "step": 15034 }, { "epoch": 0.02666007079919041, "grad_norm": 0.37109375, "learning_rate": 0.0019048103113599166, "loss": 0.2386, "step": 15036 }, { "epoch": 0.026663616964500227, "grad_norm": 2.875, "learning_rate": 0.0019047836690286746, "loss": 0.2733, "step": 15038 }, { "epoch": 0.026667163129810042, "grad_norm": 0.62109375, "learning_rate": 0.0019047570231777765, "loss": 0.2928, "step": 15040 }, { "epoch": 0.026670709295119856, "grad_norm": 0.279296875, "learning_rate": 0.0019047303738073385, "loss": 0.3489, "step": 15042 }, { "epoch": 0.02667425546042967, "grad_norm": 1.9375, "learning_rate": 0.0019047037209174774, "loss": 0.2277, "step": 15044 }, { "epoch": 0.026677801625739486, "grad_norm": 2.203125, "learning_rate": 0.0019046770645083093, "loss": 0.4361, "step": 15046 }, { "epoch": 0.0266813477910493, "grad_norm": 0.3828125, "learning_rate": 0.0019046504045799513, "loss": 0.1842, "step": 15048 }, { "epoch": 0.026684893956359118, "grad_norm": 0.66015625, "learning_rate": 0.00190462374113252, "loss": 0.3395, "step": 15050 }, { "epoch": 0.026688440121668933, "grad_norm": 1.0390625, "learning_rate": 0.0019045970741661318, "loss": 0.512, "step": 15052 }, { "epoch": 0.026691986286978747, "grad_norm": 0.98828125, "learning_rate": 0.0019045704036809033, "loss": 0.308, "step": 15054 }, { "epoch": 0.026695532452288562, "grad_norm": 0.86328125, "learning_rate": 0.001904543729676951, "loss": 0.2244, "step": 15056 }, { "epoch": 0.026699078617598376, "grad_norm": 0.87890625, "learning_rate": 0.001904517052154392, "loss": 0.199, "step": 15058 }, { "epoch": 0.02670262478290819, "grad_norm": 3.6875, "learning_rate": 0.0019044903711133427, "loss": 0.3927, "step": 15060 }, { "epoch": 0.02670617094821801, "grad_norm": 0.76953125, "learning_rate": 0.00190446368655392, "loss": 0.2784, "step": 15062 }, { "epoch": 0.026709717113527823, "grad_norm": 15.75, "learning_rate": 0.0019044369984762403, "loss": 0.3221, "step": 15064 }, { "epoch": 0.026713263278837638, "grad_norm": 0.55859375, "learning_rate": 0.0019044103068804204, "loss": 0.3118, "step": 15066 }, { "epoch": 0.026716809444147453, "grad_norm": 0.69921875, "learning_rate": 0.001904383611766577, "loss": 0.2349, "step": 15068 }, { "epoch": 0.026720355609457267, "grad_norm": 0.423828125, "learning_rate": 0.001904356913134827, "loss": 0.2012, "step": 15070 }, { "epoch": 0.026723901774767085, "grad_norm": 0.3828125, "learning_rate": 0.0019043302109852868, "loss": 0.2411, "step": 15072 }, { "epoch": 0.0267274479400769, "grad_norm": 0.51171875, "learning_rate": 0.001904303505318074, "loss": 0.2508, "step": 15074 }, { "epoch": 0.026730994105386714, "grad_norm": 0.443359375, "learning_rate": 0.0019042767961333046, "loss": 0.2092, "step": 15076 }, { "epoch": 0.02673454027069653, "grad_norm": 0.458984375, "learning_rate": 0.0019042500834310955, "loss": 0.2352, "step": 15078 }, { "epoch": 0.026738086436006343, "grad_norm": 0.71484375, "learning_rate": 0.0019042233672115641, "loss": 0.2208, "step": 15080 }, { "epoch": 0.026741632601316158, "grad_norm": 0.4375, "learning_rate": 0.0019041966474748265, "loss": 0.2644, "step": 15082 }, { "epoch": 0.026745178766625976, "grad_norm": 0.45703125, "learning_rate": 0.0019041699242209997, "loss": 0.2331, "step": 15084 }, { "epoch": 0.02674872493193579, "grad_norm": 1.0390625, "learning_rate": 0.0019041431974502012, "loss": 0.2221, "step": 15086 }, { "epoch": 0.026752271097245605, "grad_norm": 0.5703125, "learning_rate": 0.0019041164671625478, "loss": 0.2456, "step": 15088 }, { "epoch": 0.02675581726255542, "grad_norm": 0.388671875, "learning_rate": 0.0019040897333581555, "loss": 0.2733, "step": 15090 }, { "epoch": 0.026759363427865234, "grad_norm": 0.5625, "learning_rate": 0.0019040629960371416, "loss": 0.3393, "step": 15092 }, { "epoch": 0.02676290959317505, "grad_norm": 1.140625, "learning_rate": 0.0019040362551996236, "loss": 0.2042, "step": 15094 }, { "epoch": 0.026766455758484867, "grad_norm": 1.25, "learning_rate": 0.0019040095108457179, "loss": 0.2111, "step": 15096 }, { "epoch": 0.02677000192379468, "grad_norm": 0.71484375, "learning_rate": 0.0019039827629755417, "loss": 0.1714, "step": 15098 }, { "epoch": 0.026773548089104496, "grad_norm": 0.9140625, "learning_rate": 0.0019039560115892118, "loss": 0.178, "step": 15100 }, { "epoch": 0.02677709425441431, "grad_norm": 0.34375, "learning_rate": 0.0019039292566868453, "loss": 0.2248, "step": 15102 }, { "epoch": 0.026780640419724125, "grad_norm": 1.1171875, "learning_rate": 0.001903902498268559, "loss": 0.271, "step": 15104 }, { "epoch": 0.026784186585033943, "grad_norm": 0.5703125, "learning_rate": 0.0019038757363344708, "loss": 0.2073, "step": 15106 }, { "epoch": 0.026787732750343757, "grad_norm": 0.5703125, "learning_rate": 0.0019038489708846967, "loss": 0.2431, "step": 15108 }, { "epoch": 0.026791278915653572, "grad_norm": 1.1640625, "learning_rate": 0.0019038222019193537, "loss": 0.2159, "step": 15110 }, { "epoch": 0.026794825080963387, "grad_norm": 0.67578125, "learning_rate": 0.00190379542943856, "loss": 0.2053, "step": 15112 }, { "epoch": 0.0267983712462732, "grad_norm": 0.46484375, "learning_rate": 0.0019037686534424316, "loss": 0.2193, "step": 15114 }, { "epoch": 0.026801917411583016, "grad_norm": 0.55078125, "learning_rate": 0.0019037418739310862, "loss": 0.3041, "step": 15116 }, { "epoch": 0.026805463576892834, "grad_norm": 0.55859375, "learning_rate": 0.0019037150909046406, "loss": 0.2518, "step": 15118 }, { "epoch": 0.026809009742202648, "grad_norm": 0.32421875, "learning_rate": 0.001903688304363212, "loss": 0.1878, "step": 15120 }, { "epoch": 0.026812555907512463, "grad_norm": 0.51171875, "learning_rate": 0.0019036615143069179, "loss": 0.2409, "step": 15122 }, { "epoch": 0.026816102072822277, "grad_norm": 0.349609375, "learning_rate": 0.0019036347207358748, "loss": 0.177, "step": 15124 }, { "epoch": 0.026819648238132092, "grad_norm": 1.9609375, "learning_rate": 0.0019036079236502005, "loss": 0.2686, "step": 15126 }, { "epoch": 0.026823194403441906, "grad_norm": 3.328125, "learning_rate": 0.0019035811230500117, "loss": 0.5553, "step": 15128 }, { "epoch": 0.026826740568751724, "grad_norm": 0.99609375, "learning_rate": 0.0019035543189354258, "loss": 0.4466, "step": 15130 }, { "epoch": 0.02683028673406154, "grad_norm": 0.451171875, "learning_rate": 0.0019035275113065602, "loss": 0.2988, "step": 15132 }, { "epoch": 0.026833832899371354, "grad_norm": 0.7578125, "learning_rate": 0.001903500700163532, "loss": 0.2313, "step": 15134 }, { "epoch": 0.026837379064681168, "grad_norm": 1.328125, "learning_rate": 0.0019034738855064587, "loss": 0.2671, "step": 15136 }, { "epoch": 0.026840925229990983, "grad_norm": 0.66796875, "learning_rate": 0.0019034470673354568, "loss": 0.1987, "step": 15138 }, { "epoch": 0.0268444713953008, "grad_norm": 1.109375, "learning_rate": 0.0019034202456506446, "loss": 0.2458, "step": 15140 }, { "epoch": 0.026848017560610615, "grad_norm": 0.1748046875, "learning_rate": 0.001903393420452139, "loss": 0.236, "step": 15142 }, { "epoch": 0.02685156372592043, "grad_norm": 0.67578125, "learning_rate": 0.0019033665917400568, "loss": 0.2526, "step": 15144 }, { "epoch": 0.026855109891230244, "grad_norm": 1.359375, "learning_rate": 0.001903339759514516, "loss": 0.3702, "step": 15146 }, { "epoch": 0.02685865605654006, "grad_norm": 0.58984375, "learning_rate": 0.0019033129237756338, "loss": 0.268, "step": 15148 }, { "epoch": 0.026862202221849873, "grad_norm": 0.3984375, "learning_rate": 0.0019032860845235274, "loss": 0.2284, "step": 15150 }, { "epoch": 0.02686574838715969, "grad_norm": 0.259765625, "learning_rate": 0.0019032592417583143, "loss": 0.1844, "step": 15152 }, { "epoch": 0.026869294552469506, "grad_norm": 0.734375, "learning_rate": 0.001903232395480112, "loss": 0.2287, "step": 15154 }, { "epoch": 0.02687284071777932, "grad_norm": 1.984375, "learning_rate": 0.0019032055456890374, "loss": 0.2131, "step": 15156 }, { "epoch": 0.026876386883089135, "grad_norm": 0.390625, "learning_rate": 0.0019031786923852084, "loss": 0.1591, "step": 15158 }, { "epoch": 0.02687993304839895, "grad_norm": 0.515625, "learning_rate": 0.0019031518355687425, "loss": 0.1689, "step": 15160 }, { "epoch": 0.026883479213708764, "grad_norm": 3.40625, "learning_rate": 0.001903124975239757, "loss": 0.2431, "step": 15162 }, { "epoch": 0.026887025379018582, "grad_norm": 0.478515625, "learning_rate": 0.0019030981113983692, "loss": 0.1768, "step": 15164 }, { "epoch": 0.026890571544328397, "grad_norm": 0.314453125, "learning_rate": 0.0019030712440446969, "loss": 0.2345, "step": 15166 }, { "epoch": 0.02689411770963821, "grad_norm": 1.0546875, "learning_rate": 0.0019030443731788576, "loss": 0.2273, "step": 15168 }, { "epoch": 0.026897663874948026, "grad_norm": 0.85546875, "learning_rate": 0.0019030174988009685, "loss": 0.1511, "step": 15170 }, { "epoch": 0.02690121004025784, "grad_norm": 0.451171875, "learning_rate": 0.0019029906209111474, "loss": 0.2022, "step": 15172 }, { "epoch": 0.02690475620556766, "grad_norm": 0.69140625, "learning_rate": 0.0019029637395095117, "loss": 0.4968, "step": 15174 }, { "epoch": 0.026908302370877473, "grad_norm": 0.294921875, "learning_rate": 0.001902936854596179, "loss": 0.3302, "step": 15176 }, { "epoch": 0.026911848536187288, "grad_norm": 0.71484375, "learning_rate": 0.0019029099661712668, "loss": 0.2483, "step": 15178 }, { "epoch": 0.026915394701497102, "grad_norm": 1.2421875, "learning_rate": 0.0019028830742348927, "loss": 0.415, "step": 15180 }, { "epoch": 0.026918940866806917, "grad_norm": 0.357421875, "learning_rate": 0.0019028561787871748, "loss": 0.2501, "step": 15182 }, { "epoch": 0.02692248703211673, "grad_norm": 0.890625, "learning_rate": 0.00190282927982823, "loss": 0.2902, "step": 15184 }, { "epoch": 0.02692603319742655, "grad_norm": 0.44921875, "learning_rate": 0.0019028023773581765, "loss": 0.3006, "step": 15186 }, { "epoch": 0.026929579362736364, "grad_norm": 0.53125, "learning_rate": 0.0019027754713771316, "loss": 0.2204, "step": 15188 }, { "epoch": 0.02693312552804618, "grad_norm": 0.4765625, "learning_rate": 0.0019027485618852132, "loss": 0.1845, "step": 15190 }, { "epoch": 0.026936671693355993, "grad_norm": 0.53515625, "learning_rate": 0.0019027216488825386, "loss": 0.2787, "step": 15192 }, { "epoch": 0.026940217858665808, "grad_norm": 0.8828125, "learning_rate": 0.0019026947323692262, "loss": 0.4492, "step": 15194 }, { "epoch": 0.026943764023975622, "grad_norm": 2.125, "learning_rate": 0.001902667812345393, "loss": 0.3507, "step": 15196 }, { "epoch": 0.02694731018928544, "grad_norm": 0.27734375, "learning_rate": 0.0019026408888111572, "loss": 0.238, "step": 15198 }, { "epoch": 0.026950856354595255, "grad_norm": 2.34375, "learning_rate": 0.0019026139617666362, "loss": 0.2794, "step": 15200 }, { "epoch": 0.02695440251990507, "grad_norm": 0.462890625, "learning_rate": 0.001902587031211948, "loss": 0.2536, "step": 15202 }, { "epoch": 0.026957948685214884, "grad_norm": 0.55078125, "learning_rate": 0.0019025600971472104, "loss": 0.2051, "step": 15204 }, { "epoch": 0.0269614948505247, "grad_norm": 0.8359375, "learning_rate": 0.0019025331595725413, "loss": 0.203, "step": 15206 }, { "epoch": 0.026965041015834516, "grad_norm": 0.2080078125, "learning_rate": 0.0019025062184880581, "loss": 0.1757, "step": 15208 }, { "epoch": 0.02696858718114433, "grad_norm": 0.400390625, "learning_rate": 0.0019024792738938786, "loss": 0.2133, "step": 15210 }, { "epoch": 0.026972133346454145, "grad_norm": 0.640625, "learning_rate": 0.0019024523257901214, "loss": 0.4925, "step": 15212 }, { "epoch": 0.02697567951176396, "grad_norm": 0.498046875, "learning_rate": 0.0019024253741769033, "loss": 0.1979, "step": 15214 }, { "epoch": 0.026979225677073775, "grad_norm": 0.921875, "learning_rate": 0.0019023984190543431, "loss": 0.2097, "step": 15216 }, { "epoch": 0.02698277184238359, "grad_norm": 0.63671875, "learning_rate": 0.0019023714604225583, "loss": 0.203, "step": 15218 }, { "epoch": 0.026986318007693407, "grad_norm": 0.40234375, "learning_rate": 0.0019023444982816666, "loss": 0.21, "step": 15220 }, { "epoch": 0.02698986417300322, "grad_norm": 0.390625, "learning_rate": 0.0019023175326317863, "loss": 0.257, "step": 15222 }, { "epoch": 0.026993410338313036, "grad_norm": 1.015625, "learning_rate": 0.0019022905634730352, "loss": 0.2313, "step": 15224 }, { "epoch": 0.02699695650362285, "grad_norm": 0.53515625, "learning_rate": 0.001902263590805531, "loss": 0.2813, "step": 15226 }, { "epoch": 0.027000502668932665, "grad_norm": 0.50390625, "learning_rate": 0.0019022366146293922, "loss": 0.2477, "step": 15228 }, { "epoch": 0.02700404883424248, "grad_norm": 1.6875, "learning_rate": 0.001902209634944736, "loss": 0.3164, "step": 15230 }, { "epoch": 0.027007594999552298, "grad_norm": 0.8203125, "learning_rate": 0.0019021826517516811, "loss": 0.2341, "step": 15232 }, { "epoch": 0.027011141164862112, "grad_norm": 1.0234375, "learning_rate": 0.0019021556650503453, "loss": 0.2046, "step": 15234 }, { "epoch": 0.027014687330171927, "grad_norm": 0.6484375, "learning_rate": 0.0019021286748408463, "loss": 0.197, "step": 15236 }, { "epoch": 0.02701823349548174, "grad_norm": 0.609375, "learning_rate": 0.0019021016811233027, "loss": 0.273, "step": 15238 }, { "epoch": 0.027021779660791556, "grad_norm": 1.484375, "learning_rate": 0.001902074683897832, "loss": 0.2518, "step": 15240 }, { "epoch": 0.027025325826101374, "grad_norm": 1.921875, "learning_rate": 0.0019020476831645527, "loss": 0.4678, "step": 15242 }, { "epoch": 0.02702887199141119, "grad_norm": 0.2890625, "learning_rate": 0.0019020206789235826, "loss": 0.1867, "step": 15244 }, { "epoch": 0.027032418156721003, "grad_norm": 7.03125, "learning_rate": 0.0019019936711750403, "loss": 0.3129, "step": 15246 }, { "epoch": 0.027035964322030818, "grad_norm": 0.451171875, "learning_rate": 0.001901966659919043, "loss": 0.2265, "step": 15248 }, { "epoch": 0.027039510487340632, "grad_norm": 0.3203125, "learning_rate": 0.0019019396451557095, "loss": 0.3175, "step": 15250 }, { "epoch": 0.027043056652650447, "grad_norm": 0.54296875, "learning_rate": 0.001901912626885158, "loss": 0.2248, "step": 15252 }, { "epoch": 0.027046602817960265, "grad_norm": 0.25390625, "learning_rate": 0.0019018856051075065, "loss": 0.2404, "step": 15254 }, { "epoch": 0.02705014898327008, "grad_norm": 0.89453125, "learning_rate": 0.001901858579822873, "loss": 0.2238, "step": 15256 }, { "epoch": 0.027053695148579894, "grad_norm": 0.69921875, "learning_rate": 0.0019018315510313758, "loss": 0.2281, "step": 15258 }, { "epoch": 0.02705724131388971, "grad_norm": 0.20703125, "learning_rate": 0.0019018045187331333, "loss": 0.2019, "step": 15260 }, { "epoch": 0.027060787479199523, "grad_norm": 0.302734375, "learning_rate": 0.0019017774829282635, "loss": 0.16, "step": 15262 }, { "epoch": 0.027064333644509338, "grad_norm": 0.4765625, "learning_rate": 0.0019017504436168846, "loss": 0.1872, "step": 15264 }, { "epoch": 0.027067879809819156, "grad_norm": 0.392578125, "learning_rate": 0.0019017234007991152, "loss": 0.2284, "step": 15266 }, { "epoch": 0.02707142597512897, "grad_norm": 0.6953125, "learning_rate": 0.0019016963544750732, "loss": 0.2375, "step": 15268 }, { "epoch": 0.027074972140438785, "grad_norm": 3.078125, "learning_rate": 0.001901669304644877, "loss": 0.2851, "step": 15270 }, { "epoch": 0.0270785183057486, "grad_norm": 0.6015625, "learning_rate": 0.0019016422513086454, "loss": 0.2036, "step": 15272 }, { "epoch": 0.027082064471058414, "grad_norm": 0.484375, "learning_rate": 0.0019016151944664956, "loss": 0.2692, "step": 15274 }, { "epoch": 0.027085610636368232, "grad_norm": 0.375, "learning_rate": 0.0019015881341185468, "loss": 0.2084, "step": 15276 }, { "epoch": 0.027089156801678047, "grad_norm": 0.66015625, "learning_rate": 0.0019015610702649173, "loss": 0.3184, "step": 15278 }, { "epoch": 0.02709270296698786, "grad_norm": 0.6875, "learning_rate": 0.0019015340029057248, "loss": 0.221, "step": 15280 }, { "epoch": 0.027096249132297676, "grad_norm": 0.890625, "learning_rate": 0.0019015069320410887, "loss": 0.1924, "step": 15282 }, { "epoch": 0.02709979529760749, "grad_norm": 0.345703125, "learning_rate": 0.0019014798576711266, "loss": 0.2075, "step": 15284 }, { "epoch": 0.027103341462917305, "grad_norm": 0.78515625, "learning_rate": 0.0019014527797959573, "loss": 0.3273, "step": 15286 }, { "epoch": 0.027106887628227123, "grad_norm": 0.3359375, "learning_rate": 0.001901425698415699, "loss": 0.3972, "step": 15288 }, { "epoch": 0.027110433793536937, "grad_norm": 1.046875, "learning_rate": 0.00190139861353047, "loss": 0.2948, "step": 15290 }, { "epoch": 0.027113979958846752, "grad_norm": 2.140625, "learning_rate": 0.0019013715251403893, "loss": 0.2245, "step": 15292 }, { "epoch": 0.027117526124156566, "grad_norm": 0.40234375, "learning_rate": 0.001901344433245575, "loss": 0.2347, "step": 15294 }, { "epoch": 0.02712107228946638, "grad_norm": 0.498046875, "learning_rate": 0.0019013173378461452, "loss": 0.2032, "step": 15296 }, { "epoch": 0.027124618454776196, "grad_norm": 0.86328125, "learning_rate": 0.0019012902389422192, "loss": 0.2114, "step": 15298 }, { "epoch": 0.027128164620086014, "grad_norm": 0.3515625, "learning_rate": 0.0019012631365339153, "loss": 0.2238, "step": 15300 }, { "epoch": 0.027131710785395828, "grad_norm": 1.15625, "learning_rate": 0.0019012360306213518, "loss": 0.2421, "step": 15302 }, { "epoch": 0.027135256950705643, "grad_norm": 0.38671875, "learning_rate": 0.0019012089212046468, "loss": 0.1971, "step": 15304 }, { "epoch": 0.027138803116015457, "grad_norm": 0.65234375, "learning_rate": 0.0019011818082839199, "loss": 0.2091, "step": 15306 }, { "epoch": 0.027142349281325272, "grad_norm": 0.283203125, "learning_rate": 0.001901154691859289, "loss": 0.2308, "step": 15308 }, { "epoch": 0.02714589544663509, "grad_norm": 0.7421875, "learning_rate": 0.001901127571930873, "loss": 0.2504, "step": 15310 }, { "epoch": 0.027149441611944904, "grad_norm": 0.48828125, "learning_rate": 0.0019011004484987902, "loss": 0.1844, "step": 15312 }, { "epoch": 0.02715298777725472, "grad_norm": 0.83984375, "learning_rate": 0.001901073321563159, "loss": 0.2676, "step": 15314 }, { "epoch": 0.027156533942564533, "grad_norm": 0.451171875, "learning_rate": 0.001901046191124099, "loss": 0.2291, "step": 15316 }, { "epoch": 0.027160080107874348, "grad_norm": 0.83203125, "learning_rate": 0.001901019057181728, "loss": 0.4434, "step": 15318 }, { "epoch": 0.027163626273184163, "grad_norm": 1.015625, "learning_rate": 0.0019009919197361651, "loss": 0.5163, "step": 15320 }, { "epoch": 0.02716717243849398, "grad_norm": 0.404296875, "learning_rate": 0.0019009647787875288, "loss": 0.2067, "step": 15322 }, { "epoch": 0.027170718603803795, "grad_norm": 0.244140625, "learning_rate": 0.0019009376343359374, "loss": 0.1615, "step": 15324 }, { "epoch": 0.02717426476911361, "grad_norm": 0.462890625, "learning_rate": 0.0019009104863815106, "loss": 0.197, "step": 15326 }, { "epoch": 0.027177810934423424, "grad_norm": 1.0390625, "learning_rate": 0.001900883334924366, "loss": 0.248, "step": 15328 }, { "epoch": 0.02718135709973324, "grad_norm": 0.404296875, "learning_rate": 0.0019008561799646233, "loss": 0.217, "step": 15330 }, { "epoch": 0.027184903265043053, "grad_norm": 0.51171875, "learning_rate": 0.001900829021502401, "loss": 0.2013, "step": 15332 }, { "epoch": 0.02718844943035287, "grad_norm": 0.5078125, "learning_rate": 0.0019008018595378174, "loss": 0.2051, "step": 15334 }, { "epoch": 0.027191995595662686, "grad_norm": 0.50390625, "learning_rate": 0.0019007746940709917, "loss": 0.2385, "step": 15336 }, { "epoch": 0.0271955417609725, "grad_norm": 0.42578125, "learning_rate": 0.0019007475251020425, "loss": 0.2572, "step": 15338 }, { "epoch": 0.027199087926282315, "grad_norm": 2.34375, "learning_rate": 0.001900720352631089, "loss": 0.2728, "step": 15340 }, { "epoch": 0.02720263409159213, "grad_norm": 0.7265625, "learning_rate": 0.0019006931766582498, "loss": 0.2144, "step": 15342 }, { "epoch": 0.027206180256901948, "grad_norm": 0.341796875, "learning_rate": 0.0019006659971836436, "loss": 0.237, "step": 15344 }, { "epoch": 0.027209726422211762, "grad_norm": 0.58203125, "learning_rate": 0.0019006388142073893, "loss": 0.2356, "step": 15346 }, { "epoch": 0.027213272587521577, "grad_norm": 0.6015625, "learning_rate": 0.0019006116277296061, "loss": 0.2006, "step": 15348 }, { "epoch": 0.02721681875283139, "grad_norm": 1.515625, "learning_rate": 0.0019005844377504124, "loss": 0.2658, "step": 15350 }, { "epoch": 0.027220364918141206, "grad_norm": 0.66796875, "learning_rate": 0.0019005572442699278, "loss": 0.2551, "step": 15352 }, { "epoch": 0.02722391108345102, "grad_norm": 0.7578125, "learning_rate": 0.0019005300472882706, "loss": 0.1957, "step": 15354 }, { "epoch": 0.02722745724876084, "grad_norm": 0.640625, "learning_rate": 0.00190050284680556, "loss": 0.2783, "step": 15356 }, { "epoch": 0.027231003414070653, "grad_norm": 1.4453125, "learning_rate": 0.0019004756428219153, "loss": 0.2551, "step": 15358 }, { "epoch": 0.027234549579380467, "grad_norm": 2.21875, "learning_rate": 0.0019004484353374548, "loss": 0.2885, "step": 15360 }, { "epoch": 0.027238095744690282, "grad_norm": 0.1982421875, "learning_rate": 0.0019004212243522976, "loss": 0.2348, "step": 15362 }, { "epoch": 0.027241641910000097, "grad_norm": 0.330078125, "learning_rate": 0.0019003940098665628, "loss": 0.2123, "step": 15364 }, { "epoch": 0.02724518807530991, "grad_norm": 1.0390625, "learning_rate": 0.00190036679188037, "loss": 0.2455, "step": 15366 }, { "epoch": 0.02724873424061973, "grad_norm": 1.0078125, "learning_rate": 0.0019003395703938374, "loss": 0.3356, "step": 15368 }, { "epoch": 0.027252280405929544, "grad_norm": 0.5, "learning_rate": 0.001900312345407085, "loss": 0.1785, "step": 15370 }, { "epoch": 0.027255826571239358, "grad_norm": 1.796875, "learning_rate": 0.0019002851169202307, "loss": 0.4332, "step": 15372 }, { "epoch": 0.027259372736549173, "grad_norm": 1.5, "learning_rate": 0.0019002578849333942, "loss": 0.2256, "step": 15374 }, { "epoch": 0.027262918901858987, "grad_norm": 0.765625, "learning_rate": 0.001900230649446695, "loss": 0.2256, "step": 15376 }, { "epoch": 0.027266465067168805, "grad_norm": 0.61328125, "learning_rate": 0.0019002034104602513, "loss": 0.1571, "step": 15378 }, { "epoch": 0.02727001123247862, "grad_norm": 0.43359375, "learning_rate": 0.001900176167974183, "loss": 0.3175, "step": 15380 }, { "epoch": 0.027273557397788434, "grad_norm": 2.109375, "learning_rate": 0.0019001489219886087, "loss": 0.3228, "step": 15382 }, { "epoch": 0.02727710356309825, "grad_norm": 0.72265625, "learning_rate": 0.0019001216725036479, "loss": 0.2183, "step": 15384 }, { "epoch": 0.027280649728408064, "grad_norm": 0.365234375, "learning_rate": 0.0019000944195194196, "loss": 0.1846, "step": 15386 }, { "epoch": 0.027284195893717878, "grad_norm": 0.494140625, "learning_rate": 0.001900067163036043, "loss": 0.2067, "step": 15388 }, { "epoch": 0.027287742059027696, "grad_norm": 1.1484375, "learning_rate": 0.0019000399030536378, "loss": 0.2178, "step": 15390 }, { "epoch": 0.02729128822433751, "grad_norm": 1.0625, "learning_rate": 0.0019000126395723224, "loss": 0.2557, "step": 15392 }, { "epoch": 0.027294834389647325, "grad_norm": 0.609375, "learning_rate": 0.0018999853725922163, "loss": 0.5676, "step": 15394 }, { "epoch": 0.02729838055495714, "grad_norm": 0.80859375, "learning_rate": 0.0018999581021134392, "loss": 0.2546, "step": 15396 }, { "epoch": 0.027301926720266954, "grad_norm": 0.47265625, "learning_rate": 0.0018999308281361096, "loss": 0.2156, "step": 15398 }, { "epoch": 0.02730547288557677, "grad_norm": 1.203125, "learning_rate": 0.0018999035506603477, "loss": 0.4945, "step": 15400 }, { "epoch": 0.027309019050886587, "grad_norm": 0.310546875, "learning_rate": 0.0018998762696862722, "loss": 0.2199, "step": 15402 }, { "epoch": 0.0273125652161964, "grad_norm": 1.3046875, "learning_rate": 0.0018998489852140022, "loss": 0.2495, "step": 15404 }, { "epoch": 0.027316111381506216, "grad_norm": 0.255859375, "learning_rate": 0.0018998216972436577, "loss": 0.4403, "step": 15406 }, { "epoch": 0.02731965754681603, "grad_norm": 0.6640625, "learning_rate": 0.0018997944057753575, "loss": 0.2146, "step": 15408 }, { "epoch": 0.027323203712125845, "grad_norm": 0.890625, "learning_rate": 0.001899767110809221, "loss": 0.2135, "step": 15410 }, { "epoch": 0.027326749877435663, "grad_norm": 0.69921875, "learning_rate": 0.001899739812345368, "loss": 0.2371, "step": 15412 }, { "epoch": 0.027330296042745478, "grad_norm": 0.6875, "learning_rate": 0.0018997125103839177, "loss": 0.2115, "step": 15414 }, { "epoch": 0.027333842208055292, "grad_norm": 4.75, "learning_rate": 0.0018996852049249892, "loss": 0.3307, "step": 15416 }, { "epoch": 0.027337388373365107, "grad_norm": 0.53515625, "learning_rate": 0.0018996578959687021, "loss": 0.1585, "step": 15418 }, { "epoch": 0.02734093453867492, "grad_norm": 0.5859375, "learning_rate": 0.0018996305835151759, "loss": 0.2031, "step": 15420 }, { "epoch": 0.027344480703984736, "grad_norm": 0.5625, "learning_rate": 0.00189960326756453, "loss": 0.2385, "step": 15422 }, { "epoch": 0.027348026869294554, "grad_norm": 0.5703125, "learning_rate": 0.001899575948116884, "loss": 0.2273, "step": 15424 }, { "epoch": 0.02735157303460437, "grad_norm": 0.443359375, "learning_rate": 0.001899548625172357, "loss": 0.2049, "step": 15426 }, { "epoch": 0.027355119199914183, "grad_norm": 0.5625, "learning_rate": 0.001899521298731069, "loss": 0.2159, "step": 15428 }, { "epoch": 0.027358665365223998, "grad_norm": 1.96875, "learning_rate": 0.0018994939687931392, "loss": 0.3147, "step": 15430 }, { "epoch": 0.027362211530533812, "grad_norm": 0.53125, "learning_rate": 0.001899466635358687, "loss": 0.2018, "step": 15432 }, { "epoch": 0.027365757695843627, "grad_norm": 0.30859375, "learning_rate": 0.0018994392984278323, "loss": 0.2216, "step": 15434 }, { "epoch": 0.027369303861153445, "grad_norm": 2.421875, "learning_rate": 0.0018994119580006946, "loss": 0.3849, "step": 15436 }, { "epoch": 0.02737285002646326, "grad_norm": 0.80078125, "learning_rate": 0.001899384614077393, "loss": 0.2914, "step": 15438 }, { "epoch": 0.027376396191773074, "grad_norm": 0.87109375, "learning_rate": 0.0018993572666580476, "loss": 0.3477, "step": 15440 }, { "epoch": 0.02737994235708289, "grad_norm": 0.318359375, "learning_rate": 0.001899329915742778, "loss": 0.1767, "step": 15442 }, { "epoch": 0.027383488522392703, "grad_norm": 0.5703125, "learning_rate": 0.0018993025613317034, "loss": 0.1951, "step": 15444 }, { "epoch": 0.02738703468770252, "grad_norm": 2.109375, "learning_rate": 0.0018992752034249437, "loss": 0.4441, "step": 15446 }, { "epoch": 0.027390580853012336, "grad_norm": 0.376953125, "learning_rate": 0.0018992478420226187, "loss": 0.1852, "step": 15448 }, { "epoch": 0.02739412701832215, "grad_norm": 0.49609375, "learning_rate": 0.0018992204771248477, "loss": 0.1859, "step": 15450 }, { "epoch": 0.027397673183631965, "grad_norm": 2.140625, "learning_rate": 0.0018991931087317508, "loss": 0.2127, "step": 15452 }, { "epoch": 0.02740121934894178, "grad_norm": 0.3359375, "learning_rate": 0.0018991657368434476, "loss": 0.3461, "step": 15454 }, { "epoch": 0.027404765514251594, "grad_norm": 1.0859375, "learning_rate": 0.0018991383614600575, "loss": 0.2311, "step": 15456 }, { "epoch": 0.027408311679561412, "grad_norm": 1.6640625, "learning_rate": 0.0018991109825817001, "loss": 0.3202, "step": 15458 }, { "epoch": 0.027411857844871226, "grad_norm": 0.3515625, "learning_rate": 0.0018990836002084957, "loss": 0.2116, "step": 15460 }, { "epoch": 0.02741540401018104, "grad_norm": 0.67578125, "learning_rate": 0.0018990562143405638, "loss": 0.2348, "step": 15462 }, { "epoch": 0.027418950175490855, "grad_norm": 0.37890625, "learning_rate": 0.0018990288249780243, "loss": 0.1984, "step": 15464 }, { "epoch": 0.02742249634080067, "grad_norm": 0.98046875, "learning_rate": 0.0018990014321209965, "loss": 0.2039, "step": 15466 }, { "epoch": 0.027426042506110485, "grad_norm": 0.95703125, "learning_rate": 0.0018989740357696011, "loss": 0.2799, "step": 15468 }, { "epoch": 0.027429588671420303, "grad_norm": 0.6328125, "learning_rate": 0.0018989466359239566, "loss": 0.1798, "step": 15470 }, { "epoch": 0.027433134836730117, "grad_norm": 1.5078125, "learning_rate": 0.0018989192325841841, "loss": 0.2245, "step": 15472 }, { "epoch": 0.02743668100203993, "grad_norm": 1.0546875, "learning_rate": 0.001898891825750403, "loss": 0.2221, "step": 15474 }, { "epoch": 0.027440227167349746, "grad_norm": 0.4140625, "learning_rate": 0.001898864415422733, "loss": 0.3451, "step": 15476 }, { "epoch": 0.02744377333265956, "grad_norm": 1.015625, "learning_rate": 0.001898837001601294, "loss": 0.2383, "step": 15478 }, { "epoch": 0.02744731949796938, "grad_norm": 0.85546875, "learning_rate": 0.001898809584286206, "loss": 0.2777, "step": 15480 }, { "epoch": 0.027450865663279193, "grad_norm": 0.53125, "learning_rate": 0.001898782163477589, "loss": 0.2234, "step": 15482 }, { "epoch": 0.027454411828589008, "grad_norm": 2.0, "learning_rate": 0.0018987547391755627, "loss": 0.2381, "step": 15484 }, { "epoch": 0.027457957993898822, "grad_norm": 1.796875, "learning_rate": 0.0018987273113802473, "loss": 0.2826, "step": 15486 }, { "epoch": 0.027461504159208637, "grad_norm": 0.490234375, "learning_rate": 0.001898699880091762, "loss": 0.4324, "step": 15488 }, { "epoch": 0.02746505032451845, "grad_norm": 0.52734375, "learning_rate": 0.001898672445310228, "loss": 0.1801, "step": 15490 }, { "epoch": 0.02746859648982827, "grad_norm": 1.234375, "learning_rate": 0.0018986450070357646, "loss": 0.1696, "step": 15492 }, { "epoch": 0.027472142655138084, "grad_norm": 0.345703125, "learning_rate": 0.0018986175652684917, "loss": 0.2298, "step": 15494 }, { "epoch": 0.0274756888204479, "grad_norm": 1.6015625, "learning_rate": 0.0018985901200085293, "loss": 0.2746, "step": 15496 }, { "epoch": 0.027479234985757713, "grad_norm": 3.890625, "learning_rate": 0.001898562671255998, "loss": 0.1679, "step": 15498 }, { "epoch": 0.027482781151067528, "grad_norm": 0.86328125, "learning_rate": 0.001898535219011017, "loss": 0.2745, "step": 15500 }, { "epoch": 0.027486327316377342, "grad_norm": 0.5078125, "learning_rate": 0.0018985077632737073, "loss": 0.2252, "step": 15502 }, { "epoch": 0.02748987348168716, "grad_norm": 0.63671875, "learning_rate": 0.0018984803040441877, "loss": 0.2374, "step": 15504 }, { "epoch": 0.027493419646996975, "grad_norm": 0.365234375, "learning_rate": 0.0018984528413225799, "loss": 0.2092, "step": 15506 }, { "epoch": 0.02749696581230679, "grad_norm": 1.3359375, "learning_rate": 0.001898425375109003, "loss": 0.1766, "step": 15508 }, { "epoch": 0.027500511977616604, "grad_norm": 0.65625, "learning_rate": 0.0018983979054035768, "loss": 0.218, "step": 15510 }, { "epoch": 0.02750405814292642, "grad_norm": 0.435546875, "learning_rate": 0.0018983704322064223, "loss": 0.2114, "step": 15512 }, { "epoch": 0.027507604308236237, "grad_norm": 2.234375, "learning_rate": 0.001898342955517659, "loss": 0.2277, "step": 15514 }, { "epoch": 0.02751115047354605, "grad_norm": 1.890625, "learning_rate": 0.0018983154753374077, "loss": 0.2003, "step": 15516 }, { "epoch": 0.027514696638855866, "grad_norm": 7.65625, "learning_rate": 0.0018982879916657881, "loss": 0.2852, "step": 15518 }, { "epoch": 0.02751824280416568, "grad_norm": 0.51171875, "learning_rate": 0.0018982605045029205, "loss": 0.2034, "step": 15520 }, { "epoch": 0.027521788969475495, "grad_norm": 3.109375, "learning_rate": 0.0018982330138489253, "loss": 0.4603, "step": 15522 }, { "epoch": 0.02752533513478531, "grad_norm": 1.921875, "learning_rate": 0.001898205519703922, "loss": 0.2909, "step": 15524 }, { "epoch": 0.027528881300095127, "grad_norm": 2.78125, "learning_rate": 0.0018981780220680318, "loss": 0.3515, "step": 15526 }, { "epoch": 0.027532427465404942, "grad_norm": 0.8125, "learning_rate": 0.0018981505209413748, "loss": 0.2402, "step": 15528 }, { "epoch": 0.027535973630714757, "grad_norm": 0.3125, "learning_rate": 0.0018981230163240708, "loss": 0.2306, "step": 15530 }, { "epoch": 0.02753951979602457, "grad_norm": 0.43359375, "learning_rate": 0.0018980955082162402, "loss": 0.2403, "step": 15532 }, { "epoch": 0.027543065961334386, "grad_norm": 0.90234375, "learning_rate": 0.0018980679966180039, "loss": 0.2921, "step": 15534 }, { "epoch": 0.0275466121266442, "grad_norm": 0.58984375, "learning_rate": 0.0018980404815294811, "loss": 0.2111, "step": 15536 }, { "epoch": 0.027550158291954018, "grad_norm": 0.6953125, "learning_rate": 0.0018980129629507932, "loss": 0.1853, "step": 15538 }, { "epoch": 0.027553704457263833, "grad_norm": 0.279296875, "learning_rate": 0.0018979854408820603, "loss": 0.1969, "step": 15540 }, { "epoch": 0.027557250622573647, "grad_norm": 0.298828125, "learning_rate": 0.0018979579153234024, "loss": 0.2102, "step": 15542 }, { "epoch": 0.027560796787883462, "grad_norm": 1.0078125, "learning_rate": 0.00189793038627494, "loss": 0.2929, "step": 15544 }, { "epoch": 0.027564342953193276, "grad_norm": 1.4375, "learning_rate": 0.0018979028537367934, "loss": 0.2484, "step": 15546 }, { "epoch": 0.027567889118503094, "grad_norm": 0.357421875, "learning_rate": 0.0018978753177090837, "loss": 0.2442, "step": 15548 }, { "epoch": 0.02757143528381291, "grad_norm": 0.6640625, "learning_rate": 0.0018978477781919306, "loss": 0.21, "step": 15550 }, { "epoch": 0.027574981449122724, "grad_norm": 0.69140625, "learning_rate": 0.0018978202351854546, "loss": 0.2478, "step": 15552 }, { "epoch": 0.027578527614432538, "grad_norm": 0.625, "learning_rate": 0.0018977926886897766, "loss": 0.2219, "step": 15554 }, { "epoch": 0.027582073779742353, "grad_norm": 0.2041015625, "learning_rate": 0.0018977651387050165, "loss": 0.2007, "step": 15556 }, { "epoch": 0.027585619945052167, "grad_norm": 0.30078125, "learning_rate": 0.0018977375852312955, "loss": 0.2055, "step": 15558 }, { "epoch": 0.027589166110361985, "grad_norm": 0.91015625, "learning_rate": 0.0018977100282687332, "loss": 0.2607, "step": 15560 }, { "epoch": 0.0275927122756718, "grad_norm": 0.369140625, "learning_rate": 0.0018976824678174507, "loss": 0.1687, "step": 15562 }, { "epoch": 0.027596258440981614, "grad_norm": 0.71484375, "learning_rate": 0.0018976549038775686, "loss": 0.1937, "step": 15564 }, { "epoch": 0.02759980460629143, "grad_norm": 2.0625, "learning_rate": 0.0018976273364492073, "loss": 0.1824, "step": 15566 }, { "epoch": 0.027603350771601243, "grad_norm": 1.0859375, "learning_rate": 0.0018975997655324873, "loss": 0.2877, "step": 15568 }, { "epoch": 0.027606896936911058, "grad_norm": 0.404296875, "learning_rate": 0.001897572191127529, "loss": 0.2208, "step": 15570 }, { "epoch": 0.027610443102220876, "grad_norm": 0.37890625, "learning_rate": 0.0018975446132344536, "loss": 0.1956, "step": 15572 }, { "epoch": 0.02761398926753069, "grad_norm": 5.5, "learning_rate": 0.0018975170318533813, "loss": 0.322, "step": 15574 }, { "epoch": 0.027617535432840505, "grad_norm": 1.2734375, "learning_rate": 0.0018974894469844326, "loss": 0.1873, "step": 15576 }, { "epoch": 0.02762108159815032, "grad_norm": 0.3046875, "learning_rate": 0.0018974618586277284, "loss": 0.3496, "step": 15578 }, { "epoch": 0.027624627763460134, "grad_norm": 0.4375, "learning_rate": 0.0018974342667833891, "loss": 0.2293, "step": 15580 }, { "epoch": 0.027628173928769952, "grad_norm": 0.81640625, "learning_rate": 0.0018974066714515354, "loss": 0.1992, "step": 15582 }, { "epoch": 0.027631720094079767, "grad_norm": 2.375, "learning_rate": 0.0018973790726322885, "loss": 0.4927, "step": 15584 }, { "epoch": 0.02763526625938958, "grad_norm": 0.80078125, "learning_rate": 0.0018973514703257684, "loss": 0.2591, "step": 15586 }, { "epoch": 0.027638812424699396, "grad_norm": 0.72265625, "learning_rate": 0.001897323864532096, "loss": 0.2868, "step": 15588 }, { "epoch": 0.02764235859000921, "grad_norm": 1.421875, "learning_rate": 0.0018972962552513926, "loss": 0.2461, "step": 15590 }, { "epoch": 0.027645904755319025, "grad_norm": 1.3046875, "learning_rate": 0.0018972686424837781, "loss": 0.2519, "step": 15592 }, { "epoch": 0.027649450920628843, "grad_norm": 0.640625, "learning_rate": 0.001897241026229374, "loss": 0.1797, "step": 15594 }, { "epoch": 0.027652997085938658, "grad_norm": 0.291015625, "learning_rate": 0.0018972134064883005, "loss": 0.2251, "step": 15596 }, { "epoch": 0.027656543251248472, "grad_norm": 1.6953125, "learning_rate": 0.0018971857832606788, "loss": 0.2488, "step": 15598 }, { "epoch": 0.027660089416558287, "grad_norm": 0.53125, "learning_rate": 0.0018971581565466293, "loss": 0.2194, "step": 15600 }, { "epoch": 0.0276636355818681, "grad_norm": 0.80078125, "learning_rate": 0.0018971305263462733, "loss": 0.2385, "step": 15602 }, { "epoch": 0.027667181747177916, "grad_norm": 1.4453125, "learning_rate": 0.0018971028926597316, "loss": 0.2466, "step": 15604 }, { "epoch": 0.027670727912487734, "grad_norm": 0.8515625, "learning_rate": 0.0018970752554871245, "loss": 0.2331, "step": 15606 }, { "epoch": 0.02767427407779755, "grad_norm": 1.8203125, "learning_rate": 0.0018970476148285735, "loss": 0.1913, "step": 15608 }, { "epoch": 0.027677820243107363, "grad_norm": 0.87890625, "learning_rate": 0.0018970199706841988, "loss": 0.2529, "step": 15610 }, { "epoch": 0.027681366408417177, "grad_norm": 1.125, "learning_rate": 0.0018969923230541222, "loss": 0.3198, "step": 15612 }, { "epoch": 0.027684912573726992, "grad_norm": 0.40234375, "learning_rate": 0.0018969646719384638, "loss": 0.2148, "step": 15614 }, { "epoch": 0.02768845873903681, "grad_norm": 0.609375, "learning_rate": 0.001896937017337345, "loss": 0.216, "step": 15616 }, { "epoch": 0.027692004904346625, "grad_norm": 0.58203125, "learning_rate": 0.0018969093592508864, "loss": 0.2154, "step": 15618 }, { "epoch": 0.02769555106965644, "grad_norm": 3.734375, "learning_rate": 0.0018968816976792093, "loss": 0.2969, "step": 15620 }, { "epoch": 0.027699097234966254, "grad_norm": 0.40625, "learning_rate": 0.0018968540326224346, "loss": 0.1977, "step": 15622 }, { "epoch": 0.027702643400276068, "grad_norm": 0.51171875, "learning_rate": 0.001896826364080683, "loss": 0.297, "step": 15624 }, { "epoch": 0.027706189565585883, "grad_norm": 0.462890625, "learning_rate": 0.0018967986920540763, "loss": 0.2014, "step": 15626 }, { "epoch": 0.0277097357308957, "grad_norm": 2.046875, "learning_rate": 0.0018967710165427344, "loss": 0.2084, "step": 15628 }, { "epoch": 0.027713281896205515, "grad_norm": 0.39453125, "learning_rate": 0.0018967433375467792, "loss": 0.2166, "step": 15630 }, { "epoch": 0.02771682806151533, "grad_norm": 0.359375, "learning_rate": 0.0018967156550663312, "loss": 0.2063, "step": 15632 }, { "epoch": 0.027720374226825144, "grad_norm": 0.640625, "learning_rate": 0.0018966879691015117, "loss": 0.2084, "step": 15634 }, { "epoch": 0.02772392039213496, "grad_norm": 0.65234375, "learning_rate": 0.0018966602796524421, "loss": 0.2782, "step": 15636 }, { "epoch": 0.027727466557444774, "grad_norm": 1.078125, "learning_rate": 0.0018966325867192432, "loss": 0.1774, "step": 15638 }, { "epoch": 0.02773101272275459, "grad_norm": 4.03125, "learning_rate": 0.001896604890302036, "loss": 0.3807, "step": 15640 }, { "epoch": 0.027734558888064406, "grad_norm": 0.1953125, "learning_rate": 0.0018965771904009415, "loss": 0.2192, "step": 15642 }, { "epoch": 0.02773810505337422, "grad_norm": 0.4921875, "learning_rate": 0.001896549487016081, "loss": 0.2476, "step": 15644 }, { "epoch": 0.027741651218684035, "grad_norm": 0.67578125, "learning_rate": 0.001896521780147576, "loss": 0.2919, "step": 15646 }, { "epoch": 0.02774519738399385, "grad_norm": 0.482421875, "learning_rate": 0.0018964940697955475, "loss": 0.176, "step": 15648 }, { "epoch": 0.027748743549303664, "grad_norm": 1.1796875, "learning_rate": 0.0018964663559601163, "loss": 0.2488, "step": 15650 }, { "epoch": 0.027752289714613482, "grad_norm": 0.44921875, "learning_rate": 0.0018964386386414042, "loss": 0.1735, "step": 15652 }, { "epoch": 0.027755835879923297, "grad_norm": 0.4375, "learning_rate": 0.001896410917839532, "loss": 0.1998, "step": 15654 }, { "epoch": 0.02775938204523311, "grad_norm": 0.48828125, "learning_rate": 0.0018963831935546209, "loss": 0.2088, "step": 15656 }, { "epoch": 0.027762928210542926, "grad_norm": 0.2255859375, "learning_rate": 0.0018963554657867926, "loss": 0.2592, "step": 15658 }, { "epoch": 0.02776647437585274, "grad_norm": 0.50390625, "learning_rate": 0.0018963277345361677, "loss": 0.1769, "step": 15660 }, { "epoch": 0.02777002054116256, "grad_norm": 1.4140625, "learning_rate": 0.0018962999998028683, "loss": 0.2791, "step": 15662 }, { "epoch": 0.027773566706472373, "grad_norm": 0.466796875, "learning_rate": 0.001896272261587015, "loss": 0.2015, "step": 15664 }, { "epoch": 0.027777112871782188, "grad_norm": 3.015625, "learning_rate": 0.0018962445198887296, "loss": 0.2435, "step": 15666 }, { "epoch": 0.027780659037092002, "grad_norm": 4.0625, "learning_rate": 0.001896216774708133, "loss": 0.237, "step": 15668 }, { "epoch": 0.027784205202401817, "grad_norm": 0.5703125, "learning_rate": 0.0018961890260453464, "loss": 0.2398, "step": 15670 }, { "epoch": 0.02778775136771163, "grad_norm": 0.6796875, "learning_rate": 0.0018961612739004919, "loss": 0.1938, "step": 15672 }, { "epoch": 0.02779129753302145, "grad_norm": 0.73046875, "learning_rate": 0.0018961335182736904, "loss": 0.2593, "step": 15674 }, { "epoch": 0.027794843698331264, "grad_norm": 1.2265625, "learning_rate": 0.001896105759165063, "loss": 0.243, "step": 15676 }, { "epoch": 0.02779838986364108, "grad_norm": 1.921875, "learning_rate": 0.001896077996574732, "loss": 0.2006, "step": 15678 }, { "epoch": 0.027801936028950893, "grad_norm": 0.875, "learning_rate": 0.0018960502305028178, "loss": 0.3268, "step": 15680 }, { "epoch": 0.027805482194260708, "grad_norm": 0.412109375, "learning_rate": 0.0018960224609494423, "loss": 0.1961, "step": 15682 }, { "epoch": 0.027809028359570522, "grad_norm": 2.484375, "learning_rate": 0.0018959946879147274, "loss": 0.3464, "step": 15684 }, { "epoch": 0.02781257452488034, "grad_norm": 0.59375, "learning_rate": 0.0018959669113987937, "loss": 0.2466, "step": 15686 }, { "epoch": 0.027816120690190155, "grad_norm": 0.6796875, "learning_rate": 0.001895939131401763, "loss": 0.2505, "step": 15688 }, { "epoch": 0.02781966685549997, "grad_norm": 0.66015625, "learning_rate": 0.001895911347923757, "loss": 0.2469, "step": 15690 }, { "epoch": 0.027823213020809784, "grad_norm": 0.431640625, "learning_rate": 0.0018958835609648973, "loss": 0.2324, "step": 15692 }, { "epoch": 0.0278267591861196, "grad_norm": 0.44921875, "learning_rate": 0.0018958557705253051, "loss": 0.256, "step": 15694 }, { "epoch": 0.027830305351429416, "grad_norm": 1.625, "learning_rate": 0.0018958279766051018, "loss": 0.198, "step": 15696 }, { "epoch": 0.02783385151673923, "grad_norm": 0.369140625, "learning_rate": 0.0018958001792044096, "loss": 0.1609, "step": 15698 }, { "epoch": 0.027837397682049046, "grad_norm": 1.1484375, "learning_rate": 0.0018957723783233494, "loss": 0.3339, "step": 15700 }, { "epoch": 0.02784094384735886, "grad_norm": 0.26953125, "learning_rate": 0.0018957445739620432, "loss": 0.1605, "step": 15702 }, { "epoch": 0.027844490012668675, "grad_norm": 2.015625, "learning_rate": 0.0018957167661206125, "loss": 0.2186, "step": 15704 }, { "epoch": 0.02784803617797849, "grad_norm": 0.419921875, "learning_rate": 0.0018956889547991787, "loss": 0.2542, "step": 15706 }, { "epoch": 0.027851582343288307, "grad_norm": 0.80078125, "learning_rate": 0.0018956611399978637, "loss": 0.2654, "step": 15708 }, { "epoch": 0.027855128508598122, "grad_norm": 3.84375, "learning_rate": 0.001895633321716789, "loss": 0.3302, "step": 15710 }, { "epoch": 0.027858674673907936, "grad_norm": 1.25, "learning_rate": 0.0018956054999560762, "loss": 0.1935, "step": 15712 }, { "epoch": 0.02786222083921775, "grad_norm": 1.3046875, "learning_rate": 0.001895577674715847, "loss": 0.2726, "step": 15714 }, { "epoch": 0.027865767004527565, "grad_norm": 0.68359375, "learning_rate": 0.0018955498459962234, "loss": 0.1816, "step": 15716 }, { "epoch": 0.02786931316983738, "grad_norm": 0.55078125, "learning_rate": 0.001895522013797327, "loss": 0.1816, "step": 15718 }, { "epoch": 0.027872859335147198, "grad_norm": 0.248046875, "learning_rate": 0.0018954941781192795, "loss": 0.217, "step": 15720 }, { "epoch": 0.027876405500457013, "grad_norm": 0.416015625, "learning_rate": 0.0018954663389622022, "loss": 0.1722, "step": 15722 }, { "epoch": 0.027879951665766827, "grad_norm": 0.71875, "learning_rate": 0.0018954384963262175, "loss": 0.198, "step": 15724 }, { "epoch": 0.02788349783107664, "grad_norm": 2.3125, "learning_rate": 0.0018954106502114465, "loss": 0.3218, "step": 15726 }, { "epoch": 0.027887043996386456, "grad_norm": 1.8671875, "learning_rate": 0.0018953828006180117, "loss": 0.3218, "step": 15728 }, { "epoch": 0.027890590161696274, "grad_norm": 0.310546875, "learning_rate": 0.0018953549475460342, "loss": 0.1864, "step": 15730 }, { "epoch": 0.02789413632700609, "grad_norm": 0.6953125, "learning_rate": 0.0018953270909956364, "loss": 0.2432, "step": 15732 }, { "epoch": 0.027897682492315903, "grad_norm": 0.330078125, "learning_rate": 0.0018952992309669396, "loss": 0.1888, "step": 15734 }, { "epoch": 0.027901228657625718, "grad_norm": 0.76171875, "learning_rate": 0.001895271367460066, "loss": 0.2592, "step": 15736 }, { "epoch": 0.027904774822935532, "grad_norm": 0.298828125, "learning_rate": 0.001895243500475138, "loss": 0.1659, "step": 15738 }, { "epoch": 0.027908320988245347, "grad_norm": 0.484375, "learning_rate": 0.0018952156300122763, "loss": 0.2086, "step": 15740 }, { "epoch": 0.027911867153555165, "grad_norm": 0.6640625, "learning_rate": 0.0018951877560716033, "loss": 0.2274, "step": 15742 }, { "epoch": 0.02791541331886498, "grad_norm": 0.421875, "learning_rate": 0.001895159878653241, "loss": 0.1829, "step": 15744 }, { "epoch": 0.027918959484174794, "grad_norm": 0.72265625, "learning_rate": 0.0018951319977573114, "loss": 0.2438, "step": 15746 }, { "epoch": 0.02792250564948461, "grad_norm": 0.796875, "learning_rate": 0.0018951041133839365, "loss": 0.1785, "step": 15748 }, { "epoch": 0.027926051814794423, "grad_norm": 8.75, "learning_rate": 0.0018950762255332378, "loss": 0.2103, "step": 15750 }, { "epoch": 0.027929597980104238, "grad_norm": 0.50390625, "learning_rate": 0.0018950483342053373, "loss": 0.2072, "step": 15752 }, { "epoch": 0.027933144145414056, "grad_norm": 2.015625, "learning_rate": 0.0018950204394003573, "loss": 0.2162, "step": 15754 }, { "epoch": 0.02793669031072387, "grad_norm": 0.5859375, "learning_rate": 0.0018949925411184201, "loss": 0.2156, "step": 15756 }, { "epoch": 0.027940236476033685, "grad_norm": 0.828125, "learning_rate": 0.0018949646393596469, "loss": 0.1922, "step": 15758 }, { "epoch": 0.0279437826413435, "grad_norm": 0.7578125, "learning_rate": 0.0018949367341241606, "loss": 0.1897, "step": 15760 }, { "epoch": 0.027947328806653314, "grad_norm": 0.43359375, "learning_rate": 0.0018949088254120825, "loss": 0.2115, "step": 15762 }, { "epoch": 0.027950874971963132, "grad_norm": 0.84375, "learning_rate": 0.0018948809132235348, "loss": 0.1647, "step": 15764 }, { "epoch": 0.027954421137272947, "grad_norm": 1.6328125, "learning_rate": 0.00189485299755864, "loss": 0.3455, "step": 15766 }, { "epoch": 0.02795796730258276, "grad_norm": 1.0234375, "learning_rate": 0.0018948250784175198, "loss": 0.2627, "step": 15768 }, { "epoch": 0.027961513467892576, "grad_norm": 0.396484375, "learning_rate": 0.0018947971558002962, "loss": 0.2093, "step": 15770 }, { "epoch": 0.02796505963320239, "grad_norm": 0.26953125, "learning_rate": 0.0018947692297070916, "loss": 0.1956, "step": 15772 }, { "epoch": 0.027968605798512205, "grad_norm": 0.5234375, "learning_rate": 0.001894741300138028, "loss": 0.1643, "step": 15774 }, { "epoch": 0.027972151963822023, "grad_norm": 0.734375, "learning_rate": 0.0018947133670932277, "loss": 0.1987, "step": 15776 }, { "epoch": 0.027975698129131837, "grad_norm": 0.2021484375, "learning_rate": 0.001894685430572813, "loss": 0.1331, "step": 15778 }, { "epoch": 0.027979244294441652, "grad_norm": 0.6484375, "learning_rate": 0.0018946574905769052, "loss": 0.2511, "step": 15780 }, { "epoch": 0.027982790459751467, "grad_norm": 1.3984375, "learning_rate": 0.0018946295471056276, "loss": 0.2689, "step": 15782 }, { "epoch": 0.02798633662506128, "grad_norm": 0.77734375, "learning_rate": 0.0018946016001591018, "loss": 0.281, "step": 15784 }, { "epoch": 0.027989882790371096, "grad_norm": 2.171875, "learning_rate": 0.0018945736497374501, "loss": 0.3021, "step": 15786 }, { "epoch": 0.027993428955680914, "grad_norm": 0.26953125, "learning_rate": 0.0018945456958407952, "loss": 0.2186, "step": 15788 }, { "epoch": 0.027996975120990728, "grad_norm": 0.8828125, "learning_rate": 0.0018945177384692587, "loss": 0.2289, "step": 15790 }, { "epoch": 0.028000521286300543, "grad_norm": 1.0234375, "learning_rate": 0.001894489777622963, "loss": 0.2135, "step": 15792 }, { "epoch": 0.028004067451610357, "grad_norm": 1.5625, "learning_rate": 0.0018944618133020306, "loss": 0.4, "step": 15794 }, { "epoch": 0.028007613616920172, "grad_norm": 0.69921875, "learning_rate": 0.0018944338455065836, "loss": 0.1867, "step": 15796 }, { "epoch": 0.02801115978222999, "grad_norm": 1.1015625, "learning_rate": 0.0018944058742367445, "loss": 0.181, "step": 15798 }, { "epoch": 0.028014705947539804, "grad_norm": 0.89453125, "learning_rate": 0.0018943778994926356, "loss": 0.2815, "step": 15800 }, { "epoch": 0.02801825211284962, "grad_norm": 0.64453125, "learning_rate": 0.001894349921274379, "loss": 0.2253, "step": 15802 }, { "epoch": 0.028021798278159434, "grad_norm": 0.5625, "learning_rate": 0.0018943219395820977, "loss": 0.1856, "step": 15804 }, { "epoch": 0.028025344443469248, "grad_norm": 0.29296875, "learning_rate": 0.0018942939544159133, "loss": 0.1999, "step": 15806 }, { "epoch": 0.028028890608779063, "grad_norm": 0.6796875, "learning_rate": 0.0018942659657759484, "loss": 0.2594, "step": 15808 }, { "epoch": 0.02803243677408888, "grad_norm": 1.5859375, "learning_rate": 0.001894237973662326, "loss": 0.2131, "step": 15810 }, { "epoch": 0.028035982939398695, "grad_norm": 0.365234375, "learning_rate": 0.0018942099780751675, "loss": 0.2195, "step": 15812 }, { "epoch": 0.02803952910470851, "grad_norm": 1.5, "learning_rate": 0.0018941819790145962, "loss": 0.3423, "step": 15814 }, { "epoch": 0.028043075270018324, "grad_norm": 5.9375, "learning_rate": 0.0018941539764807344, "loss": 0.4074, "step": 15816 }, { "epoch": 0.02804662143532814, "grad_norm": 0.69140625, "learning_rate": 0.0018941259704737042, "loss": 0.1996, "step": 15818 }, { "epoch": 0.028050167600637953, "grad_norm": 0.83984375, "learning_rate": 0.001894097960993628, "loss": 0.2494, "step": 15820 }, { "epoch": 0.02805371376594777, "grad_norm": 0.44921875, "learning_rate": 0.001894069948040629, "loss": 0.2007, "step": 15822 }, { "epoch": 0.028057259931257586, "grad_norm": 1.0390625, "learning_rate": 0.0018940419316148289, "loss": 0.2276, "step": 15824 }, { "epoch": 0.0280608060965674, "grad_norm": 0.90234375, "learning_rate": 0.001894013911716351, "loss": 0.2376, "step": 15826 }, { "epoch": 0.028064352261877215, "grad_norm": 0.66015625, "learning_rate": 0.0018939858883453174, "loss": 0.1771, "step": 15828 }, { "epoch": 0.02806789842718703, "grad_norm": 7.84375, "learning_rate": 0.0018939578615018507, "loss": 0.2792, "step": 15830 }, { "epoch": 0.028071444592496848, "grad_norm": 0.5546875, "learning_rate": 0.0018939298311860732, "loss": 0.1768, "step": 15832 }, { "epoch": 0.028074990757806662, "grad_norm": 18.25, "learning_rate": 0.0018939017973981084, "loss": 0.2214, "step": 15834 }, { "epoch": 0.028078536923116477, "grad_norm": 2.109375, "learning_rate": 0.001893873760138078, "loss": 0.333, "step": 15836 }, { "epoch": 0.02808208308842629, "grad_norm": 0.58203125, "learning_rate": 0.0018938457194061047, "loss": 0.2659, "step": 15838 }, { "epoch": 0.028085629253736106, "grad_norm": 0.98046875, "learning_rate": 0.0018938176752023115, "loss": 0.1937, "step": 15840 }, { "epoch": 0.02808917541904592, "grad_norm": 0.67578125, "learning_rate": 0.001893789627526821, "loss": 0.2785, "step": 15842 }, { "epoch": 0.02809272158435574, "grad_norm": 0.3046875, "learning_rate": 0.0018937615763797557, "loss": 0.2623, "step": 15844 }, { "epoch": 0.028096267749665553, "grad_norm": 0.54296875, "learning_rate": 0.0018937335217612385, "loss": 0.2229, "step": 15846 }, { "epoch": 0.028099813914975368, "grad_norm": 0.56640625, "learning_rate": 0.0018937054636713915, "loss": 0.179, "step": 15848 }, { "epoch": 0.028103360080285182, "grad_norm": 1.359375, "learning_rate": 0.001893677402110338, "loss": 0.2584, "step": 15850 }, { "epoch": 0.028106906245594997, "grad_norm": 0.439453125, "learning_rate": 0.0018936493370782007, "loss": 0.2035, "step": 15852 }, { "epoch": 0.02811045241090481, "grad_norm": 0.6015625, "learning_rate": 0.001893621268575102, "loss": 0.2927, "step": 15854 }, { "epoch": 0.02811399857621463, "grad_norm": 0.291015625, "learning_rate": 0.001893593196601165, "loss": 0.1962, "step": 15856 }, { "epoch": 0.028117544741524444, "grad_norm": 0.640625, "learning_rate": 0.0018935651211565126, "loss": 0.2311, "step": 15858 }, { "epoch": 0.02812109090683426, "grad_norm": 0.59375, "learning_rate": 0.001893537042241267, "loss": 0.2132, "step": 15860 }, { "epoch": 0.028124637072144073, "grad_norm": 0.65625, "learning_rate": 0.0018935089598555513, "loss": 0.2103, "step": 15862 }, { "epoch": 0.028128183237453887, "grad_norm": 1.0078125, "learning_rate": 0.0018934808739994884, "loss": 0.1775, "step": 15864 }, { "epoch": 0.028131729402763705, "grad_norm": 0.6953125, "learning_rate": 0.001893452784673201, "loss": 0.3314, "step": 15866 }, { "epoch": 0.02813527556807352, "grad_norm": 0.31640625, "learning_rate": 0.0018934246918768122, "loss": 0.1797, "step": 15868 }, { "epoch": 0.028138821733383335, "grad_norm": 0.76953125, "learning_rate": 0.0018933965956104443, "loss": 0.2096, "step": 15870 }, { "epoch": 0.02814236789869315, "grad_norm": 0.625, "learning_rate": 0.001893368495874221, "loss": 0.1255, "step": 15872 }, { "epoch": 0.028145914064002964, "grad_norm": 0.6484375, "learning_rate": 0.0018933403926682647, "loss": 0.2218, "step": 15874 }, { "epoch": 0.028149460229312778, "grad_norm": 1.3203125, "learning_rate": 0.0018933122859926981, "loss": 0.2418, "step": 15876 }, { "epoch": 0.028153006394622596, "grad_norm": 0.5390625, "learning_rate": 0.0018932841758476445, "loss": 0.1882, "step": 15878 }, { "epoch": 0.02815655255993241, "grad_norm": 0.79296875, "learning_rate": 0.0018932560622332267, "loss": 0.3185, "step": 15880 }, { "epoch": 0.028160098725242225, "grad_norm": 0.90234375, "learning_rate": 0.0018932279451495675, "loss": 0.2732, "step": 15882 }, { "epoch": 0.02816364489055204, "grad_norm": 1.484375, "learning_rate": 0.0018931998245967903, "loss": 0.365, "step": 15884 }, { "epoch": 0.028167191055861854, "grad_norm": 0.6875, "learning_rate": 0.0018931717005750176, "loss": 0.1813, "step": 15886 }, { "epoch": 0.02817073722117167, "grad_norm": 0.55078125, "learning_rate": 0.0018931435730843726, "loss": 0.2858, "step": 15888 }, { "epoch": 0.028174283386481487, "grad_norm": 1.84375, "learning_rate": 0.0018931154421249787, "loss": 0.3423, "step": 15890 }, { "epoch": 0.0281778295517913, "grad_norm": 1.5, "learning_rate": 0.001893087307696958, "loss": 0.2913, "step": 15892 }, { "epoch": 0.028181375717101116, "grad_norm": 0.68359375, "learning_rate": 0.0018930591698004347, "loss": 0.2432, "step": 15894 }, { "epoch": 0.02818492188241093, "grad_norm": 1.6875, "learning_rate": 0.0018930310284355305, "loss": 0.2646, "step": 15896 }, { "epoch": 0.028188468047720745, "grad_norm": 0.7265625, "learning_rate": 0.0018930028836023698, "loss": 0.2917, "step": 15898 }, { "epoch": 0.028192014213030563, "grad_norm": 0.46875, "learning_rate": 0.0018929747353010752, "loss": 0.2206, "step": 15900 }, { "epoch": 0.028195560378340378, "grad_norm": 0.515625, "learning_rate": 0.0018929465835317695, "loss": 0.2153, "step": 15902 }, { "epoch": 0.028199106543650192, "grad_norm": 0.5546875, "learning_rate": 0.001892918428294576, "loss": 0.2367, "step": 15904 }, { "epoch": 0.028202652708960007, "grad_norm": 0.5234375, "learning_rate": 0.0018928902695896177, "loss": 0.1805, "step": 15906 }, { "epoch": 0.02820619887426982, "grad_norm": 0.93359375, "learning_rate": 0.0018928621074170184, "loss": 0.203, "step": 15908 }, { "epoch": 0.028209745039579636, "grad_norm": 1.21875, "learning_rate": 0.0018928339417769002, "loss": 0.2046, "step": 15910 }, { "epoch": 0.028213291204889454, "grad_norm": 0.78515625, "learning_rate": 0.0018928057726693872, "loss": 0.2524, "step": 15912 }, { "epoch": 0.02821683737019927, "grad_norm": 0.396484375, "learning_rate": 0.0018927776000946025, "loss": 0.1715, "step": 15914 }, { "epoch": 0.028220383535509083, "grad_norm": 1.609375, "learning_rate": 0.0018927494240526688, "loss": 0.2176, "step": 15916 }, { "epoch": 0.028223929700818898, "grad_norm": 0.7421875, "learning_rate": 0.0018927212445437097, "loss": 0.213, "step": 15918 }, { "epoch": 0.028227475866128712, "grad_norm": 0.50390625, "learning_rate": 0.0018926930615678482, "loss": 0.1996, "step": 15920 }, { "epoch": 0.028231022031438527, "grad_norm": 0.55859375, "learning_rate": 0.0018926648751252078, "loss": 0.2574, "step": 15922 }, { "epoch": 0.028234568196748345, "grad_norm": 0.27734375, "learning_rate": 0.0018926366852159118, "loss": 0.1654, "step": 15924 }, { "epoch": 0.02823811436205816, "grad_norm": 0.69140625, "learning_rate": 0.001892608491840083, "loss": 0.1936, "step": 15926 }, { "epoch": 0.028241660527367974, "grad_norm": 0.29296875, "learning_rate": 0.0018925802949978454, "loss": 0.2084, "step": 15928 }, { "epoch": 0.02824520669267779, "grad_norm": 1.109375, "learning_rate": 0.001892552094689322, "loss": 0.3007, "step": 15930 }, { "epoch": 0.028248752857987603, "grad_norm": 0.90234375, "learning_rate": 0.0018925238909146358, "loss": 0.2479, "step": 15932 }, { "epoch": 0.02825229902329742, "grad_norm": 0.51171875, "learning_rate": 0.0018924956836739106, "loss": 0.2443, "step": 15934 }, { "epoch": 0.028255845188607236, "grad_norm": 0.47265625, "learning_rate": 0.0018924674729672694, "loss": 0.1874, "step": 15936 }, { "epoch": 0.02825939135391705, "grad_norm": 0.62109375, "learning_rate": 0.0018924392587948366, "loss": 0.2762, "step": 15938 }, { "epoch": 0.028262937519226865, "grad_norm": 0.353515625, "learning_rate": 0.001892411041156734, "loss": 0.2239, "step": 15940 }, { "epoch": 0.02826648368453668, "grad_norm": 1.75, "learning_rate": 0.001892382820053086, "loss": 0.2542, "step": 15942 }, { "epoch": 0.028270029849846494, "grad_norm": 0.390625, "learning_rate": 0.0018923545954840156, "loss": 0.2308, "step": 15944 }, { "epoch": 0.028273576015156312, "grad_norm": 0.64453125, "learning_rate": 0.0018923263674496466, "loss": 0.3358, "step": 15946 }, { "epoch": 0.028277122180466126, "grad_norm": 0.703125, "learning_rate": 0.0018922981359501025, "loss": 0.2517, "step": 15948 }, { "epoch": 0.02828066834577594, "grad_norm": 0.435546875, "learning_rate": 0.0018922699009855065, "loss": 0.2051, "step": 15950 }, { "epoch": 0.028284214511085756, "grad_norm": 0.2392578125, "learning_rate": 0.0018922416625559819, "loss": 0.2164, "step": 15952 }, { "epoch": 0.02828776067639557, "grad_norm": 0.94921875, "learning_rate": 0.0018922134206616529, "loss": 0.1689, "step": 15954 }, { "epoch": 0.028291306841705385, "grad_norm": 0.3125, "learning_rate": 0.0018921851753026424, "loss": 0.1976, "step": 15956 }, { "epoch": 0.028294853007015203, "grad_norm": 3.046875, "learning_rate": 0.001892156926479074, "loss": 0.2118, "step": 15958 }, { "epoch": 0.028298399172325017, "grad_norm": 0.46484375, "learning_rate": 0.0018921286741910713, "loss": 0.2075, "step": 15960 }, { "epoch": 0.028301945337634832, "grad_norm": 0.51171875, "learning_rate": 0.001892100418438758, "loss": 0.2397, "step": 15962 }, { "epoch": 0.028305491502944646, "grad_norm": 0.5859375, "learning_rate": 0.0018920721592222574, "loss": 0.1644, "step": 15964 }, { "epoch": 0.02830903766825446, "grad_norm": 0.26171875, "learning_rate": 0.0018920438965416935, "loss": 0.2419, "step": 15966 }, { "epoch": 0.02831258383356428, "grad_norm": 0.5234375, "learning_rate": 0.0018920156303971897, "loss": 0.2436, "step": 15968 }, { "epoch": 0.028316129998874093, "grad_norm": 0.5703125, "learning_rate": 0.0018919873607888694, "loss": 0.2288, "step": 15970 }, { "epoch": 0.028319676164183908, "grad_norm": 2.015625, "learning_rate": 0.0018919590877168567, "loss": 0.1821, "step": 15972 }, { "epoch": 0.028323222329493723, "grad_norm": 0.408203125, "learning_rate": 0.0018919308111812745, "loss": 0.179, "step": 15974 }, { "epoch": 0.028326768494803537, "grad_norm": 0.3828125, "learning_rate": 0.0018919025311822475, "loss": 0.2558, "step": 15976 }, { "epoch": 0.02833031466011335, "grad_norm": 0.5546875, "learning_rate": 0.0018918742477198984, "loss": 0.1427, "step": 15978 }, { "epoch": 0.02833386082542317, "grad_norm": 0.51953125, "learning_rate": 0.0018918459607943515, "loss": 0.1755, "step": 15980 }, { "epoch": 0.028337406990732984, "grad_norm": 0.59375, "learning_rate": 0.0018918176704057301, "loss": 0.2999, "step": 15982 }, { "epoch": 0.0283409531560428, "grad_norm": 0.60546875, "learning_rate": 0.0018917893765541586, "loss": 0.2466, "step": 15984 }, { "epoch": 0.028344499321352613, "grad_norm": 0.2890625, "learning_rate": 0.00189176107923976, "loss": 0.3597, "step": 15986 }, { "epoch": 0.028348045486662428, "grad_norm": 0.6171875, "learning_rate": 0.0018917327784626584, "loss": 0.2457, "step": 15988 }, { "epoch": 0.028351591651972242, "grad_norm": 0.796875, "learning_rate": 0.0018917044742229772, "loss": 0.1995, "step": 15990 }, { "epoch": 0.02835513781728206, "grad_norm": 0.6484375, "learning_rate": 0.0018916761665208409, "loss": 0.1778, "step": 15992 }, { "epoch": 0.028358683982591875, "grad_norm": 0.5625, "learning_rate": 0.001891647855356373, "loss": 0.3216, "step": 15994 }, { "epoch": 0.02836223014790169, "grad_norm": 0.2021484375, "learning_rate": 0.0018916195407296967, "loss": 0.2082, "step": 15996 }, { "epoch": 0.028365776313211504, "grad_norm": 1.171875, "learning_rate": 0.001891591222640937, "loss": 0.2779, "step": 15998 }, { "epoch": 0.02836932247852132, "grad_norm": 0.474609375, "learning_rate": 0.0018915629010902166, "loss": 0.2353, "step": 16000 }, { "epoch": 0.028372868643831137, "grad_norm": 0.53125, "learning_rate": 0.00189153457607766, "loss": 0.2481, "step": 16002 }, { "epoch": 0.02837641480914095, "grad_norm": 1.046875, "learning_rate": 0.0018915062476033914, "loss": 0.2794, "step": 16004 }, { "epoch": 0.028379960974450766, "grad_norm": 0.322265625, "learning_rate": 0.0018914779156675335, "loss": 0.1641, "step": 16006 }, { "epoch": 0.02838350713976058, "grad_norm": 4.40625, "learning_rate": 0.0018914495802702113, "loss": 0.3629, "step": 16008 }, { "epoch": 0.028387053305070395, "grad_norm": 0.326171875, "learning_rate": 0.0018914212414115486, "loss": 0.2027, "step": 16010 }, { "epoch": 0.02839059947038021, "grad_norm": 0.546875, "learning_rate": 0.0018913928990916687, "loss": 0.2024, "step": 16012 }, { "epoch": 0.028394145635690028, "grad_norm": 1.015625, "learning_rate": 0.001891364553310696, "loss": 0.2727, "step": 16014 }, { "epoch": 0.028397691800999842, "grad_norm": 0.52734375, "learning_rate": 0.0018913362040687547, "loss": 0.1945, "step": 16016 }, { "epoch": 0.028401237966309657, "grad_norm": 1.078125, "learning_rate": 0.0018913078513659682, "loss": 0.3466, "step": 16018 }, { "epoch": 0.02840478413161947, "grad_norm": 0.90625, "learning_rate": 0.001891279495202461, "loss": 0.1855, "step": 16020 }, { "epoch": 0.028408330296929286, "grad_norm": 0.7265625, "learning_rate": 0.0018912511355783567, "loss": 0.1622, "step": 16022 }, { "epoch": 0.0284118764622391, "grad_norm": 1.1953125, "learning_rate": 0.0018912227724937798, "loss": 0.2448, "step": 16024 }, { "epoch": 0.02841542262754892, "grad_norm": 0.53515625, "learning_rate": 0.0018911944059488539, "loss": 0.2216, "step": 16026 }, { "epoch": 0.028418968792858733, "grad_norm": 0.69921875, "learning_rate": 0.0018911660359437034, "loss": 0.3516, "step": 16028 }, { "epoch": 0.028422514958168547, "grad_norm": 1.0625, "learning_rate": 0.0018911376624784525, "loss": 0.204, "step": 16030 }, { "epoch": 0.028426061123478362, "grad_norm": 0.515625, "learning_rate": 0.0018911092855532243, "loss": 0.2476, "step": 16032 }, { "epoch": 0.028429607288788177, "grad_norm": 0.609375, "learning_rate": 0.001891080905168144, "loss": 0.2303, "step": 16034 }, { "epoch": 0.028433153454097995, "grad_norm": 0.421875, "learning_rate": 0.0018910525213233355, "loss": 0.2817, "step": 16036 }, { "epoch": 0.02843669961940781, "grad_norm": 0.59765625, "learning_rate": 0.0018910241340189225, "loss": 0.1912, "step": 16038 }, { "epoch": 0.028440245784717624, "grad_norm": 0.8203125, "learning_rate": 0.0018909957432550297, "loss": 0.2838, "step": 16040 }, { "epoch": 0.028443791950027438, "grad_norm": 0.271484375, "learning_rate": 0.0018909673490317806, "loss": 0.1876, "step": 16042 }, { "epoch": 0.028447338115337253, "grad_norm": 0.8515625, "learning_rate": 0.0018909389513493, "loss": 0.1592, "step": 16044 }, { "epoch": 0.028450884280647067, "grad_norm": 0.421875, "learning_rate": 0.0018909105502077118, "loss": 0.1832, "step": 16046 }, { "epoch": 0.028454430445956885, "grad_norm": 5.53125, "learning_rate": 0.0018908821456071403, "loss": 0.2691, "step": 16048 }, { "epoch": 0.0284579766112667, "grad_norm": 0.5546875, "learning_rate": 0.0018908537375477098, "loss": 0.2313, "step": 16050 }, { "epoch": 0.028461522776576514, "grad_norm": 6.125, "learning_rate": 0.0018908253260295443, "loss": 0.5124, "step": 16052 }, { "epoch": 0.02846506894188633, "grad_norm": 1.046875, "learning_rate": 0.001890796911052768, "loss": 0.2686, "step": 16054 }, { "epoch": 0.028468615107196144, "grad_norm": 0.84375, "learning_rate": 0.0018907684926175057, "loss": 0.2473, "step": 16056 }, { "epoch": 0.028472161272505958, "grad_norm": 0.59765625, "learning_rate": 0.0018907400707238813, "loss": 0.2157, "step": 16058 }, { "epoch": 0.028475707437815776, "grad_norm": 1.1171875, "learning_rate": 0.001890711645372019, "loss": 0.3402, "step": 16060 }, { "epoch": 0.02847925360312559, "grad_norm": 0.30078125, "learning_rate": 0.001890683216562043, "loss": 0.309, "step": 16062 }, { "epoch": 0.028482799768435405, "grad_norm": 0.486328125, "learning_rate": 0.0018906547842940785, "loss": 0.1839, "step": 16064 }, { "epoch": 0.02848634593374522, "grad_norm": 0.80859375, "learning_rate": 0.001890626348568249, "loss": 0.2789, "step": 16066 }, { "epoch": 0.028489892099055034, "grad_norm": 0.72265625, "learning_rate": 0.001890597909384679, "loss": 0.186, "step": 16068 }, { "epoch": 0.028493438264364852, "grad_norm": 0.609375, "learning_rate": 0.0018905694667434932, "loss": 0.2678, "step": 16070 }, { "epoch": 0.028496984429674667, "grad_norm": 0.8359375, "learning_rate": 0.0018905410206448154, "loss": 0.2975, "step": 16072 }, { "epoch": 0.02850053059498448, "grad_norm": 1.65625, "learning_rate": 0.0018905125710887707, "loss": 0.2584, "step": 16074 }, { "epoch": 0.028504076760294296, "grad_norm": 0.5390625, "learning_rate": 0.001890484118075483, "loss": 0.2225, "step": 16076 }, { "epoch": 0.02850762292560411, "grad_norm": 1.84375, "learning_rate": 0.0018904556616050772, "loss": 0.4534, "step": 16078 }, { "epoch": 0.028511169090913925, "grad_norm": 1.25, "learning_rate": 0.001890427201677677, "loss": 0.2263, "step": 16080 }, { "epoch": 0.028514715256223743, "grad_norm": 0.373046875, "learning_rate": 0.001890398738293408, "loss": 0.1873, "step": 16082 }, { "epoch": 0.028518261421533558, "grad_norm": 0.84375, "learning_rate": 0.0018903702714523933, "loss": 0.1838, "step": 16084 }, { "epoch": 0.028521807586843372, "grad_norm": 4.15625, "learning_rate": 0.0018903418011547589, "loss": 0.3164, "step": 16086 }, { "epoch": 0.028525353752153187, "grad_norm": 0.640625, "learning_rate": 0.0018903133274006281, "loss": 0.251, "step": 16088 }, { "epoch": 0.028528899917463, "grad_norm": 0.3671875, "learning_rate": 0.0018902848501901257, "loss": 0.2652, "step": 16090 }, { "epoch": 0.028532446082772816, "grad_norm": 0.671875, "learning_rate": 0.0018902563695233768, "loss": 0.2756, "step": 16092 }, { "epoch": 0.028535992248082634, "grad_norm": 1.984375, "learning_rate": 0.0018902278854005057, "loss": 0.3013, "step": 16094 }, { "epoch": 0.02853953841339245, "grad_norm": 0.48828125, "learning_rate": 0.0018901993978216363, "loss": 0.2504, "step": 16096 }, { "epoch": 0.028543084578702263, "grad_norm": 1.5859375, "learning_rate": 0.001890170906786894, "loss": 0.2205, "step": 16098 }, { "epoch": 0.028546630744012078, "grad_norm": 0.7890625, "learning_rate": 0.0018901424122964032, "loss": 0.2133, "step": 16100 }, { "epoch": 0.028550176909321892, "grad_norm": 0.65625, "learning_rate": 0.0018901139143502885, "loss": 0.2287, "step": 16102 }, { "epoch": 0.02855372307463171, "grad_norm": 4.375, "learning_rate": 0.0018900854129486745, "loss": 0.2745, "step": 16104 }, { "epoch": 0.028557269239941525, "grad_norm": 0.5546875, "learning_rate": 0.0018900569080916858, "loss": 0.2275, "step": 16106 }, { "epoch": 0.02856081540525134, "grad_norm": 0.82421875, "learning_rate": 0.001890028399779447, "loss": 0.3168, "step": 16108 }, { "epoch": 0.028564361570561154, "grad_norm": 1.546875, "learning_rate": 0.001889999888012083, "loss": 0.2457, "step": 16110 }, { "epoch": 0.02856790773587097, "grad_norm": 0.326171875, "learning_rate": 0.001889971372789718, "loss": 0.188, "step": 16112 }, { "epoch": 0.028571453901180783, "grad_norm": 0.703125, "learning_rate": 0.0018899428541124777, "loss": 0.2551, "step": 16114 }, { "epoch": 0.0285750000664906, "grad_norm": 0.54296875, "learning_rate": 0.0018899143319804858, "loss": 0.1573, "step": 16116 }, { "epoch": 0.028578546231800415, "grad_norm": 0.5, "learning_rate": 0.0018898858063938677, "loss": 0.1726, "step": 16118 }, { "epoch": 0.02858209239711023, "grad_norm": 0.373046875, "learning_rate": 0.0018898572773527478, "loss": 0.1967, "step": 16120 }, { "epoch": 0.028585638562420045, "grad_norm": 0.48828125, "learning_rate": 0.0018898287448572508, "loss": 0.2627, "step": 16122 }, { "epoch": 0.02858918472772986, "grad_norm": 0.384765625, "learning_rate": 0.0018898002089075018, "loss": 0.1974, "step": 16124 }, { "epoch": 0.028592730893039674, "grad_norm": 0.55859375, "learning_rate": 0.0018897716695036257, "loss": 0.236, "step": 16126 }, { "epoch": 0.02859627705834949, "grad_norm": 1.53125, "learning_rate": 0.0018897431266457466, "loss": 0.2361, "step": 16128 }, { "epoch": 0.028599823223659306, "grad_norm": 0.78125, "learning_rate": 0.00188971458033399, "loss": 0.2677, "step": 16130 }, { "epoch": 0.02860336938896912, "grad_norm": 0.9921875, "learning_rate": 0.0018896860305684807, "loss": 0.2073, "step": 16132 }, { "epoch": 0.028606915554278935, "grad_norm": 3.109375, "learning_rate": 0.001889657477349343, "loss": 0.3756, "step": 16134 }, { "epoch": 0.02861046171958875, "grad_norm": 0.40234375, "learning_rate": 0.0018896289206767028, "loss": 0.2062, "step": 16136 }, { "epoch": 0.028614007884898568, "grad_norm": 1.703125, "learning_rate": 0.0018896003605506839, "loss": 0.299, "step": 16138 }, { "epoch": 0.028617554050208382, "grad_norm": 0.41015625, "learning_rate": 0.0018895717969714119, "loss": 0.2244, "step": 16140 }, { "epoch": 0.028621100215518197, "grad_norm": 1.2421875, "learning_rate": 0.0018895432299390112, "loss": 0.2021, "step": 16142 }, { "epoch": 0.02862464638082801, "grad_norm": 0.5703125, "learning_rate": 0.0018895146594536075, "loss": 0.2252, "step": 16144 }, { "epoch": 0.028628192546137826, "grad_norm": 0.75390625, "learning_rate": 0.0018894860855153252, "loss": 0.2229, "step": 16146 }, { "epoch": 0.02863173871144764, "grad_norm": 15.9375, "learning_rate": 0.001889457508124289, "loss": 0.5262, "step": 16148 }, { "epoch": 0.02863528487675746, "grad_norm": 0.58203125, "learning_rate": 0.0018894289272806245, "loss": 0.1838, "step": 16150 }, { "epoch": 0.028638831042067273, "grad_norm": 0.98046875, "learning_rate": 0.0018894003429844562, "loss": 0.3266, "step": 16152 }, { "epoch": 0.028642377207377088, "grad_norm": 8.1875, "learning_rate": 0.0018893717552359096, "loss": 0.2512, "step": 16154 }, { "epoch": 0.028645923372686902, "grad_norm": 0.7578125, "learning_rate": 0.0018893431640351092, "loss": 0.2521, "step": 16156 }, { "epoch": 0.028649469537996717, "grad_norm": 0.9609375, "learning_rate": 0.0018893145693821805, "loss": 0.2877, "step": 16158 }, { "epoch": 0.02865301570330653, "grad_norm": 1.1484375, "learning_rate": 0.0018892859712772484, "loss": 0.2224, "step": 16160 }, { "epoch": 0.02865656186861635, "grad_norm": 2.34375, "learning_rate": 0.0018892573697204377, "loss": 0.222, "step": 16162 }, { "epoch": 0.028660108033926164, "grad_norm": 0.447265625, "learning_rate": 0.001889228764711874, "loss": 0.2235, "step": 16164 }, { "epoch": 0.02866365419923598, "grad_norm": 0.484375, "learning_rate": 0.0018892001562516821, "loss": 0.2232, "step": 16166 }, { "epoch": 0.028667200364545793, "grad_norm": 0.39453125, "learning_rate": 0.0018891715443399868, "loss": 0.2098, "step": 16168 }, { "epoch": 0.028670746529855608, "grad_norm": 0.400390625, "learning_rate": 0.0018891429289769137, "loss": 0.1835, "step": 16170 }, { "epoch": 0.028674292695165426, "grad_norm": 2.671875, "learning_rate": 0.0018891143101625878, "loss": 0.3569, "step": 16172 }, { "epoch": 0.02867783886047524, "grad_norm": 1.2421875, "learning_rate": 0.0018890856878971346, "loss": 0.2325, "step": 16174 }, { "epoch": 0.028681385025785055, "grad_norm": 0.796875, "learning_rate": 0.0018890570621806786, "loss": 0.2588, "step": 16176 }, { "epoch": 0.02868493119109487, "grad_norm": 0.447265625, "learning_rate": 0.0018890284330133455, "loss": 0.2173, "step": 16178 }, { "epoch": 0.028688477356404684, "grad_norm": 2.96875, "learning_rate": 0.0018889998003952604, "loss": 0.1701, "step": 16180 }, { "epoch": 0.0286920235217145, "grad_norm": 0.828125, "learning_rate": 0.0018889711643265484, "loss": 0.1776, "step": 16182 }, { "epoch": 0.028695569687024317, "grad_norm": 0.80078125, "learning_rate": 0.001888942524807335, "loss": 0.1984, "step": 16184 }, { "epoch": 0.02869911585233413, "grad_norm": 0.83203125, "learning_rate": 0.0018889138818377448, "loss": 0.2511, "step": 16186 }, { "epoch": 0.028702662017643946, "grad_norm": 0.90234375, "learning_rate": 0.001888885235417904, "loss": 0.2487, "step": 16188 }, { "epoch": 0.02870620818295376, "grad_norm": 0.2216796875, "learning_rate": 0.0018888565855479373, "loss": 0.2029, "step": 16190 }, { "epoch": 0.028709754348263575, "grad_norm": 1.5859375, "learning_rate": 0.00188882793222797, "loss": 0.2647, "step": 16192 }, { "epoch": 0.02871330051357339, "grad_norm": 1.1484375, "learning_rate": 0.0018887992754581275, "loss": 0.2147, "step": 16194 }, { "epoch": 0.028716846678883207, "grad_norm": 1.2890625, "learning_rate": 0.0018887706152385356, "loss": 0.2279, "step": 16196 }, { "epoch": 0.028720392844193022, "grad_norm": 0.5234375, "learning_rate": 0.0018887419515693188, "loss": 0.2022, "step": 16198 }, { "epoch": 0.028723939009502836, "grad_norm": 0.8203125, "learning_rate": 0.001888713284450603, "loss": 0.2689, "step": 16200 }, { "epoch": 0.02872748517481265, "grad_norm": 0.94921875, "learning_rate": 0.0018886846138825132, "loss": 0.22, "step": 16202 }, { "epoch": 0.028731031340122466, "grad_norm": 0.75390625, "learning_rate": 0.0018886559398651752, "loss": 0.3533, "step": 16204 }, { "epoch": 0.028734577505432284, "grad_norm": 0.298828125, "learning_rate": 0.0018886272623987142, "loss": 0.1996, "step": 16206 }, { "epoch": 0.028738123670742098, "grad_norm": 1.7734375, "learning_rate": 0.0018885985814832559, "loss": 0.2396, "step": 16208 }, { "epoch": 0.028741669836051913, "grad_norm": 0.6640625, "learning_rate": 0.0018885698971189248, "loss": 0.2406, "step": 16210 }, { "epoch": 0.028745216001361727, "grad_norm": 1.5703125, "learning_rate": 0.0018885412093058477, "loss": 0.2215, "step": 16212 }, { "epoch": 0.028748762166671542, "grad_norm": 0.57421875, "learning_rate": 0.0018885125180441492, "loss": 0.3141, "step": 16214 }, { "epoch": 0.028752308331981356, "grad_norm": 1.046875, "learning_rate": 0.001888483823333955, "loss": 0.3075, "step": 16216 }, { "epoch": 0.028755854497291174, "grad_norm": 0.703125, "learning_rate": 0.0018884551251753903, "loss": 0.3037, "step": 16218 }, { "epoch": 0.02875940066260099, "grad_norm": 0.6953125, "learning_rate": 0.0018884264235685812, "loss": 0.1991, "step": 16220 }, { "epoch": 0.028762946827910803, "grad_norm": 0.2109375, "learning_rate": 0.0018883977185136526, "loss": 0.1997, "step": 16222 }, { "epoch": 0.028766492993220618, "grad_norm": 0.498046875, "learning_rate": 0.0018883690100107307, "loss": 0.1624, "step": 16224 }, { "epoch": 0.028770039158530433, "grad_norm": 0.78125, "learning_rate": 0.0018883402980599403, "loss": 0.2061, "step": 16226 }, { "epoch": 0.028773585323840247, "grad_norm": 0.375, "learning_rate": 0.0018883115826614078, "loss": 0.2027, "step": 16228 }, { "epoch": 0.028777131489150065, "grad_norm": 0.84375, "learning_rate": 0.0018882828638152578, "loss": 0.2728, "step": 16230 }, { "epoch": 0.02878067765445988, "grad_norm": 1.2421875, "learning_rate": 0.0018882541415216167, "loss": 0.2691, "step": 16232 }, { "epoch": 0.028784223819769694, "grad_norm": 0.6640625, "learning_rate": 0.0018882254157806101, "loss": 0.2265, "step": 16234 }, { "epoch": 0.02878776998507951, "grad_norm": 0.84375, "learning_rate": 0.0018881966865923632, "loss": 0.2075, "step": 16236 }, { "epoch": 0.028791316150389323, "grad_norm": 0.8125, "learning_rate": 0.0018881679539570018, "loss": 0.2381, "step": 16238 }, { "epoch": 0.02879486231569914, "grad_norm": 0.427734375, "learning_rate": 0.0018881392178746513, "loss": 0.1985, "step": 16240 }, { "epoch": 0.028798408481008956, "grad_norm": 0.671875, "learning_rate": 0.001888110478345438, "loss": 0.2827, "step": 16242 }, { "epoch": 0.02880195464631877, "grad_norm": 0.60546875, "learning_rate": 0.0018880817353694873, "loss": 0.2171, "step": 16244 }, { "epoch": 0.028805500811628585, "grad_norm": 2.875, "learning_rate": 0.0018880529889469249, "loss": 0.2308, "step": 16246 }, { "epoch": 0.0288090469769384, "grad_norm": 0.9921875, "learning_rate": 0.001888024239077876, "loss": 0.365, "step": 16248 }, { "epoch": 0.028812593142248214, "grad_norm": 1.953125, "learning_rate": 0.0018879954857624675, "loss": 0.3029, "step": 16250 }, { "epoch": 0.028816139307558032, "grad_norm": 0.609375, "learning_rate": 0.001887966729000824, "loss": 0.1876, "step": 16252 }, { "epoch": 0.028819685472867847, "grad_norm": 0.82421875, "learning_rate": 0.0018879379687930717, "loss": 0.1694, "step": 16254 }, { "epoch": 0.02882323163817766, "grad_norm": 0.546875, "learning_rate": 0.0018879092051393366, "loss": 0.185, "step": 16256 }, { "epoch": 0.028826777803487476, "grad_norm": 0.314453125, "learning_rate": 0.0018878804380397444, "loss": 0.2309, "step": 16258 }, { "epoch": 0.02883032396879729, "grad_norm": 1.1328125, "learning_rate": 0.001887851667494421, "loss": 0.1924, "step": 16260 }, { "epoch": 0.028833870134107105, "grad_norm": 0.73046875, "learning_rate": 0.0018878228935034918, "loss": 0.2033, "step": 16262 }, { "epoch": 0.028837416299416923, "grad_norm": 0.734375, "learning_rate": 0.0018877941160670825, "loss": 0.2618, "step": 16264 }, { "epoch": 0.028840962464726737, "grad_norm": 0.78125, "learning_rate": 0.0018877653351853196, "loss": 0.314, "step": 16266 }, { "epoch": 0.028844508630036552, "grad_norm": 1.0859375, "learning_rate": 0.0018877365508583289, "loss": 0.2583, "step": 16268 }, { "epoch": 0.028848054795346367, "grad_norm": 0.640625, "learning_rate": 0.001887707763086236, "loss": 0.2073, "step": 16270 }, { "epoch": 0.02885160096065618, "grad_norm": 3.890625, "learning_rate": 0.0018876789718691672, "loss": 0.3067, "step": 16272 }, { "epoch": 0.028855147125966, "grad_norm": 1.46875, "learning_rate": 0.0018876501772072474, "loss": 0.2392, "step": 16274 }, { "epoch": 0.028858693291275814, "grad_norm": 0.34765625, "learning_rate": 0.001887621379100604, "loss": 0.2292, "step": 16276 }, { "epoch": 0.02886223945658563, "grad_norm": 0.55078125, "learning_rate": 0.0018875925775493619, "loss": 0.3087, "step": 16278 }, { "epoch": 0.028865785621895443, "grad_norm": 0.57421875, "learning_rate": 0.0018875637725536472, "loss": 0.2272, "step": 16280 }, { "epoch": 0.028869331787205257, "grad_norm": 0.53515625, "learning_rate": 0.001887534964113586, "loss": 0.5092, "step": 16282 }, { "epoch": 0.028872877952515072, "grad_norm": 0.345703125, "learning_rate": 0.0018875061522293044, "loss": 0.2413, "step": 16284 }, { "epoch": 0.02887642411782489, "grad_norm": 1.1640625, "learning_rate": 0.0018874773369009286, "loss": 0.3652, "step": 16286 }, { "epoch": 0.028879970283134705, "grad_norm": 1.15625, "learning_rate": 0.001887448518128584, "loss": 0.2412, "step": 16288 }, { "epoch": 0.02888351644844452, "grad_norm": 0.76953125, "learning_rate": 0.0018874196959123974, "loss": 0.2275, "step": 16290 }, { "epoch": 0.028887062613754334, "grad_norm": 1.1328125, "learning_rate": 0.001887390870252494, "loss": 0.3363, "step": 16292 }, { "epoch": 0.028890608779064148, "grad_norm": 0.28515625, "learning_rate": 0.0018873620411490005, "loss": 0.1917, "step": 16294 }, { "epoch": 0.028894154944373963, "grad_norm": 0.96875, "learning_rate": 0.0018873332086020425, "loss": 0.2718, "step": 16296 }, { "epoch": 0.02889770110968378, "grad_norm": 0.9609375, "learning_rate": 0.0018873043726117466, "loss": 0.2529, "step": 16298 }, { "epoch": 0.028901247274993595, "grad_norm": 0.263671875, "learning_rate": 0.0018872755331782389, "loss": 0.3467, "step": 16300 }, { "epoch": 0.02890479344030341, "grad_norm": 0.2060546875, "learning_rate": 0.0018872466903016452, "loss": 0.2387, "step": 16302 }, { "epoch": 0.028908339605613224, "grad_norm": 0.44921875, "learning_rate": 0.0018872178439820915, "loss": 0.288, "step": 16304 }, { "epoch": 0.02891188577092304, "grad_norm": 0.486328125, "learning_rate": 0.0018871889942197045, "loss": 0.2158, "step": 16306 }, { "epoch": 0.028915431936232857, "grad_norm": 1.1953125, "learning_rate": 0.0018871601410146103, "loss": 0.1974, "step": 16308 }, { "epoch": 0.02891897810154267, "grad_norm": 1.234375, "learning_rate": 0.0018871312843669343, "loss": 0.2197, "step": 16310 }, { "epoch": 0.028922524266852486, "grad_norm": 1.3671875, "learning_rate": 0.001887102424276804, "loss": 0.2766, "step": 16312 }, { "epoch": 0.0289260704321623, "grad_norm": 0.361328125, "learning_rate": 0.0018870735607443441, "loss": 0.2176, "step": 16314 }, { "epoch": 0.028929616597472115, "grad_norm": 0.7734375, "learning_rate": 0.0018870446937696822, "loss": 0.2404, "step": 16316 }, { "epoch": 0.02893316276278193, "grad_norm": 0.47265625, "learning_rate": 0.001887015823352944, "loss": 0.2791, "step": 16318 }, { "epoch": 0.028936708928091748, "grad_norm": 1.0703125, "learning_rate": 0.0018869869494942556, "loss": 0.2242, "step": 16320 }, { "epoch": 0.028940255093401562, "grad_norm": 0.9375, "learning_rate": 0.0018869580721937432, "loss": 0.2457, "step": 16322 }, { "epoch": 0.028943801258711377, "grad_norm": 2.328125, "learning_rate": 0.0018869291914515333, "loss": 0.5809, "step": 16324 }, { "epoch": 0.02894734742402119, "grad_norm": 0.6171875, "learning_rate": 0.0018869003072677526, "loss": 0.2402, "step": 16326 }, { "epoch": 0.028950893589331006, "grad_norm": 1.9609375, "learning_rate": 0.0018868714196425267, "loss": 0.3535, "step": 16328 }, { "epoch": 0.02895443975464082, "grad_norm": 0.50390625, "learning_rate": 0.0018868425285759824, "loss": 0.1926, "step": 16330 }, { "epoch": 0.02895798591995064, "grad_norm": 0.50390625, "learning_rate": 0.0018868136340682458, "loss": 0.1949, "step": 16332 }, { "epoch": 0.028961532085260453, "grad_norm": 0.93359375, "learning_rate": 0.0018867847361194437, "loss": 0.265, "step": 16334 }, { "epoch": 0.028965078250570268, "grad_norm": 0.4609375, "learning_rate": 0.001886755834729702, "loss": 0.1774, "step": 16336 }, { "epoch": 0.028968624415880082, "grad_norm": 8.5625, "learning_rate": 0.0018867269298991475, "loss": 0.4013, "step": 16338 }, { "epoch": 0.028972170581189897, "grad_norm": 1.03125, "learning_rate": 0.001886698021627906, "loss": 0.2338, "step": 16340 }, { "epoch": 0.028975716746499715, "grad_norm": 2.515625, "learning_rate": 0.0018866691099161045, "loss": 0.2397, "step": 16342 }, { "epoch": 0.02897926291180953, "grad_norm": 0.392578125, "learning_rate": 0.0018866401947638693, "loss": 0.2269, "step": 16344 }, { "epoch": 0.028982809077119344, "grad_norm": 1.4140625, "learning_rate": 0.001886611276171327, "loss": 0.3197, "step": 16346 }, { "epoch": 0.02898635524242916, "grad_norm": 0.6875, "learning_rate": 0.0018865823541386036, "loss": 0.1638, "step": 16348 }, { "epoch": 0.028989901407738973, "grad_norm": 0.419921875, "learning_rate": 0.0018865534286658262, "loss": 0.213, "step": 16350 }, { "epoch": 0.028993447573048788, "grad_norm": 1.2265625, "learning_rate": 0.0018865244997531205, "loss": 0.2, "step": 16352 }, { "epoch": 0.028996993738358606, "grad_norm": 0.57421875, "learning_rate": 0.0018864955674006139, "loss": 0.2437, "step": 16354 }, { "epoch": 0.02900053990366842, "grad_norm": 0.369140625, "learning_rate": 0.0018864666316084325, "loss": 0.205, "step": 16356 }, { "epoch": 0.029004086068978235, "grad_norm": 0.63671875, "learning_rate": 0.0018864376923767026, "loss": 0.2274, "step": 16358 }, { "epoch": 0.02900763223428805, "grad_norm": 3.890625, "learning_rate": 0.0018864087497055513, "loss": 0.3339, "step": 16360 }, { "epoch": 0.029011178399597864, "grad_norm": 0.53125, "learning_rate": 0.0018863798035951053, "loss": 0.2139, "step": 16362 }, { "epoch": 0.02901472456490768, "grad_norm": 1.8828125, "learning_rate": 0.00188635085404549, "loss": 0.3722, "step": 16364 }, { "epoch": 0.029018270730217496, "grad_norm": 0.515625, "learning_rate": 0.0018863219010568336, "loss": 0.1834, "step": 16366 }, { "epoch": 0.02902181689552731, "grad_norm": 0.8125, "learning_rate": 0.0018862929446292614, "loss": 0.205, "step": 16368 }, { "epoch": 0.029025363060837125, "grad_norm": 4.71875, "learning_rate": 0.0018862639847629008, "loss": 0.3994, "step": 16370 }, { "epoch": 0.02902890922614694, "grad_norm": 0.443359375, "learning_rate": 0.0018862350214578782, "loss": 0.2011, "step": 16372 }, { "epoch": 0.029032455391456755, "grad_norm": 1.0546875, "learning_rate": 0.0018862060547143204, "loss": 0.3336, "step": 16374 }, { "epoch": 0.029036001556766573, "grad_norm": 0.416015625, "learning_rate": 0.001886177084532354, "loss": 0.1906, "step": 16376 }, { "epoch": 0.029039547722076387, "grad_norm": 1.765625, "learning_rate": 0.0018861481109121057, "loss": 0.3205, "step": 16378 }, { "epoch": 0.0290430938873862, "grad_norm": 4.09375, "learning_rate": 0.0018861191338537021, "loss": 0.1958, "step": 16380 }, { "epoch": 0.029046640052696016, "grad_norm": 6.375, "learning_rate": 0.00188609015335727, "loss": 0.4469, "step": 16382 }, { "epoch": 0.02905018621800583, "grad_norm": 0.59765625, "learning_rate": 0.0018860611694229365, "loss": 0.271, "step": 16384 }, { "epoch": 0.029053732383315645, "grad_norm": 0.396484375, "learning_rate": 0.0018860321820508277, "loss": 0.2677, "step": 16386 }, { "epoch": 0.029057278548625463, "grad_norm": 0.921875, "learning_rate": 0.001886003191241071, "loss": 0.2044, "step": 16388 }, { "epoch": 0.029060824713935278, "grad_norm": 0.36328125, "learning_rate": 0.0018859741969937927, "loss": 0.1816, "step": 16390 }, { "epoch": 0.029064370879245092, "grad_norm": 1.234375, "learning_rate": 0.00188594519930912, "loss": 0.2402, "step": 16392 }, { "epoch": 0.029067917044554907, "grad_norm": 0.55078125, "learning_rate": 0.0018859161981871792, "loss": 0.1972, "step": 16394 }, { "epoch": 0.02907146320986472, "grad_norm": 0.2431640625, "learning_rate": 0.0018858871936280979, "loss": 0.1519, "step": 16396 }, { "epoch": 0.029075009375174536, "grad_norm": 0.5078125, "learning_rate": 0.0018858581856320022, "loss": 0.167, "step": 16398 }, { "epoch": 0.029078555540484354, "grad_norm": 3.125, "learning_rate": 0.0018858291741990192, "loss": 0.322, "step": 16400 }, { "epoch": 0.02908210170579417, "grad_norm": 0.416015625, "learning_rate": 0.001885800159329276, "loss": 0.1824, "step": 16402 }, { "epoch": 0.029085647871103983, "grad_norm": 0.34375, "learning_rate": 0.0018857711410228998, "loss": 0.1885, "step": 16404 }, { "epoch": 0.029089194036413798, "grad_norm": 0.310546875, "learning_rate": 0.0018857421192800164, "loss": 0.1803, "step": 16406 }, { "epoch": 0.029092740201723612, "grad_norm": 0.80859375, "learning_rate": 0.0018857130941007535, "loss": 0.25, "step": 16408 }, { "epoch": 0.02909628636703343, "grad_norm": 1.390625, "learning_rate": 0.001885684065485238, "loss": 0.2244, "step": 16410 }, { "epoch": 0.029099832532343245, "grad_norm": 0.62109375, "learning_rate": 0.0018856550334335972, "loss": 0.2335, "step": 16412 }, { "epoch": 0.02910337869765306, "grad_norm": 4.96875, "learning_rate": 0.001885625997945957, "loss": 0.4523, "step": 16414 }, { "epoch": 0.029106924862962874, "grad_norm": 0.953125, "learning_rate": 0.001885596959022445, "loss": 0.2302, "step": 16416 }, { "epoch": 0.02911047102827269, "grad_norm": 0.79296875, "learning_rate": 0.0018855679166631888, "loss": 0.2314, "step": 16418 }, { "epoch": 0.029114017193582503, "grad_norm": 0.578125, "learning_rate": 0.0018855388708683145, "loss": 0.2205, "step": 16420 }, { "epoch": 0.02911756335889232, "grad_norm": 0.69921875, "learning_rate": 0.0018855098216379494, "loss": 0.1928, "step": 16422 }, { "epoch": 0.029121109524202136, "grad_norm": 0.369140625, "learning_rate": 0.001885480768972221, "loss": 0.1918, "step": 16424 }, { "epoch": 0.02912465568951195, "grad_norm": 0.6328125, "learning_rate": 0.0018854517128712555, "loss": 0.1821, "step": 16426 }, { "epoch": 0.029128201854821765, "grad_norm": 0.474609375, "learning_rate": 0.0018854226533351808, "loss": 0.2326, "step": 16428 }, { "epoch": 0.02913174802013158, "grad_norm": 0.5859375, "learning_rate": 0.0018853935903641234, "loss": 0.2422, "step": 16430 }, { "epoch": 0.029135294185441394, "grad_norm": 0.396484375, "learning_rate": 0.0018853645239582109, "loss": 0.2677, "step": 16432 }, { "epoch": 0.029138840350751212, "grad_norm": 0.400390625, "learning_rate": 0.0018853354541175703, "loss": 0.2216, "step": 16434 }, { "epoch": 0.029142386516061027, "grad_norm": 0.31640625, "learning_rate": 0.0018853063808423282, "loss": 0.1685, "step": 16436 }, { "epoch": 0.02914593268137084, "grad_norm": 0.515625, "learning_rate": 0.0018852773041326122, "loss": 0.2739, "step": 16438 }, { "epoch": 0.029149478846680656, "grad_norm": 0.546875, "learning_rate": 0.0018852482239885492, "loss": 0.2305, "step": 16440 }, { "epoch": 0.02915302501199047, "grad_norm": 0.431640625, "learning_rate": 0.0018852191404102672, "loss": 0.2419, "step": 16442 }, { "epoch": 0.029156571177300288, "grad_norm": 1.09375, "learning_rate": 0.0018851900533978926, "loss": 0.2139, "step": 16444 }, { "epoch": 0.029160117342610103, "grad_norm": 0.71875, "learning_rate": 0.0018851609629515526, "loss": 0.2155, "step": 16446 }, { "epoch": 0.029163663507919917, "grad_norm": 1.21875, "learning_rate": 0.0018851318690713745, "loss": 0.1672, "step": 16448 }, { "epoch": 0.029167209673229732, "grad_norm": 0.384765625, "learning_rate": 0.001885102771757486, "loss": 0.2126, "step": 16450 }, { "epoch": 0.029170755838539546, "grad_norm": 1.1796875, "learning_rate": 0.001885073671010014, "loss": 0.2298, "step": 16452 }, { "epoch": 0.02917430200384936, "grad_norm": 0.625, "learning_rate": 0.0018850445668290856, "loss": 0.2557, "step": 16454 }, { "epoch": 0.02917784816915918, "grad_norm": 0.84765625, "learning_rate": 0.0018850154592148284, "loss": 0.4395, "step": 16456 }, { "epoch": 0.029181394334468994, "grad_norm": 1.2734375, "learning_rate": 0.0018849863481673697, "loss": 0.2591, "step": 16458 }, { "epoch": 0.029184940499778808, "grad_norm": 0.53125, "learning_rate": 0.0018849572336868364, "loss": 0.1886, "step": 16460 }, { "epoch": 0.029188486665088623, "grad_norm": 0.6015625, "learning_rate": 0.001884928115773356, "loss": 0.1877, "step": 16462 }, { "epoch": 0.029192032830398437, "grad_norm": 0.65234375, "learning_rate": 0.0018848989944270564, "loss": 0.1752, "step": 16464 }, { "epoch": 0.029195578995708252, "grad_norm": 1.65625, "learning_rate": 0.0018848698696480645, "loss": 0.203, "step": 16466 }, { "epoch": 0.02919912516101807, "grad_norm": 0.80859375, "learning_rate": 0.0018848407414365075, "loss": 0.1898, "step": 16468 }, { "epoch": 0.029202671326327884, "grad_norm": 0.54296875, "learning_rate": 0.001884811609792513, "loss": 0.2323, "step": 16470 }, { "epoch": 0.0292062174916377, "grad_norm": 0.2314453125, "learning_rate": 0.0018847824747162082, "loss": 0.2808, "step": 16472 }, { "epoch": 0.029209763656947513, "grad_norm": 0.5234375, "learning_rate": 0.001884753336207721, "loss": 0.1978, "step": 16474 }, { "epoch": 0.029213309822257328, "grad_norm": 0.4140625, "learning_rate": 0.0018847241942671785, "loss": 0.2062, "step": 16476 }, { "epoch": 0.029216855987567146, "grad_norm": 0.6015625, "learning_rate": 0.001884695048894708, "loss": 0.2442, "step": 16478 }, { "epoch": 0.02922040215287696, "grad_norm": 1.8359375, "learning_rate": 0.001884665900090437, "loss": 0.3183, "step": 16480 }, { "epoch": 0.029223948318186775, "grad_norm": 4.625, "learning_rate": 0.0018846367478544937, "loss": 0.2864, "step": 16482 }, { "epoch": 0.02922749448349659, "grad_norm": 4.375, "learning_rate": 0.001884607592187005, "loss": 0.2509, "step": 16484 }, { "epoch": 0.029231040648806404, "grad_norm": 1.8359375, "learning_rate": 0.0018845784330880982, "loss": 0.3211, "step": 16486 }, { "epoch": 0.02923458681411622, "grad_norm": 0.44140625, "learning_rate": 0.001884549270557901, "loss": 0.271, "step": 16488 }, { "epoch": 0.029238132979426037, "grad_norm": 0.306640625, "learning_rate": 0.0018845201045965413, "loss": 0.2279, "step": 16490 }, { "epoch": 0.02924167914473585, "grad_norm": 0.83203125, "learning_rate": 0.0018844909352041458, "loss": 0.2755, "step": 16492 }, { "epoch": 0.029245225310045666, "grad_norm": 0.58203125, "learning_rate": 0.0018844617623808432, "loss": 0.23, "step": 16494 }, { "epoch": 0.02924877147535548, "grad_norm": 0.67578125, "learning_rate": 0.00188443258612676, "loss": 0.2595, "step": 16496 }, { "epoch": 0.029252317640665295, "grad_norm": 0.58203125, "learning_rate": 0.001884403406442025, "loss": 0.2312, "step": 16498 }, { "epoch": 0.02925586380597511, "grad_norm": 0.30078125, "learning_rate": 0.001884374223326765, "loss": 0.1869, "step": 16500 }, { "epoch": 0.029259409971284928, "grad_norm": 1.8203125, "learning_rate": 0.0018843450367811072, "loss": 0.2256, "step": 16502 }, { "epoch": 0.029262956136594742, "grad_norm": 0.578125, "learning_rate": 0.0018843158468051804, "loss": 0.2081, "step": 16504 }, { "epoch": 0.029266502301904557, "grad_norm": 2.5625, "learning_rate": 0.0018842866533991115, "loss": 0.4354, "step": 16506 }, { "epoch": 0.02927004846721437, "grad_norm": 0.427734375, "learning_rate": 0.0018842574565630281, "loss": 0.2215, "step": 16508 }, { "epoch": 0.029273594632524186, "grad_norm": 1.3125, "learning_rate": 0.0018842282562970584, "loss": 0.3376, "step": 16510 }, { "epoch": 0.029277140797834004, "grad_norm": 1.3203125, "learning_rate": 0.0018841990526013298, "loss": 0.2389, "step": 16512 }, { "epoch": 0.02928068696314382, "grad_norm": 0.625, "learning_rate": 0.0018841698454759698, "loss": 0.2105, "step": 16514 }, { "epoch": 0.029284233128453633, "grad_norm": 0.33203125, "learning_rate": 0.001884140634921107, "loss": 0.1605, "step": 16516 }, { "epoch": 0.029287779293763447, "grad_norm": 1.7109375, "learning_rate": 0.001884111420936868, "loss": 0.4578, "step": 16518 }, { "epoch": 0.029291325459073262, "grad_norm": 1.1015625, "learning_rate": 0.0018840822035233811, "loss": 0.2545, "step": 16520 }, { "epoch": 0.029294871624383077, "grad_norm": 0.8828125, "learning_rate": 0.0018840529826807744, "loss": 0.2587, "step": 16522 }, { "epoch": 0.029298417789692895, "grad_norm": 0.91015625, "learning_rate": 0.0018840237584091752, "loss": 0.3245, "step": 16524 }, { "epoch": 0.02930196395500271, "grad_norm": 0.84375, "learning_rate": 0.0018839945307087117, "loss": 0.2438, "step": 16526 }, { "epoch": 0.029305510120312524, "grad_norm": 1.125, "learning_rate": 0.0018839652995795114, "loss": 0.3821, "step": 16528 }, { "epoch": 0.02930905628562234, "grad_norm": 1.0546875, "learning_rate": 0.0018839360650217021, "loss": 0.2183, "step": 16530 }, { "epoch": 0.029312602450932153, "grad_norm": 0.73046875, "learning_rate": 0.0018839068270354118, "loss": 0.1633, "step": 16532 }, { "epoch": 0.029316148616241967, "grad_norm": 5.09375, "learning_rate": 0.0018838775856207686, "loss": 0.2708, "step": 16534 }, { "epoch": 0.029319694781551785, "grad_norm": 0.6796875, "learning_rate": 0.0018838483407779, "loss": 0.2068, "step": 16536 }, { "epoch": 0.0293232409468616, "grad_norm": 0.35546875, "learning_rate": 0.0018838190925069345, "loss": 0.1839, "step": 16538 }, { "epoch": 0.029326787112171415, "grad_norm": 0.61328125, "learning_rate": 0.0018837898408079993, "loss": 0.2545, "step": 16540 }, { "epoch": 0.02933033327748123, "grad_norm": 2.46875, "learning_rate": 0.0018837605856812224, "loss": 0.3261, "step": 16542 }, { "epoch": 0.029333879442791044, "grad_norm": 1.046875, "learning_rate": 0.0018837313271267322, "loss": 0.2515, "step": 16544 }, { "epoch": 0.02933742560810086, "grad_norm": 0.404296875, "learning_rate": 0.0018837020651446563, "loss": 0.2496, "step": 16546 }, { "epoch": 0.029340971773410676, "grad_norm": 0.40234375, "learning_rate": 0.0018836727997351228, "loss": 0.22, "step": 16548 }, { "epoch": 0.02934451793872049, "grad_norm": 1.1015625, "learning_rate": 0.0018836435308982598, "loss": 0.1889, "step": 16550 }, { "epoch": 0.029348064104030305, "grad_norm": 1.1875, "learning_rate": 0.001883614258634195, "loss": 0.2547, "step": 16552 }, { "epoch": 0.02935161026934012, "grad_norm": 0.5390625, "learning_rate": 0.0018835849829430568, "loss": 0.2459, "step": 16554 }, { "epoch": 0.029355156434649934, "grad_norm": 1.125, "learning_rate": 0.001883555703824973, "loss": 0.2455, "step": 16556 }, { "epoch": 0.029358702599959752, "grad_norm": 0.32421875, "learning_rate": 0.0018835264212800715, "loss": 0.2801, "step": 16558 }, { "epoch": 0.029362248765269567, "grad_norm": 0.55859375, "learning_rate": 0.001883497135308481, "loss": 0.2469, "step": 16560 }, { "epoch": 0.02936579493057938, "grad_norm": 0.470703125, "learning_rate": 0.001883467845910329, "loss": 0.1964, "step": 16562 }, { "epoch": 0.029369341095889196, "grad_norm": 1.40625, "learning_rate": 0.0018834385530857435, "loss": 0.2267, "step": 16564 }, { "epoch": 0.02937288726119901, "grad_norm": 0.66796875, "learning_rate": 0.001883409256834853, "loss": 0.2216, "step": 16566 }, { "epoch": 0.029376433426508825, "grad_norm": 0.451171875, "learning_rate": 0.0018833799571577852, "loss": 0.2015, "step": 16568 }, { "epoch": 0.029379979591818643, "grad_norm": 0.396484375, "learning_rate": 0.0018833506540546687, "loss": 0.1955, "step": 16570 }, { "epoch": 0.029383525757128458, "grad_norm": 0.62109375, "learning_rate": 0.0018833213475256316, "loss": 0.2211, "step": 16572 }, { "epoch": 0.029387071922438272, "grad_norm": 0.36328125, "learning_rate": 0.0018832920375708019, "loss": 0.2667, "step": 16574 }, { "epoch": 0.029390618087748087, "grad_norm": 4.3125, "learning_rate": 0.0018832627241903077, "loss": 0.2771, "step": 16576 }, { "epoch": 0.0293941642530579, "grad_norm": 1.515625, "learning_rate": 0.0018832334073842774, "loss": 0.3164, "step": 16578 }, { "epoch": 0.02939771041836772, "grad_norm": 0.72265625, "learning_rate": 0.0018832040871528393, "loss": 0.2078, "step": 16580 }, { "epoch": 0.029401256583677534, "grad_norm": 0.298828125, "learning_rate": 0.0018831747634961212, "loss": 0.2292, "step": 16582 }, { "epoch": 0.02940480274898735, "grad_norm": 0.322265625, "learning_rate": 0.0018831454364142514, "loss": 0.2135, "step": 16584 }, { "epoch": 0.029408348914297163, "grad_norm": 3.296875, "learning_rate": 0.0018831161059073586, "loss": 0.2987, "step": 16586 }, { "epoch": 0.029411895079606978, "grad_norm": 1.1328125, "learning_rate": 0.0018830867719755709, "loss": 0.2286, "step": 16588 }, { "epoch": 0.029415441244916792, "grad_norm": 0.609375, "learning_rate": 0.0018830574346190166, "loss": 0.2297, "step": 16590 }, { "epoch": 0.02941898741022661, "grad_norm": 0.29296875, "learning_rate": 0.001883028093837824, "loss": 0.2936, "step": 16592 }, { "epoch": 0.029422533575536425, "grad_norm": 1.0625, "learning_rate": 0.001882998749632121, "loss": 0.2445, "step": 16594 }, { "epoch": 0.02942607974084624, "grad_norm": 0.4296875, "learning_rate": 0.0018829694020020364, "loss": 0.2801, "step": 16596 }, { "epoch": 0.029429625906156054, "grad_norm": 0.30859375, "learning_rate": 0.0018829400509476986, "loss": 0.1658, "step": 16598 }, { "epoch": 0.02943317207146587, "grad_norm": 0.373046875, "learning_rate": 0.0018829106964692358, "loss": 0.218, "step": 16600 }, { "epoch": 0.029436718236775683, "grad_norm": 2.96875, "learning_rate": 0.0018828813385667765, "loss": 0.2704, "step": 16602 }, { "epoch": 0.0294402644020855, "grad_norm": 0.87109375, "learning_rate": 0.0018828519772404483, "loss": 0.2337, "step": 16604 }, { "epoch": 0.029443810567395316, "grad_norm": 0.75, "learning_rate": 0.0018828226124903808, "loss": 0.2671, "step": 16606 }, { "epoch": 0.02944735673270513, "grad_norm": 0.43359375, "learning_rate": 0.0018827932443167019, "loss": 0.2349, "step": 16608 }, { "epoch": 0.029450902898014945, "grad_norm": 0.49609375, "learning_rate": 0.0018827638727195403, "loss": 0.2073, "step": 16610 }, { "epoch": 0.02945444906332476, "grad_norm": 1.1328125, "learning_rate": 0.001882734497699024, "loss": 0.2541, "step": 16612 }, { "epoch": 0.029457995228634577, "grad_norm": 0.75390625, "learning_rate": 0.0018827051192552816, "loss": 0.2349, "step": 16614 }, { "epoch": 0.029461541393944392, "grad_norm": 3.578125, "learning_rate": 0.0018826757373884416, "loss": 0.2521, "step": 16616 }, { "epoch": 0.029465087559254206, "grad_norm": 1.4375, "learning_rate": 0.0018826463520986326, "loss": 0.2359, "step": 16618 }, { "epoch": 0.02946863372456402, "grad_norm": 0.83203125, "learning_rate": 0.0018826169633859831, "loss": 0.282, "step": 16620 }, { "epoch": 0.029472179889873835, "grad_norm": 0.578125, "learning_rate": 0.0018825875712506215, "loss": 0.1747, "step": 16622 }, { "epoch": 0.02947572605518365, "grad_norm": 0.63671875, "learning_rate": 0.0018825581756926764, "loss": 0.1783, "step": 16624 }, { "epoch": 0.029479272220493468, "grad_norm": 0.9921875, "learning_rate": 0.0018825287767122768, "loss": 0.1752, "step": 16626 }, { "epoch": 0.029482818385803283, "grad_norm": 0.859375, "learning_rate": 0.0018824993743095507, "loss": 0.3269, "step": 16628 }, { "epoch": 0.029486364551113097, "grad_norm": 0.341796875, "learning_rate": 0.0018824699684846264, "loss": 0.2186, "step": 16630 }, { "epoch": 0.02948991071642291, "grad_norm": 0.6953125, "learning_rate": 0.0018824405592376334, "loss": 0.1926, "step": 16632 }, { "epoch": 0.029493456881732726, "grad_norm": 0.95703125, "learning_rate": 0.0018824111465687, "loss": 0.3023, "step": 16634 }, { "epoch": 0.02949700304704254, "grad_norm": 0.671875, "learning_rate": 0.0018823817304779546, "loss": 0.1736, "step": 16636 }, { "epoch": 0.02950054921235236, "grad_norm": 0.609375, "learning_rate": 0.001882352310965526, "loss": 0.1993, "step": 16638 }, { "epoch": 0.029504095377662173, "grad_norm": 1.71875, "learning_rate": 0.001882322888031543, "loss": 0.3678, "step": 16640 }, { "epoch": 0.029507641542971988, "grad_norm": 0.486328125, "learning_rate": 0.0018822934616761337, "loss": 0.2318, "step": 16642 }, { "epoch": 0.029511187708281802, "grad_norm": 2.09375, "learning_rate": 0.0018822640318994274, "loss": 0.3982, "step": 16644 }, { "epoch": 0.029514733873591617, "grad_norm": 0.8671875, "learning_rate": 0.0018822345987015523, "loss": 0.3106, "step": 16646 }, { "epoch": 0.029518280038901435, "grad_norm": 0.79296875, "learning_rate": 0.001882205162082638, "loss": 0.2578, "step": 16648 }, { "epoch": 0.02952182620421125, "grad_norm": 0.84765625, "learning_rate": 0.001882175722042812, "loss": 0.2203, "step": 16650 }, { "epoch": 0.029525372369521064, "grad_norm": 0.9140625, "learning_rate": 0.0018821462785822045, "loss": 0.331, "step": 16652 }, { "epoch": 0.02952891853483088, "grad_norm": 0.60546875, "learning_rate": 0.0018821168317009432, "loss": 0.2047, "step": 16654 }, { "epoch": 0.029532464700140693, "grad_norm": 0.6953125, "learning_rate": 0.0018820873813991572, "loss": 0.2383, "step": 16656 }, { "epoch": 0.029536010865450508, "grad_norm": 0.55078125, "learning_rate": 0.001882057927676975, "loss": 0.2129, "step": 16658 }, { "epoch": 0.029539557030760326, "grad_norm": 10.9375, "learning_rate": 0.001882028470534526, "loss": 0.3519, "step": 16660 }, { "epoch": 0.02954310319607014, "grad_norm": 0.9296875, "learning_rate": 0.001881999009971939, "loss": 0.2205, "step": 16662 }, { "epoch": 0.029546649361379955, "grad_norm": 0.470703125, "learning_rate": 0.0018819695459893421, "loss": 0.2093, "step": 16664 }, { "epoch": 0.02955019552668977, "grad_norm": 1.359375, "learning_rate": 0.0018819400785868646, "loss": 0.2302, "step": 16666 }, { "epoch": 0.029553741691999584, "grad_norm": 0.65625, "learning_rate": 0.0018819106077646356, "loss": 0.2871, "step": 16668 }, { "epoch": 0.0295572878573094, "grad_norm": 0.640625, "learning_rate": 0.001881881133522784, "loss": 0.2343, "step": 16670 }, { "epoch": 0.029560834022619217, "grad_norm": 1.953125, "learning_rate": 0.0018818516558614382, "loss": 0.3144, "step": 16672 }, { "epoch": 0.02956438018792903, "grad_norm": 0.455078125, "learning_rate": 0.0018818221747807274, "loss": 0.2072, "step": 16674 }, { "epoch": 0.029567926353238846, "grad_norm": 6.0, "learning_rate": 0.0018817926902807804, "loss": 0.2485, "step": 16676 }, { "epoch": 0.02957147251854866, "grad_norm": 1.75, "learning_rate": 0.0018817632023617266, "loss": 0.2983, "step": 16678 }, { "epoch": 0.029575018683858475, "grad_norm": 2.296875, "learning_rate": 0.0018817337110236946, "loss": 0.2936, "step": 16680 }, { "epoch": 0.029578564849168293, "grad_norm": 0.291015625, "learning_rate": 0.0018817042162668132, "loss": 0.1842, "step": 16682 }, { "epoch": 0.029582111014478107, "grad_norm": 0.482421875, "learning_rate": 0.001881674718091212, "loss": 0.1882, "step": 16684 }, { "epoch": 0.029585657179787922, "grad_norm": 0.37890625, "learning_rate": 0.0018816452164970192, "loss": 0.2116, "step": 16686 }, { "epoch": 0.029589203345097737, "grad_norm": 0.486328125, "learning_rate": 0.0018816157114843646, "loss": 0.1892, "step": 16688 }, { "epoch": 0.02959274951040755, "grad_norm": 0.8515625, "learning_rate": 0.0018815862030533768, "loss": 0.2035, "step": 16690 }, { "epoch": 0.029596295675717366, "grad_norm": 0.58203125, "learning_rate": 0.001881556691204185, "loss": 0.3968, "step": 16692 }, { "epoch": 0.029599841841027184, "grad_norm": 0.51171875, "learning_rate": 0.001881527175936918, "loss": 0.217, "step": 16694 }, { "epoch": 0.029603388006336998, "grad_norm": 0.474609375, "learning_rate": 0.0018814976572517053, "loss": 0.2913, "step": 16696 }, { "epoch": 0.029606934171646813, "grad_norm": 7.4375, "learning_rate": 0.001881468135148676, "loss": 0.4673, "step": 16698 }, { "epoch": 0.029610480336956627, "grad_norm": 0.84765625, "learning_rate": 0.0018814386096279586, "loss": 0.2237, "step": 16700 }, { "epoch": 0.029614026502266442, "grad_norm": 1.0, "learning_rate": 0.0018814090806896827, "loss": 0.2352, "step": 16702 }, { "epoch": 0.029617572667576256, "grad_norm": 0.48046875, "learning_rate": 0.0018813795483339775, "loss": 0.174, "step": 16704 }, { "epoch": 0.029621118832886074, "grad_norm": 0.49609375, "learning_rate": 0.001881350012560972, "loss": 0.241, "step": 16706 }, { "epoch": 0.02962466499819589, "grad_norm": 0.5546875, "learning_rate": 0.0018813204733707954, "loss": 0.232, "step": 16708 }, { "epoch": 0.029628211163505704, "grad_norm": 0.458984375, "learning_rate": 0.0018812909307635768, "loss": 0.1704, "step": 16710 }, { "epoch": 0.029631757328815518, "grad_norm": 1.09375, "learning_rate": 0.0018812613847394458, "loss": 0.2548, "step": 16712 }, { "epoch": 0.029635303494125333, "grad_norm": 1.078125, "learning_rate": 0.0018812318352985312, "loss": 0.2749, "step": 16714 }, { "epoch": 0.02963884965943515, "grad_norm": 2.65625, "learning_rate": 0.0018812022824409623, "loss": 0.2695, "step": 16716 }, { "epoch": 0.029642395824744965, "grad_norm": 1.03125, "learning_rate": 0.0018811727261668681, "loss": 0.2257, "step": 16718 }, { "epoch": 0.02964594199005478, "grad_norm": 0.70703125, "learning_rate": 0.0018811431664763787, "loss": 0.243, "step": 16720 }, { "epoch": 0.029649488155364594, "grad_norm": 1.25, "learning_rate": 0.0018811136033696223, "loss": 0.4543, "step": 16722 }, { "epoch": 0.02965303432067441, "grad_norm": 0.5234375, "learning_rate": 0.0018810840368467289, "loss": 0.2042, "step": 16724 }, { "epoch": 0.029656580485984223, "grad_norm": 1.046875, "learning_rate": 0.0018810544669078278, "loss": 0.2378, "step": 16726 }, { "epoch": 0.02966012665129404, "grad_norm": 0.515625, "learning_rate": 0.0018810248935530478, "loss": 0.1971, "step": 16728 }, { "epoch": 0.029663672816603856, "grad_norm": 0.4375, "learning_rate": 0.0018809953167825192, "loss": 0.2117, "step": 16730 }, { "epoch": 0.02966721898191367, "grad_norm": 0.8046875, "learning_rate": 0.0018809657365963703, "loss": 0.2126, "step": 16732 }, { "epoch": 0.029670765147223485, "grad_norm": 0.431640625, "learning_rate": 0.001880936152994731, "loss": 0.2501, "step": 16734 }, { "epoch": 0.0296743113125333, "grad_norm": 1.875, "learning_rate": 0.0018809065659777305, "loss": 0.2054, "step": 16736 }, { "epoch": 0.029677857477843114, "grad_norm": 0.41796875, "learning_rate": 0.0018808769755454982, "loss": 0.2013, "step": 16738 }, { "epoch": 0.029681403643152932, "grad_norm": 0.5625, "learning_rate": 0.001880847381698164, "loss": 0.2365, "step": 16740 }, { "epoch": 0.029684949808462747, "grad_norm": 0.734375, "learning_rate": 0.0018808177844358565, "loss": 0.204, "step": 16742 }, { "epoch": 0.02968849597377256, "grad_norm": 4.8125, "learning_rate": 0.001880788183758706, "loss": 0.2949, "step": 16744 }, { "epoch": 0.029692042139082376, "grad_norm": 0.314453125, "learning_rate": 0.0018807585796668412, "loss": 0.1435, "step": 16746 }, { "epoch": 0.02969558830439219, "grad_norm": 0.56640625, "learning_rate": 0.0018807289721603918, "loss": 0.2879, "step": 16748 }, { "epoch": 0.02969913446970201, "grad_norm": 0.431640625, "learning_rate": 0.0018806993612394873, "loss": 0.2119, "step": 16750 }, { "epoch": 0.029702680635011823, "grad_norm": 0.72265625, "learning_rate": 0.0018806697469042578, "loss": 0.1907, "step": 16752 }, { "epoch": 0.029706226800321638, "grad_norm": 0.5234375, "learning_rate": 0.0018806401291548318, "loss": 0.3089, "step": 16754 }, { "epoch": 0.029709772965631452, "grad_norm": 0.416015625, "learning_rate": 0.0018806105079913393, "loss": 0.1845, "step": 16756 }, { "epoch": 0.029713319130941267, "grad_norm": 0.671875, "learning_rate": 0.0018805808834139101, "loss": 0.2809, "step": 16758 }, { "epoch": 0.02971686529625108, "grad_norm": 0.796875, "learning_rate": 0.0018805512554226737, "loss": 0.1991, "step": 16760 }, { "epoch": 0.0297204114615609, "grad_norm": 0.77734375, "learning_rate": 0.0018805216240177594, "loss": 0.2467, "step": 16762 }, { "epoch": 0.029723957626870714, "grad_norm": 0.578125, "learning_rate": 0.0018804919891992969, "loss": 0.2788, "step": 16764 }, { "epoch": 0.02972750379218053, "grad_norm": 1.7890625, "learning_rate": 0.001880462350967416, "loss": 0.2334, "step": 16766 }, { "epoch": 0.029731049957490343, "grad_norm": 1.5625, "learning_rate": 0.0018804327093222455, "loss": 0.1918, "step": 16768 }, { "epoch": 0.029734596122800157, "grad_norm": 0.33984375, "learning_rate": 0.0018804030642639161, "loss": 0.2141, "step": 16770 }, { "epoch": 0.029738142288109972, "grad_norm": 0.337890625, "learning_rate": 0.001880373415792557, "loss": 0.1788, "step": 16772 }, { "epoch": 0.02974168845341979, "grad_norm": 0.380859375, "learning_rate": 0.0018803437639082975, "loss": 0.1849, "step": 16774 }, { "epoch": 0.029745234618729605, "grad_norm": 0.484375, "learning_rate": 0.001880314108611268, "loss": 0.2101, "step": 16776 }, { "epoch": 0.02974878078403942, "grad_norm": 0.78125, "learning_rate": 0.0018802844499015978, "loss": 0.4142, "step": 16778 }, { "epoch": 0.029752326949349234, "grad_norm": 1.359375, "learning_rate": 0.0018802547877794168, "loss": 0.3349, "step": 16780 }, { "epoch": 0.02975587311465905, "grad_norm": 0.8828125, "learning_rate": 0.0018802251222448543, "loss": 0.2832, "step": 16782 }, { "epoch": 0.029759419279968866, "grad_norm": 0.86328125, "learning_rate": 0.0018801954532980405, "loss": 0.2191, "step": 16784 }, { "epoch": 0.02976296544527868, "grad_norm": 0.357421875, "learning_rate": 0.0018801657809391049, "loss": 0.2201, "step": 16786 }, { "epoch": 0.029766511610588495, "grad_norm": 0.65625, "learning_rate": 0.0018801361051681775, "loss": 0.1762, "step": 16788 }, { "epoch": 0.02977005777589831, "grad_norm": 0.5859375, "learning_rate": 0.0018801064259853877, "loss": 0.2584, "step": 16790 }, { "epoch": 0.029773603941208125, "grad_norm": 0.6640625, "learning_rate": 0.0018800767433908658, "loss": 0.2513, "step": 16792 }, { "epoch": 0.02977715010651794, "grad_norm": 1.0234375, "learning_rate": 0.0018800470573847412, "loss": 0.1866, "step": 16794 }, { "epoch": 0.029780696271827757, "grad_norm": 1.3984375, "learning_rate": 0.001880017367967144, "loss": 0.2667, "step": 16796 }, { "epoch": 0.02978424243713757, "grad_norm": 0.40234375, "learning_rate": 0.0018799876751382037, "loss": 0.1656, "step": 16798 }, { "epoch": 0.029787788602447386, "grad_norm": 1.2890625, "learning_rate": 0.0018799579788980503, "loss": 0.2954, "step": 16800 }, { "epoch": 0.0297913347677572, "grad_norm": 0.6328125, "learning_rate": 0.001879928279246814, "loss": 0.1839, "step": 16802 }, { "epoch": 0.029794880933067015, "grad_norm": 0.734375, "learning_rate": 0.0018798985761846246, "loss": 0.228, "step": 16804 }, { "epoch": 0.02979842709837683, "grad_norm": 0.357421875, "learning_rate": 0.0018798688697116116, "loss": 0.1738, "step": 16806 }, { "epoch": 0.029801973263686648, "grad_norm": 0.396484375, "learning_rate": 0.0018798391598279052, "loss": 0.3059, "step": 16808 }, { "epoch": 0.029805519428996462, "grad_norm": 3.078125, "learning_rate": 0.0018798094465336352, "loss": 0.3257, "step": 16810 }, { "epoch": 0.029809065594306277, "grad_norm": 1.015625, "learning_rate": 0.0018797797298289317, "loss": 0.2544, "step": 16812 }, { "epoch": 0.02981261175961609, "grad_norm": 0.7265625, "learning_rate": 0.0018797500097139247, "loss": 0.1742, "step": 16814 }, { "epoch": 0.029816157924925906, "grad_norm": 0.73046875, "learning_rate": 0.0018797202861887442, "loss": 0.1878, "step": 16816 }, { "epoch": 0.029819704090235724, "grad_norm": 0.50390625, "learning_rate": 0.0018796905592535196, "loss": 0.1701, "step": 16818 }, { "epoch": 0.02982325025554554, "grad_norm": 0.78125, "learning_rate": 0.0018796608289083818, "loss": 0.2311, "step": 16820 }, { "epoch": 0.029826796420855353, "grad_norm": 0.75, "learning_rate": 0.0018796310951534603, "loss": 0.1479, "step": 16822 }, { "epoch": 0.029830342586165168, "grad_norm": 0.453125, "learning_rate": 0.0018796013579888853, "loss": 0.1896, "step": 16824 }, { "epoch": 0.029833888751474982, "grad_norm": 0.84765625, "learning_rate": 0.001879571617414787, "loss": 0.1928, "step": 16826 }, { "epoch": 0.029837434916784797, "grad_norm": 0.314453125, "learning_rate": 0.001879541873431295, "loss": 0.2392, "step": 16828 }, { "epoch": 0.029840981082094615, "grad_norm": 0.82421875, "learning_rate": 0.0018795121260385397, "loss": 0.2058, "step": 16830 }, { "epoch": 0.02984452724740443, "grad_norm": 0.61328125, "learning_rate": 0.0018794823752366512, "loss": 0.1877, "step": 16832 }, { "epoch": 0.029848073412714244, "grad_norm": 0.65234375, "learning_rate": 0.0018794526210257595, "loss": 0.2212, "step": 16834 }, { "epoch": 0.02985161957802406, "grad_norm": 0.51171875, "learning_rate": 0.0018794228634059949, "loss": 0.2698, "step": 16836 }, { "epoch": 0.029855165743333873, "grad_norm": 0.55078125, "learning_rate": 0.0018793931023774874, "loss": 0.1771, "step": 16838 }, { "epoch": 0.029858711908643688, "grad_norm": 1.59375, "learning_rate": 0.001879363337940367, "loss": 0.3661, "step": 16840 }, { "epoch": 0.029862258073953506, "grad_norm": 0.40625, "learning_rate": 0.0018793335700947643, "loss": 0.2074, "step": 16842 }, { "epoch": 0.02986580423926332, "grad_norm": 0.875, "learning_rate": 0.0018793037988408094, "loss": 0.2325, "step": 16844 }, { "epoch": 0.029869350404573135, "grad_norm": 0.259765625, "learning_rate": 0.001879274024178632, "loss": 0.2228, "step": 16846 }, { "epoch": 0.02987289656988295, "grad_norm": 0.45703125, "learning_rate": 0.0018792442461083628, "loss": 0.2294, "step": 16848 }, { "epoch": 0.029876442735192764, "grad_norm": 0.2412109375, "learning_rate": 0.001879214464630132, "loss": 0.2239, "step": 16850 }, { "epoch": 0.029879988900502582, "grad_norm": 0.345703125, "learning_rate": 0.0018791846797440697, "loss": 0.2419, "step": 16852 }, { "epoch": 0.029883535065812396, "grad_norm": 1.578125, "learning_rate": 0.001879154891450306, "loss": 0.2174, "step": 16854 }, { "epoch": 0.02988708123112221, "grad_norm": 3.0, "learning_rate": 0.0018791250997489718, "loss": 0.4148, "step": 16856 }, { "epoch": 0.029890627396432026, "grad_norm": 1.046875, "learning_rate": 0.0018790953046401969, "loss": 0.2759, "step": 16858 }, { "epoch": 0.02989417356174184, "grad_norm": 0.3203125, "learning_rate": 0.0018790655061241112, "loss": 0.3068, "step": 16860 }, { "epoch": 0.029897719727051655, "grad_norm": 0.3984375, "learning_rate": 0.0018790357042008461, "loss": 0.1669, "step": 16862 }, { "epoch": 0.029901265892361473, "grad_norm": 0.29296875, "learning_rate": 0.0018790058988705312, "loss": 0.2189, "step": 16864 }, { "epoch": 0.029904812057671287, "grad_norm": 0.369140625, "learning_rate": 0.0018789760901332967, "loss": 0.2027, "step": 16866 }, { "epoch": 0.029908358222981102, "grad_norm": 0.99609375, "learning_rate": 0.0018789462779892736, "loss": 0.1999, "step": 16868 }, { "epoch": 0.029911904388290916, "grad_norm": 0.6875, "learning_rate": 0.001878916462438592, "loss": 0.3361, "step": 16870 }, { "epoch": 0.02991545055360073, "grad_norm": 0.392578125, "learning_rate": 0.0018788866434813819, "loss": 0.1945, "step": 16872 }, { "epoch": 0.029918996718910545, "grad_norm": 0.7734375, "learning_rate": 0.0018788568211177744, "loss": 0.196, "step": 16874 }, { "epoch": 0.029922542884220363, "grad_norm": 0.78515625, "learning_rate": 0.0018788269953478993, "loss": 0.2187, "step": 16876 }, { "epoch": 0.029926089049530178, "grad_norm": 0.85546875, "learning_rate": 0.0018787971661718874, "loss": 0.2111, "step": 16878 }, { "epoch": 0.029929635214839993, "grad_norm": 2.28125, "learning_rate": 0.0018787673335898692, "loss": 0.2855, "step": 16880 }, { "epoch": 0.029933181380149807, "grad_norm": 1.015625, "learning_rate": 0.0018787374976019751, "loss": 0.2007, "step": 16882 }, { "epoch": 0.02993672754545962, "grad_norm": 0.984375, "learning_rate": 0.0018787076582083351, "loss": 0.2528, "step": 16884 }, { "epoch": 0.02994027371076944, "grad_norm": 1.9765625, "learning_rate": 0.0018786778154090804, "loss": 0.2743, "step": 16886 }, { "epoch": 0.029943819876079254, "grad_norm": 0.53125, "learning_rate": 0.0018786479692043411, "loss": 0.2032, "step": 16888 }, { "epoch": 0.02994736604138907, "grad_norm": 0.349609375, "learning_rate": 0.0018786181195942482, "loss": 0.1751, "step": 16890 }, { "epoch": 0.029950912206698883, "grad_norm": 1.546875, "learning_rate": 0.0018785882665789317, "loss": 0.2956, "step": 16892 }, { "epoch": 0.029954458372008698, "grad_norm": 0.875, "learning_rate": 0.0018785584101585224, "loss": 0.2187, "step": 16894 }, { "epoch": 0.029958004537318512, "grad_norm": 0.4375, "learning_rate": 0.001878528550333151, "loss": 0.2531, "step": 16896 }, { "epoch": 0.02996155070262833, "grad_norm": 0.431640625, "learning_rate": 0.0018784986871029478, "loss": 0.2083, "step": 16898 }, { "epoch": 0.029965096867938145, "grad_norm": 0.388671875, "learning_rate": 0.0018784688204680437, "loss": 0.2307, "step": 16900 }, { "epoch": 0.02996864303324796, "grad_norm": 0.44140625, "learning_rate": 0.0018784389504285688, "loss": 0.2447, "step": 16902 }, { "epoch": 0.029972189198557774, "grad_norm": 0.486328125, "learning_rate": 0.0018784090769846545, "loss": 0.2985, "step": 16904 }, { "epoch": 0.02997573536386759, "grad_norm": 0.55859375, "learning_rate": 0.0018783792001364308, "loss": 0.2225, "step": 16906 }, { "epoch": 0.029979281529177403, "grad_norm": 0.95703125, "learning_rate": 0.0018783493198840288, "loss": 0.1836, "step": 16908 }, { "epoch": 0.02998282769448722, "grad_norm": 0.625, "learning_rate": 0.0018783194362275788, "loss": 0.2482, "step": 16910 }, { "epoch": 0.029986373859797036, "grad_norm": 1.625, "learning_rate": 0.001878289549167212, "loss": 0.2458, "step": 16912 }, { "epoch": 0.02998992002510685, "grad_norm": 2.15625, "learning_rate": 0.0018782596587030582, "loss": 0.2405, "step": 16914 }, { "epoch": 0.029993466190416665, "grad_norm": 0.78125, "learning_rate": 0.0018782297648352493, "loss": 0.3013, "step": 16916 }, { "epoch": 0.02999701235572648, "grad_norm": 0.390625, "learning_rate": 0.0018781998675639153, "loss": 0.1412, "step": 16918 }, { "epoch": 0.030000558521036298, "grad_norm": 0.5390625, "learning_rate": 0.0018781699668891873, "loss": 0.2485, "step": 16920 }, { "epoch": 0.030004104686346112, "grad_norm": 1.234375, "learning_rate": 0.0018781400628111956, "loss": 0.2366, "step": 16922 }, { "epoch": 0.030007650851655927, "grad_norm": 0.296875, "learning_rate": 0.0018781101553300712, "loss": 0.1848, "step": 16924 }, { "epoch": 0.03001119701696574, "grad_norm": 0.56640625, "learning_rate": 0.0018780802444459451, "loss": 0.2282, "step": 16926 }, { "epoch": 0.030014743182275556, "grad_norm": 0.609375, "learning_rate": 0.0018780503301589477, "loss": 0.2358, "step": 16928 }, { "epoch": 0.03001828934758537, "grad_norm": 1.4140625, "learning_rate": 0.0018780204124692107, "loss": 0.2081, "step": 16930 }, { "epoch": 0.03002183551289519, "grad_norm": 0.71875, "learning_rate": 0.0018779904913768643, "loss": 0.194, "step": 16932 }, { "epoch": 0.030025381678205003, "grad_norm": 5.4375, "learning_rate": 0.0018779605668820388, "loss": 0.1854, "step": 16934 }, { "epoch": 0.030028927843514817, "grad_norm": 1.265625, "learning_rate": 0.0018779306389848658, "loss": 0.2488, "step": 16936 }, { "epoch": 0.030032474008824632, "grad_norm": 0.80078125, "learning_rate": 0.0018779007076854765, "loss": 0.2456, "step": 16938 }, { "epoch": 0.030036020174134447, "grad_norm": 0.439453125, "learning_rate": 0.001877870772984001, "loss": 0.2359, "step": 16940 }, { "epoch": 0.03003956633944426, "grad_norm": 0.5859375, "learning_rate": 0.0018778408348805707, "loss": 0.2924, "step": 16942 }, { "epoch": 0.03004311250475408, "grad_norm": 0.74609375, "learning_rate": 0.0018778108933753162, "loss": 0.2236, "step": 16944 }, { "epoch": 0.030046658670063894, "grad_norm": 2.859375, "learning_rate": 0.001877780948468369, "loss": 0.2881, "step": 16946 }, { "epoch": 0.030050204835373708, "grad_norm": 1.8984375, "learning_rate": 0.0018777510001598595, "loss": 0.3003, "step": 16948 }, { "epoch": 0.030053751000683523, "grad_norm": 0.423828125, "learning_rate": 0.001877721048449919, "loss": 0.3232, "step": 16950 }, { "epoch": 0.030057297165993337, "grad_norm": 0.267578125, "learning_rate": 0.0018776910933386783, "loss": 0.1536, "step": 16952 }, { "epoch": 0.030060843331303155, "grad_norm": 2.09375, "learning_rate": 0.0018776611348262687, "loss": 0.2436, "step": 16954 }, { "epoch": 0.03006438949661297, "grad_norm": 1.6640625, "learning_rate": 0.0018776311729128208, "loss": 0.1865, "step": 16956 }, { "epoch": 0.030067935661922784, "grad_norm": 0.546875, "learning_rate": 0.0018776012075984658, "loss": 0.2413, "step": 16958 }, { "epoch": 0.0300714818272326, "grad_norm": 0.37890625, "learning_rate": 0.001877571238883335, "loss": 0.208, "step": 16960 }, { "epoch": 0.030075027992542414, "grad_norm": 0.8359375, "learning_rate": 0.0018775412667675591, "loss": 0.2029, "step": 16962 }, { "epoch": 0.030078574157852228, "grad_norm": 0.470703125, "learning_rate": 0.0018775112912512697, "loss": 0.1643, "step": 16964 }, { "epoch": 0.030082120323162046, "grad_norm": 1.328125, "learning_rate": 0.001877481312334597, "loss": 0.3411, "step": 16966 }, { "epoch": 0.03008566648847186, "grad_norm": 0.61328125, "learning_rate": 0.001877451330017673, "loss": 0.2141, "step": 16968 }, { "epoch": 0.030089212653781675, "grad_norm": 0.70703125, "learning_rate": 0.0018774213443006284, "loss": 0.1575, "step": 16970 }, { "epoch": 0.03009275881909149, "grad_norm": 0.37890625, "learning_rate": 0.0018773913551835945, "loss": 0.2291, "step": 16972 }, { "epoch": 0.030096304984401304, "grad_norm": 0.6875, "learning_rate": 0.0018773613626667024, "loss": 0.2575, "step": 16974 }, { "epoch": 0.03009985114971112, "grad_norm": 0.90234375, "learning_rate": 0.0018773313667500833, "loss": 0.2712, "step": 16976 }, { "epoch": 0.030103397315020937, "grad_norm": 0.390625, "learning_rate": 0.0018773013674338681, "loss": 0.2503, "step": 16978 }, { "epoch": 0.03010694348033075, "grad_norm": 0.60546875, "learning_rate": 0.0018772713647181882, "loss": 0.217, "step": 16980 }, { "epoch": 0.030110489645640566, "grad_norm": 0.380859375, "learning_rate": 0.0018772413586031751, "loss": 0.2303, "step": 16982 }, { "epoch": 0.03011403581095038, "grad_norm": 0.279296875, "learning_rate": 0.0018772113490889595, "loss": 0.2836, "step": 16984 }, { "epoch": 0.030117581976260195, "grad_norm": 1.53125, "learning_rate": 0.0018771813361756731, "loss": 0.3995, "step": 16986 }, { "epoch": 0.030121128141570013, "grad_norm": 0.71875, "learning_rate": 0.0018771513198634468, "loss": 0.1998, "step": 16988 }, { "epoch": 0.030124674306879828, "grad_norm": 0.380859375, "learning_rate": 0.001877121300152412, "loss": 0.2062, "step": 16990 }, { "epoch": 0.030128220472189642, "grad_norm": 0.3515625, "learning_rate": 0.0018770912770427006, "loss": 0.2045, "step": 16992 }, { "epoch": 0.030131766637499457, "grad_norm": 1.7421875, "learning_rate": 0.001877061250534443, "loss": 0.2022, "step": 16994 }, { "epoch": 0.03013531280280927, "grad_norm": 0.330078125, "learning_rate": 0.0018770312206277708, "loss": 0.1698, "step": 16996 }, { "epoch": 0.030138858968119086, "grad_norm": 0.349609375, "learning_rate": 0.0018770011873228151, "loss": 0.3099, "step": 16998 }, { "epoch": 0.030142405133428904, "grad_norm": 0.75390625, "learning_rate": 0.001876971150619708, "loss": 0.3465, "step": 17000 }, { "epoch": 0.03014595129873872, "grad_norm": 0.98046875, "learning_rate": 0.0018769411105185802, "loss": 0.2811, "step": 17002 }, { "epoch": 0.030149497464048533, "grad_norm": 1.1328125, "learning_rate": 0.0018769110670195633, "loss": 0.1728, "step": 17004 }, { "epoch": 0.030153043629358348, "grad_norm": 0.85546875, "learning_rate": 0.0018768810201227888, "loss": 0.2316, "step": 17006 }, { "epoch": 0.030156589794668162, "grad_norm": 0.423828125, "learning_rate": 0.001876850969828388, "loss": 0.2049, "step": 17008 }, { "epoch": 0.030160135959977977, "grad_norm": 0.333984375, "learning_rate": 0.0018768209161364922, "loss": 0.1935, "step": 17010 }, { "epoch": 0.030163682125287795, "grad_norm": 0.42578125, "learning_rate": 0.0018767908590472328, "loss": 0.2269, "step": 17012 }, { "epoch": 0.03016722829059761, "grad_norm": 0.5703125, "learning_rate": 0.0018767607985607417, "loss": 0.2079, "step": 17014 }, { "epoch": 0.030170774455907424, "grad_norm": 0.3515625, "learning_rate": 0.00187673073467715, "loss": 0.2292, "step": 17016 }, { "epoch": 0.03017432062121724, "grad_norm": 0.421875, "learning_rate": 0.001876700667396589, "loss": 0.1944, "step": 17018 }, { "epoch": 0.030177866786527053, "grad_norm": 0.2119140625, "learning_rate": 0.0018766705967191905, "loss": 0.2205, "step": 17020 }, { "epoch": 0.03018141295183687, "grad_norm": 1.1484375, "learning_rate": 0.0018766405226450862, "loss": 0.2445, "step": 17022 }, { "epoch": 0.030184959117146686, "grad_norm": 0.34765625, "learning_rate": 0.0018766104451744072, "loss": 0.1733, "step": 17024 }, { "epoch": 0.0301885052824565, "grad_norm": 0.306640625, "learning_rate": 0.0018765803643072852, "loss": 0.1993, "step": 17026 }, { "epoch": 0.030192051447766315, "grad_norm": 0.423828125, "learning_rate": 0.0018765502800438519, "loss": 0.245, "step": 17028 }, { "epoch": 0.03019559761307613, "grad_norm": 0.92578125, "learning_rate": 0.0018765201923842388, "loss": 0.2859, "step": 17030 }, { "epoch": 0.030199143778385944, "grad_norm": 1.6328125, "learning_rate": 0.0018764901013285774, "loss": 0.2456, "step": 17032 }, { "epoch": 0.030202689943695762, "grad_norm": 0.51171875, "learning_rate": 0.0018764600068769991, "loss": 0.1842, "step": 17034 }, { "epoch": 0.030206236109005576, "grad_norm": 1.8046875, "learning_rate": 0.0018764299090296356, "loss": 0.2503, "step": 17036 }, { "epoch": 0.03020978227431539, "grad_norm": 3.109375, "learning_rate": 0.0018763998077866194, "loss": 0.2284, "step": 17038 }, { "epoch": 0.030213328439625205, "grad_norm": 0.287109375, "learning_rate": 0.001876369703148081, "loss": 0.2129, "step": 17040 }, { "epoch": 0.03021687460493502, "grad_norm": 0.498046875, "learning_rate": 0.0018763395951141525, "loss": 0.179, "step": 17042 }, { "epoch": 0.030220420770244835, "grad_norm": 1.1328125, "learning_rate": 0.0018763094836849654, "loss": 0.1709, "step": 17044 }, { "epoch": 0.030223966935554653, "grad_norm": 0.796875, "learning_rate": 0.0018762793688606517, "loss": 0.2237, "step": 17046 }, { "epoch": 0.030227513100864467, "grad_norm": 0.55859375, "learning_rate": 0.0018762492506413432, "loss": 0.26, "step": 17048 }, { "epoch": 0.03023105926617428, "grad_norm": 1.6484375, "learning_rate": 0.001876219129027171, "loss": 0.2591, "step": 17050 }, { "epoch": 0.030234605431484096, "grad_norm": 1.25, "learning_rate": 0.0018761890040182676, "loss": 0.3108, "step": 17052 }, { "epoch": 0.03023815159679391, "grad_norm": 0.67578125, "learning_rate": 0.001876158875614764, "loss": 0.3266, "step": 17054 }, { "epoch": 0.03024169776210373, "grad_norm": 1.40625, "learning_rate": 0.0018761287438167924, "loss": 0.3167, "step": 17056 }, { "epoch": 0.030245243927413543, "grad_norm": 1.359375, "learning_rate": 0.0018760986086244844, "loss": 0.2147, "step": 17058 }, { "epoch": 0.030248790092723358, "grad_norm": 0.78125, "learning_rate": 0.0018760684700379721, "loss": 0.2478, "step": 17060 }, { "epoch": 0.030252336258033172, "grad_norm": 0.45703125, "learning_rate": 0.0018760383280573875, "loss": 0.2234, "step": 17062 }, { "epoch": 0.030255882423342987, "grad_norm": 0.87109375, "learning_rate": 0.0018760081826828612, "loss": 0.2975, "step": 17064 }, { "epoch": 0.0302594285886528, "grad_norm": 0.93359375, "learning_rate": 0.0018759780339145265, "loss": 0.3501, "step": 17066 }, { "epoch": 0.03026297475396262, "grad_norm": 0.5234375, "learning_rate": 0.001875947881752514, "loss": 0.2559, "step": 17068 }, { "epoch": 0.030266520919272434, "grad_norm": 0.462890625, "learning_rate": 0.001875917726196957, "loss": 0.2329, "step": 17070 }, { "epoch": 0.03027006708458225, "grad_norm": 0.80859375, "learning_rate": 0.0018758875672479861, "loss": 0.2273, "step": 17072 }, { "epoch": 0.030273613249892063, "grad_norm": 0.48046875, "learning_rate": 0.0018758574049057336, "loss": 0.2218, "step": 17074 }, { "epoch": 0.030277159415201878, "grad_norm": 0.52734375, "learning_rate": 0.0018758272391703318, "loss": 0.2829, "step": 17076 }, { "epoch": 0.030280705580511692, "grad_norm": 0.3359375, "learning_rate": 0.001875797070041912, "loss": 0.233, "step": 17078 }, { "epoch": 0.03028425174582151, "grad_norm": 0.96484375, "learning_rate": 0.0018757668975206065, "loss": 0.2369, "step": 17080 }, { "epoch": 0.030287797911131325, "grad_norm": 0.87890625, "learning_rate": 0.0018757367216065473, "loss": 0.2397, "step": 17082 }, { "epoch": 0.03029134407644114, "grad_norm": 0.400390625, "learning_rate": 0.001875706542299866, "loss": 0.219, "step": 17084 }, { "epoch": 0.030294890241750954, "grad_norm": 0.361328125, "learning_rate": 0.0018756763596006953, "loss": 0.1815, "step": 17086 }, { "epoch": 0.03029843640706077, "grad_norm": 0.5078125, "learning_rate": 0.0018756461735091666, "loss": 0.1752, "step": 17088 }, { "epoch": 0.030301982572370587, "grad_norm": 0.341796875, "learning_rate": 0.0018756159840254121, "loss": 0.2077, "step": 17090 }, { "epoch": 0.0303055287376804, "grad_norm": 0.490234375, "learning_rate": 0.0018755857911495638, "loss": 0.1905, "step": 17092 }, { "epoch": 0.030309074902990216, "grad_norm": 0.6875, "learning_rate": 0.0018755555948817536, "loss": 0.2014, "step": 17094 }, { "epoch": 0.03031262106830003, "grad_norm": 6.96875, "learning_rate": 0.0018755253952221138, "loss": 0.3129, "step": 17096 }, { "epoch": 0.030316167233609845, "grad_norm": 0.96875, "learning_rate": 0.0018754951921707765, "loss": 0.1956, "step": 17098 }, { "epoch": 0.03031971339891966, "grad_norm": 1.140625, "learning_rate": 0.0018754649857278735, "loss": 0.1567, "step": 17100 }, { "epoch": 0.030323259564229477, "grad_norm": 2.015625, "learning_rate": 0.0018754347758935376, "loss": 0.2174, "step": 17102 }, { "epoch": 0.030326805729539292, "grad_norm": 1.2109375, "learning_rate": 0.0018754045626679, "loss": 0.1959, "step": 17104 }, { "epoch": 0.030330351894849106, "grad_norm": 0.59375, "learning_rate": 0.0018753743460510933, "loss": 0.2569, "step": 17106 }, { "epoch": 0.03033389806015892, "grad_norm": 0.74609375, "learning_rate": 0.0018753441260432498, "loss": 0.1682, "step": 17108 }, { "epoch": 0.030337444225468736, "grad_norm": 1.71875, "learning_rate": 0.0018753139026445012, "loss": 0.378, "step": 17110 }, { "epoch": 0.03034099039077855, "grad_norm": 0.87109375, "learning_rate": 0.0018752836758549798, "loss": 0.2137, "step": 17112 }, { "epoch": 0.030344536556088368, "grad_norm": 0.69140625, "learning_rate": 0.0018752534456748186, "loss": 0.1817, "step": 17114 }, { "epoch": 0.030348082721398183, "grad_norm": 0.328125, "learning_rate": 0.0018752232121041486, "loss": 0.3175, "step": 17116 }, { "epoch": 0.030351628886707997, "grad_norm": 0.35546875, "learning_rate": 0.0018751929751431025, "loss": 0.1888, "step": 17118 }, { "epoch": 0.030355175052017812, "grad_norm": 0.34765625, "learning_rate": 0.001875162734791813, "loss": 0.2529, "step": 17120 }, { "epoch": 0.030358721217327626, "grad_norm": 0.3671875, "learning_rate": 0.0018751324910504118, "loss": 0.3258, "step": 17122 }, { "epoch": 0.030362267382637444, "grad_norm": 0.921875, "learning_rate": 0.0018751022439190313, "loss": 0.2126, "step": 17124 }, { "epoch": 0.03036581354794726, "grad_norm": 6.03125, "learning_rate": 0.0018750719933978038, "loss": 0.2061, "step": 17126 }, { "epoch": 0.030369359713257073, "grad_norm": 0.65234375, "learning_rate": 0.0018750417394868618, "loss": 0.2403, "step": 17128 }, { "epoch": 0.030372905878566888, "grad_norm": 1.6015625, "learning_rate": 0.001875011482186337, "loss": 0.2513, "step": 17130 }, { "epoch": 0.030376452043876703, "grad_norm": 1.1640625, "learning_rate": 0.0018749812214963627, "loss": 0.2021, "step": 17132 }, { "epoch": 0.030379998209186517, "grad_norm": 1.140625, "learning_rate": 0.0018749509574170703, "loss": 0.2372, "step": 17134 }, { "epoch": 0.030383544374496335, "grad_norm": 2.171875, "learning_rate": 0.0018749206899485926, "loss": 0.2956, "step": 17136 }, { "epoch": 0.03038709053980615, "grad_norm": 0.349609375, "learning_rate": 0.0018748904190910621, "loss": 0.2667, "step": 17138 }, { "epoch": 0.030390636705115964, "grad_norm": 4.375, "learning_rate": 0.001874860144844611, "loss": 0.4426, "step": 17140 }, { "epoch": 0.03039418287042578, "grad_norm": 0.55078125, "learning_rate": 0.0018748298672093718, "loss": 0.2179, "step": 17142 }, { "epoch": 0.030397729035735593, "grad_norm": 0.56640625, "learning_rate": 0.0018747995861854766, "loss": 0.227, "step": 17144 }, { "epoch": 0.030401275201045408, "grad_norm": 0.65625, "learning_rate": 0.001874769301773058, "loss": 0.2886, "step": 17146 }, { "epoch": 0.030404821366355226, "grad_norm": 0.35546875, "learning_rate": 0.0018747390139722488, "loss": 0.1827, "step": 17148 }, { "epoch": 0.03040836753166504, "grad_norm": 2.453125, "learning_rate": 0.001874708722783181, "loss": 0.2352, "step": 17150 }, { "epoch": 0.030411913696974855, "grad_norm": 0.54296875, "learning_rate": 0.0018746784282059874, "loss": 0.2116, "step": 17152 }, { "epoch": 0.03041545986228467, "grad_norm": 0.458984375, "learning_rate": 0.0018746481302408004, "loss": 0.2293, "step": 17154 }, { "epoch": 0.030419006027594484, "grad_norm": 0.8203125, "learning_rate": 0.001874617828887752, "loss": 0.2005, "step": 17156 }, { "epoch": 0.030422552192904302, "grad_norm": 0.671875, "learning_rate": 0.0018745875241469756, "loss": 0.1953, "step": 17158 }, { "epoch": 0.030426098358214117, "grad_norm": 0.34765625, "learning_rate": 0.001874557216018603, "loss": 0.2324, "step": 17160 }, { "epoch": 0.03042964452352393, "grad_norm": 0.30859375, "learning_rate": 0.0018745269045027672, "loss": 0.2548, "step": 17162 }, { "epoch": 0.030433190688833746, "grad_norm": 0.69921875, "learning_rate": 0.0018744965895996007, "loss": 0.1941, "step": 17164 }, { "epoch": 0.03043673685414356, "grad_norm": 0.408203125, "learning_rate": 0.001874466271309236, "loss": 0.2761, "step": 17166 }, { "epoch": 0.030440283019453375, "grad_norm": 0.6640625, "learning_rate": 0.0018744359496318055, "loss": 0.2193, "step": 17168 }, { "epoch": 0.030443829184763193, "grad_norm": 0.546875, "learning_rate": 0.0018744056245674422, "loss": 0.2135, "step": 17170 }, { "epoch": 0.030447375350073008, "grad_norm": 0.6328125, "learning_rate": 0.0018743752961162788, "loss": 0.2046, "step": 17172 }, { "epoch": 0.030450921515382822, "grad_norm": 0.69140625, "learning_rate": 0.001874344964278447, "loss": 0.3431, "step": 17174 }, { "epoch": 0.030454467680692637, "grad_norm": 0.486328125, "learning_rate": 0.0018743146290540805, "loss": 0.1866, "step": 17176 }, { "epoch": 0.03045801384600245, "grad_norm": 0.244140625, "learning_rate": 0.0018742842904433115, "loss": 0.1631, "step": 17178 }, { "epoch": 0.030461560011312266, "grad_norm": 0.53515625, "learning_rate": 0.001874253948446273, "loss": 0.2105, "step": 17180 }, { "epoch": 0.030465106176622084, "grad_norm": 0.59765625, "learning_rate": 0.0018742236030630972, "loss": 0.269, "step": 17182 }, { "epoch": 0.0304686523419319, "grad_norm": 0.390625, "learning_rate": 0.001874193254293917, "loss": 0.2082, "step": 17184 }, { "epoch": 0.030472198507241713, "grad_norm": 0.51171875, "learning_rate": 0.0018741629021388653, "loss": 0.1461, "step": 17186 }, { "epoch": 0.030475744672551527, "grad_norm": 0.77734375, "learning_rate": 0.0018741325465980747, "loss": 0.1725, "step": 17188 }, { "epoch": 0.030479290837861342, "grad_norm": 0.91015625, "learning_rate": 0.0018741021876716784, "loss": 0.2368, "step": 17190 }, { "epoch": 0.03048283700317116, "grad_norm": 0.671875, "learning_rate": 0.0018740718253598087, "loss": 0.2017, "step": 17192 }, { "epoch": 0.030486383168480975, "grad_norm": 0.39453125, "learning_rate": 0.0018740414596625978, "loss": 0.1784, "step": 17194 }, { "epoch": 0.03048992933379079, "grad_norm": 0.3515625, "learning_rate": 0.00187401109058018, "loss": 0.2186, "step": 17196 }, { "epoch": 0.030493475499100604, "grad_norm": 0.72265625, "learning_rate": 0.0018739807181126866, "loss": 0.1943, "step": 17198 }, { "epoch": 0.030497021664410418, "grad_norm": 0.5546875, "learning_rate": 0.0018739503422602517, "loss": 0.2272, "step": 17200 }, { "epoch": 0.030500567829720233, "grad_norm": 0.4140625, "learning_rate": 0.0018739199630230072, "loss": 0.1759, "step": 17202 }, { "epoch": 0.03050411399503005, "grad_norm": 0.419921875, "learning_rate": 0.0018738895804010865, "loss": 0.2049, "step": 17204 }, { "epoch": 0.030507660160339865, "grad_norm": 0.359375, "learning_rate": 0.0018738591943946224, "loss": 0.2916, "step": 17206 }, { "epoch": 0.03051120632564968, "grad_norm": 1.8515625, "learning_rate": 0.0018738288050037478, "loss": 0.2168, "step": 17208 }, { "epoch": 0.030514752490959494, "grad_norm": 0.87109375, "learning_rate": 0.001873798412228595, "loss": 0.1849, "step": 17210 }, { "epoch": 0.03051829865626931, "grad_norm": 1.859375, "learning_rate": 0.0018737680160692976, "loss": 0.2193, "step": 17212 }, { "epoch": 0.030521844821579124, "grad_norm": 0.4296875, "learning_rate": 0.0018737376165259886, "loss": 0.1575, "step": 17214 }, { "epoch": 0.03052539098688894, "grad_norm": 1.3671875, "learning_rate": 0.0018737072135988004, "loss": 0.3244, "step": 17216 }, { "epoch": 0.030528937152198756, "grad_norm": 0.61328125, "learning_rate": 0.0018736768072878662, "loss": 0.2045, "step": 17218 }, { "epoch": 0.03053248331750857, "grad_norm": 0.275390625, "learning_rate": 0.0018736463975933195, "loss": 0.2313, "step": 17220 }, { "epoch": 0.030536029482818385, "grad_norm": 1.2890625, "learning_rate": 0.0018736159845152928, "loss": 0.3612, "step": 17222 }, { "epoch": 0.0305395756481282, "grad_norm": 0.6171875, "learning_rate": 0.0018735855680539187, "loss": 0.2265, "step": 17224 }, { "epoch": 0.030543121813438018, "grad_norm": 0.408203125, "learning_rate": 0.001873555148209331, "loss": 0.3208, "step": 17226 }, { "epoch": 0.030546667978747832, "grad_norm": 0.345703125, "learning_rate": 0.0018735247249816623, "loss": 0.2595, "step": 17228 }, { "epoch": 0.030550214144057647, "grad_norm": 1.015625, "learning_rate": 0.0018734942983710458, "loss": 0.2208, "step": 17230 }, { "epoch": 0.03055376030936746, "grad_norm": 0.265625, "learning_rate": 0.0018734638683776145, "loss": 0.297, "step": 17232 }, { "epoch": 0.030557306474677276, "grad_norm": 0.486328125, "learning_rate": 0.001873433435001502, "loss": 0.2394, "step": 17234 }, { "epoch": 0.03056085263998709, "grad_norm": 0.73828125, "learning_rate": 0.0018734029982428403, "loss": 0.1875, "step": 17236 }, { "epoch": 0.03056439880529691, "grad_norm": 0.376953125, "learning_rate": 0.001873372558101763, "loss": 0.1928, "step": 17238 }, { "epoch": 0.030567944970606723, "grad_norm": 0.81640625, "learning_rate": 0.001873342114578404, "loss": 0.2573, "step": 17240 }, { "epoch": 0.030571491135916538, "grad_norm": 1.0625, "learning_rate": 0.0018733116676728954, "loss": 0.2527, "step": 17242 }, { "epoch": 0.030575037301226352, "grad_norm": 0.32421875, "learning_rate": 0.0018732812173853709, "loss": 0.1357, "step": 17244 }, { "epoch": 0.030578583466536167, "grad_norm": 0.40625, "learning_rate": 0.0018732507637159634, "loss": 0.2199, "step": 17246 }, { "epoch": 0.03058212963184598, "grad_norm": 0.37109375, "learning_rate": 0.0018732203066648063, "loss": 0.2373, "step": 17248 }, { "epoch": 0.0305856757971558, "grad_norm": 1.03125, "learning_rate": 0.0018731898462320327, "loss": 0.2899, "step": 17250 }, { "epoch": 0.030589221962465614, "grad_norm": 0.90234375, "learning_rate": 0.001873159382417776, "loss": 0.2681, "step": 17252 }, { "epoch": 0.03059276812777543, "grad_norm": 1.2734375, "learning_rate": 0.001873128915222169, "loss": 0.2425, "step": 17254 }, { "epoch": 0.030596314293085243, "grad_norm": 0.5546875, "learning_rate": 0.0018730984446453454, "loss": 0.1893, "step": 17256 }, { "epoch": 0.030599860458395058, "grad_norm": 0.43359375, "learning_rate": 0.0018730679706874384, "loss": 0.1932, "step": 17258 }, { "epoch": 0.030603406623704876, "grad_norm": 0.8828125, "learning_rate": 0.0018730374933485807, "loss": 0.2455, "step": 17260 }, { "epoch": 0.03060695278901469, "grad_norm": 0.77734375, "learning_rate": 0.001873007012628906, "loss": 0.2182, "step": 17262 }, { "epoch": 0.030610498954324505, "grad_norm": 4.59375, "learning_rate": 0.0018729765285285481, "loss": 0.4682, "step": 17264 }, { "epoch": 0.03061404511963432, "grad_norm": 1.6640625, "learning_rate": 0.00187294604104764, "loss": 0.212, "step": 17266 }, { "epoch": 0.030617591284944134, "grad_norm": 0.65625, "learning_rate": 0.0018729155501863142, "loss": 0.2242, "step": 17268 }, { "epoch": 0.03062113745025395, "grad_norm": 2.578125, "learning_rate": 0.0018728850559447052, "loss": 0.2564, "step": 17270 }, { "epoch": 0.030624683615563766, "grad_norm": 0.5859375, "learning_rate": 0.001872854558322946, "loss": 0.2246, "step": 17272 }, { "epoch": 0.03062822978087358, "grad_norm": 1.4453125, "learning_rate": 0.0018728240573211697, "loss": 0.2896, "step": 17274 }, { "epoch": 0.030631775946183395, "grad_norm": 1.84375, "learning_rate": 0.0018727935529395097, "loss": 0.5221, "step": 17276 }, { "epoch": 0.03063532211149321, "grad_norm": 0.47265625, "learning_rate": 0.0018727630451781, "loss": 0.2076, "step": 17278 }, { "epoch": 0.030638868276803025, "grad_norm": 0.7578125, "learning_rate": 0.0018727325340370732, "loss": 0.2596, "step": 17280 }, { "epoch": 0.03064241444211284, "grad_norm": 0.77734375, "learning_rate": 0.0018727020195165637, "loss": 0.1966, "step": 17282 }, { "epoch": 0.030645960607422657, "grad_norm": 0.71484375, "learning_rate": 0.0018726715016167037, "loss": 0.2053, "step": 17284 }, { "epoch": 0.03064950677273247, "grad_norm": 1.4921875, "learning_rate": 0.001872640980337628, "loss": 0.239, "step": 17286 }, { "epoch": 0.030653052938042286, "grad_norm": 0.71484375, "learning_rate": 0.0018726104556794692, "loss": 0.2036, "step": 17288 }, { "epoch": 0.0306565991033521, "grad_norm": 0.470703125, "learning_rate": 0.0018725799276423612, "loss": 0.2449, "step": 17290 }, { "epoch": 0.030660145268661915, "grad_norm": 0.26171875, "learning_rate": 0.0018725493962264371, "loss": 0.2187, "step": 17292 }, { "epoch": 0.030663691433971733, "grad_norm": 0.4375, "learning_rate": 0.001872518861431831, "loss": 0.295, "step": 17294 }, { "epoch": 0.030667237599281548, "grad_norm": 0.373046875, "learning_rate": 0.0018724883232586757, "loss": 0.1794, "step": 17296 }, { "epoch": 0.030670783764591363, "grad_norm": 1.3671875, "learning_rate": 0.0018724577817071054, "loss": 0.3304, "step": 17298 }, { "epoch": 0.030674329929901177, "grad_norm": 1.3046875, "learning_rate": 0.0018724272367772537, "loss": 0.3189, "step": 17300 }, { "epoch": 0.03067787609521099, "grad_norm": 1.0703125, "learning_rate": 0.0018723966884692535, "loss": 0.2513, "step": 17302 }, { "epoch": 0.030681422260520806, "grad_norm": 0.4609375, "learning_rate": 0.0018723661367832391, "loss": 0.2062, "step": 17304 }, { "epoch": 0.030684968425830624, "grad_norm": 0.93359375, "learning_rate": 0.0018723355817193438, "loss": 0.2779, "step": 17306 }, { "epoch": 0.03068851459114044, "grad_norm": 0.8984375, "learning_rate": 0.0018723050232777015, "loss": 0.2425, "step": 17308 }, { "epoch": 0.030692060756450253, "grad_norm": 0.8125, "learning_rate": 0.0018722744614584454, "loss": 0.2628, "step": 17310 }, { "epoch": 0.030695606921760068, "grad_norm": 1.171875, "learning_rate": 0.0018722438962617096, "loss": 0.193, "step": 17312 }, { "epoch": 0.030699153087069882, "grad_norm": 0.431640625, "learning_rate": 0.0018722133276876272, "loss": 0.2005, "step": 17314 }, { "epoch": 0.030702699252379697, "grad_norm": 0.36328125, "learning_rate": 0.0018721827557363327, "loss": 0.1877, "step": 17316 }, { "epoch": 0.030706245417689515, "grad_norm": 0.38671875, "learning_rate": 0.0018721521804079592, "loss": 0.3564, "step": 17318 }, { "epoch": 0.03070979158299933, "grad_norm": 3.09375, "learning_rate": 0.0018721216017026406, "loss": 0.3906, "step": 17320 }, { "epoch": 0.030713337748309144, "grad_norm": 0.6953125, "learning_rate": 0.0018720910196205103, "loss": 0.1994, "step": 17322 }, { "epoch": 0.03071688391361896, "grad_norm": 0.408203125, "learning_rate": 0.0018720604341617027, "loss": 0.1929, "step": 17324 }, { "epoch": 0.030720430078928773, "grad_norm": 1.609375, "learning_rate": 0.0018720298453263511, "loss": 0.2712, "step": 17326 }, { "epoch": 0.03072397624423859, "grad_norm": 0.80078125, "learning_rate": 0.0018719992531145897, "loss": 0.1947, "step": 17328 }, { "epoch": 0.030727522409548406, "grad_norm": 0.443359375, "learning_rate": 0.0018719686575265515, "loss": 0.207, "step": 17330 }, { "epoch": 0.03073106857485822, "grad_norm": 0.25390625, "learning_rate": 0.0018719380585623712, "loss": 0.2166, "step": 17332 }, { "epoch": 0.030734614740168035, "grad_norm": 2.0625, "learning_rate": 0.0018719074562221821, "loss": 0.2616, "step": 17334 }, { "epoch": 0.03073816090547785, "grad_norm": 0.314453125, "learning_rate": 0.0018718768505061182, "loss": 0.18, "step": 17336 }, { "epoch": 0.030741707070787664, "grad_norm": 0.8203125, "learning_rate": 0.0018718462414143132, "loss": 0.222, "step": 17338 }, { "epoch": 0.030745253236097482, "grad_norm": 0.384765625, "learning_rate": 0.001871815628946901, "loss": 0.2071, "step": 17340 }, { "epoch": 0.030748799401407297, "grad_norm": 0.7109375, "learning_rate": 0.0018717850131040159, "loss": 0.1489, "step": 17342 }, { "epoch": 0.03075234556671711, "grad_norm": 0.99609375, "learning_rate": 0.0018717543938857912, "loss": 0.3018, "step": 17344 }, { "epoch": 0.030755891732026926, "grad_norm": 0.43359375, "learning_rate": 0.0018717237712923612, "loss": 0.2209, "step": 17346 }, { "epoch": 0.03075943789733674, "grad_norm": 1.34375, "learning_rate": 0.0018716931453238597, "loss": 0.5243, "step": 17348 }, { "epoch": 0.030762984062646555, "grad_norm": 1.5078125, "learning_rate": 0.0018716625159804206, "loss": 0.2587, "step": 17350 }, { "epoch": 0.030766530227956373, "grad_norm": 0.353515625, "learning_rate": 0.0018716318832621777, "loss": 0.1936, "step": 17352 }, { "epoch": 0.030770076393266187, "grad_norm": 0.63671875, "learning_rate": 0.0018716012471692652, "loss": 0.2319, "step": 17354 }, { "epoch": 0.030773622558576002, "grad_norm": 0.61328125, "learning_rate": 0.0018715706077018174, "loss": 0.2333, "step": 17356 }, { "epoch": 0.030777168723885816, "grad_norm": 1.03125, "learning_rate": 0.0018715399648599677, "loss": 0.2349, "step": 17358 }, { "epoch": 0.03078071488919563, "grad_norm": 15.9375, "learning_rate": 0.0018715093186438502, "loss": 0.1792, "step": 17360 }, { "epoch": 0.03078426105450545, "grad_norm": 0.1845703125, "learning_rate": 0.0018714786690535992, "loss": 0.181, "step": 17362 }, { "epoch": 0.030787807219815264, "grad_norm": 3.40625, "learning_rate": 0.0018714480160893488, "loss": 0.3829, "step": 17364 }, { "epoch": 0.030791353385125078, "grad_norm": 0.484375, "learning_rate": 0.0018714173597512327, "loss": 0.3335, "step": 17366 }, { "epoch": 0.030794899550434893, "grad_norm": 0.60546875, "learning_rate": 0.0018713867000393852, "loss": 0.2416, "step": 17368 }, { "epoch": 0.030798445715744707, "grad_norm": 0.7109375, "learning_rate": 0.0018713560369539404, "loss": 0.2032, "step": 17370 }, { "epoch": 0.030801991881054522, "grad_norm": 2.796875, "learning_rate": 0.001871325370495032, "loss": 0.3384, "step": 17372 }, { "epoch": 0.03080553804636434, "grad_norm": 0.91796875, "learning_rate": 0.0018712947006627953, "loss": 0.3237, "step": 17374 }, { "epoch": 0.030809084211674154, "grad_norm": 0.67578125, "learning_rate": 0.0018712640274573629, "loss": 0.1685, "step": 17376 }, { "epoch": 0.03081263037698397, "grad_norm": 0.5234375, "learning_rate": 0.00187123335087887, "loss": 0.2009, "step": 17378 }, { "epoch": 0.030816176542293783, "grad_norm": 0.310546875, "learning_rate": 0.00187120267092745, "loss": 0.1704, "step": 17380 }, { "epoch": 0.030819722707603598, "grad_norm": 0.49609375, "learning_rate": 0.0018711719876032377, "loss": 0.1788, "step": 17382 }, { "epoch": 0.030823268872913413, "grad_norm": 0.4375, "learning_rate": 0.0018711413009063673, "loss": 0.1816, "step": 17384 }, { "epoch": 0.03082681503822323, "grad_norm": 0.462890625, "learning_rate": 0.0018711106108369728, "loss": 0.2069, "step": 17386 }, { "epoch": 0.030830361203533045, "grad_norm": 0.65625, "learning_rate": 0.001871079917395188, "loss": 0.1887, "step": 17388 }, { "epoch": 0.03083390736884286, "grad_norm": 0.5546875, "learning_rate": 0.0018710492205811478, "loss": 0.3078, "step": 17390 }, { "epoch": 0.030837453534152674, "grad_norm": 0.953125, "learning_rate": 0.0018710185203949861, "loss": 0.2971, "step": 17392 }, { "epoch": 0.03084099969946249, "grad_norm": 0.54296875, "learning_rate": 0.0018709878168368373, "loss": 0.2023, "step": 17394 }, { "epoch": 0.030844545864772307, "grad_norm": 0.31640625, "learning_rate": 0.0018709571099068357, "loss": 0.2332, "step": 17396 }, { "epoch": 0.03084809203008212, "grad_norm": 0.3046875, "learning_rate": 0.0018709263996051152, "loss": 0.1837, "step": 17398 }, { "epoch": 0.030851638195391936, "grad_norm": 0.462890625, "learning_rate": 0.001870895685931811, "loss": 0.2283, "step": 17400 }, { "epoch": 0.03085518436070175, "grad_norm": 0.87890625, "learning_rate": 0.0018708649688870562, "loss": 0.2538, "step": 17402 }, { "epoch": 0.030858730526011565, "grad_norm": 0.6015625, "learning_rate": 0.0018708342484709862, "loss": 0.2414, "step": 17404 }, { "epoch": 0.03086227669132138, "grad_norm": 0.248046875, "learning_rate": 0.001870803524683735, "loss": 0.302, "step": 17406 }, { "epoch": 0.030865822856631198, "grad_norm": 0.478515625, "learning_rate": 0.001870772797525437, "loss": 0.2277, "step": 17408 }, { "epoch": 0.030869369021941012, "grad_norm": 0.54296875, "learning_rate": 0.001870742066996226, "loss": 0.262, "step": 17410 }, { "epoch": 0.030872915187250827, "grad_norm": 0.396484375, "learning_rate": 0.0018707113330962374, "loss": 0.2552, "step": 17412 }, { "epoch": 0.03087646135256064, "grad_norm": 0.84375, "learning_rate": 0.0018706805958256048, "loss": 0.2257, "step": 17414 }, { "epoch": 0.030880007517870456, "grad_norm": 1.34375, "learning_rate": 0.001870649855184463, "loss": 0.3411, "step": 17416 }, { "epoch": 0.03088355368318027, "grad_norm": 0.369140625, "learning_rate": 0.0018706191111729465, "loss": 0.2146, "step": 17418 }, { "epoch": 0.03088709984849009, "grad_norm": 0.322265625, "learning_rate": 0.0018705883637911895, "loss": 0.184, "step": 17420 }, { "epoch": 0.030890646013799903, "grad_norm": 3.03125, "learning_rate": 0.0018705576130393267, "loss": 0.3681, "step": 17422 }, { "epoch": 0.030894192179109718, "grad_norm": 1.1015625, "learning_rate": 0.0018705268589174924, "loss": 0.249, "step": 17424 }, { "epoch": 0.030897738344419532, "grad_norm": 0.1875, "learning_rate": 0.0018704961014258212, "loss": 0.2316, "step": 17426 }, { "epoch": 0.030901284509729347, "grad_norm": 0.79296875, "learning_rate": 0.001870465340564448, "loss": 0.2682, "step": 17428 }, { "epoch": 0.030904830675039165, "grad_norm": 0.609375, "learning_rate": 0.0018704345763335061, "loss": 0.2361, "step": 17430 }, { "epoch": 0.03090837684034898, "grad_norm": 0.65625, "learning_rate": 0.0018704038087331315, "loss": 0.2588, "step": 17432 }, { "epoch": 0.030911923005658794, "grad_norm": 0.734375, "learning_rate": 0.001870373037763458, "loss": 0.2163, "step": 17434 }, { "epoch": 0.03091546917096861, "grad_norm": 0.265625, "learning_rate": 0.0018703422634246205, "loss": 0.2092, "step": 17436 }, { "epoch": 0.030919015336278423, "grad_norm": 0.50390625, "learning_rate": 0.0018703114857167534, "loss": 0.2253, "step": 17438 }, { "epoch": 0.030922561501588237, "grad_norm": 0.640625, "learning_rate": 0.001870280704639991, "loss": 0.3098, "step": 17440 }, { "epoch": 0.030926107666898055, "grad_norm": 0.2294921875, "learning_rate": 0.0018702499201944684, "loss": 0.1965, "step": 17442 }, { "epoch": 0.03092965383220787, "grad_norm": 0.1904296875, "learning_rate": 0.0018702191323803203, "loss": 0.2075, "step": 17444 }, { "epoch": 0.030933199997517685, "grad_norm": 0.451171875, "learning_rate": 0.0018701883411976806, "loss": 0.2383, "step": 17446 }, { "epoch": 0.0309367461628275, "grad_norm": 0.43359375, "learning_rate": 0.001870157546646685, "loss": 0.2165, "step": 17448 }, { "epoch": 0.030940292328137314, "grad_norm": 0.609375, "learning_rate": 0.0018701267487274673, "loss": 0.3288, "step": 17450 }, { "epoch": 0.030943838493447128, "grad_norm": 1.828125, "learning_rate": 0.0018700959474401626, "loss": 0.3086, "step": 17452 }, { "epoch": 0.030947384658756946, "grad_norm": 0.494140625, "learning_rate": 0.0018700651427849056, "loss": 0.2196, "step": 17454 }, { "epoch": 0.03095093082406676, "grad_norm": 5.3125, "learning_rate": 0.001870034334761831, "loss": 0.3595, "step": 17456 }, { "epoch": 0.030954476989376575, "grad_norm": 0.58984375, "learning_rate": 0.0018700035233710735, "loss": 0.2255, "step": 17458 }, { "epoch": 0.03095802315468639, "grad_norm": 0.6015625, "learning_rate": 0.001869972708612768, "loss": 0.1879, "step": 17460 }, { "epoch": 0.030961569319996204, "grad_norm": 1.59375, "learning_rate": 0.001869941890487049, "loss": 0.2213, "step": 17462 }, { "epoch": 0.030965115485306022, "grad_norm": 0.88671875, "learning_rate": 0.0018699110689940513, "loss": 0.177, "step": 17464 }, { "epoch": 0.030968661650615837, "grad_norm": 0.2412109375, "learning_rate": 0.0018698802441339097, "loss": 0.1349, "step": 17466 }, { "epoch": 0.03097220781592565, "grad_norm": 0.322265625, "learning_rate": 0.0018698494159067594, "loss": 0.2248, "step": 17468 }, { "epoch": 0.030975753981235466, "grad_norm": 0.55078125, "learning_rate": 0.0018698185843127348, "loss": 0.1855, "step": 17470 }, { "epoch": 0.03097930014654528, "grad_norm": 0.6015625, "learning_rate": 0.001869787749351971, "loss": 0.1833, "step": 17472 }, { "epoch": 0.030982846311855095, "grad_norm": 0.56640625, "learning_rate": 0.0018697569110246027, "loss": 0.222, "step": 17474 }, { "epoch": 0.030986392477164913, "grad_norm": 0.63671875, "learning_rate": 0.0018697260693307648, "loss": 0.2189, "step": 17476 }, { "epoch": 0.030989938642474728, "grad_norm": 1.4921875, "learning_rate": 0.0018696952242705921, "loss": 0.2365, "step": 17478 }, { "epoch": 0.030993484807784542, "grad_norm": 0.515625, "learning_rate": 0.0018696643758442193, "loss": 0.1738, "step": 17480 }, { "epoch": 0.030997030973094357, "grad_norm": 0.5703125, "learning_rate": 0.001869633524051782, "loss": 0.2623, "step": 17482 }, { "epoch": 0.03100057713840417, "grad_norm": 0.734375, "learning_rate": 0.0018696026688934146, "loss": 0.2507, "step": 17484 }, { "epoch": 0.031004123303713986, "grad_norm": 2.09375, "learning_rate": 0.0018695718103692524, "loss": 0.2423, "step": 17486 }, { "epoch": 0.031007669469023804, "grad_norm": 0.412109375, "learning_rate": 0.0018695409484794296, "loss": 0.2604, "step": 17488 }, { "epoch": 0.03101121563433362, "grad_norm": 0.77734375, "learning_rate": 0.0018695100832240821, "loss": 0.3385, "step": 17490 }, { "epoch": 0.031014761799643433, "grad_norm": 0.455078125, "learning_rate": 0.0018694792146033443, "loss": 0.2156, "step": 17492 }, { "epoch": 0.031018307964953248, "grad_norm": 0.796875, "learning_rate": 0.0018694483426173517, "loss": 0.1883, "step": 17494 }, { "epoch": 0.031021854130263062, "grad_norm": 2.03125, "learning_rate": 0.0018694174672662387, "loss": 0.1886, "step": 17496 }, { "epoch": 0.03102540029557288, "grad_norm": 0.412109375, "learning_rate": 0.0018693865885501407, "loss": 0.2398, "step": 17498 }, { "epoch": 0.031028946460882695, "grad_norm": 0.408203125, "learning_rate": 0.0018693557064691927, "loss": 0.2397, "step": 17500 }, { "epoch": 0.03103249262619251, "grad_norm": 0.4921875, "learning_rate": 0.0018693248210235297, "loss": 0.1971, "step": 17502 }, { "epoch": 0.031036038791502324, "grad_norm": 0.466796875, "learning_rate": 0.0018692939322132868, "loss": 0.2357, "step": 17504 }, { "epoch": 0.03103958495681214, "grad_norm": 1.1953125, "learning_rate": 0.0018692630400385989, "loss": 0.2297, "step": 17506 }, { "epoch": 0.031043131122121953, "grad_norm": 0.77734375, "learning_rate": 0.0018692321444996017, "loss": 0.2307, "step": 17508 }, { "epoch": 0.03104667728743177, "grad_norm": 0.73046875, "learning_rate": 0.0018692012455964295, "loss": 0.1995, "step": 17510 }, { "epoch": 0.031050223452741586, "grad_norm": 0.86328125, "learning_rate": 0.0018691703433292182, "loss": 0.241, "step": 17512 }, { "epoch": 0.0310537696180514, "grad_norm": 0.98828125, "learning_rate": 0.0018691394376981025, "loss": 0.2082, "step": 17514 }, { "epoch": 0.031057315783361215, "grad_norm": 0.53125, "learning_rate": 0.001869108528703218, "loss": 0.218, "step": 17516 }, { "epoch": 0.03106086194867103, "grad_norm": 0.2431640625, "learning_rate": 0.001869077616344699, "loss": 0.1956, "step": 17518 }, { "epoch": 0.031064408113980844, "grad_norm": 1.09375, "learning_rate": 0.0018690467006226813, "loss": 0.4033, "step": 17520 }, { "epoch": 0.031067954279290662, "grad_norm": 0.65234375, "learning_rate": 0.0018690157815373, "loss": 0.2213, "step": 17522 }, { "epoch": 0.031071500444600476, "grad_norm": 1.296875, "learning_rate": 0.0018689848590886908, "loss": 0.1714, "step": 17524 }, { "epoch": 0.03107504660991029, "grad_norm": 0.53125, "learning_rate": 0.001868953933276988, "loss": 0.2766, "step": 17526 }, { "epoch": 0.031078592775220105, "grad_norm": 0.5234375, "learning_rate": 0.001868923004102328, "loss": 0.2182, "step": 17528 }, { "epoch": 0.03108213894052992, "grad_norm": 0.33984375, "learning_rate": 0.001868892071564845, "loss": 0.2309, "step": 17530 }, { "epoch": 0.031085685105839738, "grad_norm": 0.62109375, "learning_rate": 0.0018688611356646745, "loss": 0.198, "step": 17532 }, { "epoch": 0.031089231271149553, "grad_norm": 1.78125, "learning_rate": 0.0018688301964019524, "loss": 0.3087, "step": 17534 }, { "epoch": 0.031092777436459367, "grad_norm": 0.412109375, "learning_rate": 0.0018687992537768136, "loss": 0.2131, "step": 17536 }, { "epoch": 0.03109632360176918, "grad_norm": 3.71875, "learning_rate": 0.0018687683077893932, "loss": 0.5201, "step": 17538 }, { "epoch": 0.031099869767078996, "grad_norm": 0.66015625, "learning_rate": 0.0018687373584398267, "loss": 0.2124, "step": 17540 }, { "epoch": 0.03110341593238881, "grad_norm": 0.3671875, "learning_rate": 0.00186870640572825, "loss": 0.2241, "step": 17542 }, { "epoch": 0.03110696209769863, "grad_norm": 1.2109375, "learning_rate": 0.0018686754496547975, "loss": 0.2391, "step": 17544 }, { "epoch": 0.031110508263008443, "grad_norm": 0.62890625, "learning_rate": 0.0018686444902196054, "loss": 0.2052, "step": 17546 }, { "epoch": 0.031114054428318258, "grad_norm": 0.5, "learning_rate": 0.0018686135274228086, "loss": 0.2078, "step": 17548 }, { "epoch": 0.031117600593628073, "grad_norm": 0.30078125, "learning_rate": 0.0018685825612645428, "loss": 0.2223, "step": 17550 }, { "epoch": 0.031121146758937887, "grad_norm": 1.3046875, "learning_rate": 0.0018685515917449432, "loss": 0.2463, "step": 17552 }, { "epoch": 0.0311246929242477, "grad_norm": 5.96875, "learning_rate": 0.0018685206188641455, "loss": 0.3168, "step": 17554 }, { "epoch": 0.03112823908955752, "grad_norm": 1.8359375, "learning_rate": 0.0018684896426222853, "loss": 0.4675, "step": 17556 }, { "epoch": 0.031131785254867334, "grad_norm": 0.3359375, "learning_rate": 0.0018684586630194973, "loss": 0.2195, "step": 17558 }, { "epoch": 0.03113533142017715, "grad_norm": 2.53125, "learning_rate": 0.0018684276800559174, "loss": 0.3469, "step": 17560 }, { "epoch": 0.031138877585486963, "grad_norm": 0.2236328125, "learning_rate": 0.0018683966937316817, "loss": 0.2229, "step": 17562 }, { "epoch": 0.031142423750796778, "grad_norm": 0.69140625, "learning_rate": 0.0018683657040469248, "loss": 0.1791, "step": 17564 }, { "epoch": 0.031145969916106596, "grad_norm": 0.59765625, "learning_rate": 0.0018683347110017828, "loss": 0.2017, "step": 17566 }, { "epoch": 0.03114951608141641, "grad_norm": 0.36328125, "learning_rate": 0.001868303714596391, "loss": 0.222, "step": 17568 }, { "epoch": 0.031153062246726225, "grad_norm": 1.2109375, "learning_rate": 0.0018682727148308853, "loss": 0.2837, "step": 17570 }, { "epoch": 0.03115660841203604, "grad_norm": 0.7421875, "learning_rate": 0.0018682417117054006, "loss": 0.3787, "step": 17572 }, { "epoch": 0.031160154577345854, "grad_norm": 0.5390625, "learning_rate": 0.0018682107052200732, "loss": 0.2609, "step": 17574 }, { "epoch": 0.03116370074265567, "grad_norm": 0.8046875, "learning_rate": 0.0018681796953750383, "loss": 0.2596, "step": 17576 }, { "epoch": 0.031167246907965487, "grad_norm": 0.7421875, "learning_rate": 0.0018681486821704317, "loss": 0.2354, "step": 17578 }, { "epoch": 0.0311707930732753, "grad_norm": 0.31640625, "learning_rate": 0.0018681176656063887, "loss": 0.173, "step": 17580 }, { "epoch": 0.031174339238585116, "grad_norm": 1.59375, "learning_rate": 0.0018680866456830454, "loss": 0.3466, "step": 17582 }, { "epoch": 0.03117788540389493, "grad_norm": 0.76953125, "learning_rate": 0.0018680556224005371, "loss": 0.2364, "step": 17584 }, { "epoch": 0.031181431569204745, "grad_norm": 0.408203125, "learning_rate": 0.001868024595759, "loss": 0.2513, "step": 17586 }, { "epoch": 0.03118497773451456, "grad_norm": 0.9921875, "learning_rate": 0.0018679935657585694, "loss": 0.2596, "step": 17588 }, { "epoch": 0.031188523899824377, "grad_norm": 3.140625, "learning_rate": 0.001867962532399381, "loss": 0.3123, "step": 17590 }, { "epoch": 0.031192070065134192, "grad_norm": 0.5703125, "learning_rate": 0.0018679314956815702, "loss": 0.2948, "step": 17592 }, { "epoch": 0.031195616230444007, "grad_norm": 0.416015625, "learning_rate": 0.0018679004556052734, "loss": 0.275, "step": 17594 }, { "epoch": 0.03119916239575382, "grad_norm": 0.34765625, "learning_rate": 0.001867869412170626, "loss": 0.1929, "step": 17596 }, { "epoch": 0.031202708561063636, "grad_norm": 1.1953125, "learning_rate": 0.001867838365377764, "loss": 0.2254, "step": 17598 }, { "epoch": 0.031206254726373454, "grad_norm": 0.5390625, "learning_rate": 0.0018678073152268227, "loss": 0.2186, "step": 17600 }, { "epoch": 0.031209800891683268, "grad_norm": 0.65625, "learning_rate": 0.0018677762617179384, "loss": 0.2129, "step": 17602 }, { "epoch": 0.031213347056993083, "grad_norm": 0.337890625, "learning_rate": 0.0018677452048512465, "loss": 0.2509, "step": 17604 }, { "epoch": 0.031216893222302897, "grad_norm": 0.390625, "learning_rate": 0.0018677141446268833, "loss": 0.246, "step": 17606 }, { "epoch": 0.031220439387612712, "grad_norm": 0.466796875, "learning_rate": 0.0018676830810449843, "loss": 0.195, "step": 17608 }, { "epoch": 0.031223985552922526, "grad_norm": 0.64453125, "learning_rate": 0.0018676520141056854, "loss": 0.1648, "step": 17610 }, { "epoch": 0.031227531718232344, "grad_norm": 0.609375, "learning_rate": 0.0018676209438091226, "loss": 0.2633, "step": 17612 }, { "epoch": 0.03123107788354216, "grad_norm": 1.515625, "learning_rate": 0.0018675898701554314, "loss": 0.2349, "step": 17614 }, { "epoch": 0.031234624048851974, "grad_norm": 0.5859375, "learning_rate": 0.0018675587931447479, "loss": 0.1571, "step": 17616 }, { "epoch": 0.031238170214161788, "grad_norm": 4.125, "learning_rate": 0.0018675277127772085, "loss": 0.2977, "step": 17618 }, { "epoch": 0.031241716379471603, "grad_norm": 1.25, "learning_rate": 0.0018674966290529488, "loss": 0.2097, "step": 17620 }, { "epoch": 0.031245262544781417, "grad_norm": 0.73828125, "learning_rate": 0.0018674655419721044, "loss": 0.2524, "step": 17622 }, { "epoch": 0.031248808710091235, "grad_norm": 0.625, "learning_rate": 0.0018674344515348111, "loss": 0.1837, "step": 17624 }, { "epoch": 0.031252354875401046, "grad_norm": 0.44921875, "learning_rate": 0.001867403357741206, "loss": 0.1841, "step": 17626 }, { "epoch": 0.03125590104071086, "grad_norm": 2.171875, "learning_rate": 0.001867372260591424, "loss": 0.1978, "step": 17628 }, { "epoch": 0.03125944720602068, "grad_norm": 2.421875, "learning_rate": 0.0018673411600856017, "loss": 0.2488, "step": 17630 }, { "epoch": 0.0312629933713305, "grad_norm": 1.5546875, "learning_rate": 0.0018673100562238746, "loss": 0.3018, "step": 17632 }, { "epoch": 0.03126653953664031, "grad_norm": 0.84765625, "learning_rate": 0.0018672789490063793, "loss": 0.3209, "step": 17634 }, { "epoch": 0.031270085701950126, "grad_norm": 0.52734375, "learning_rate": 0.0018672478384332513, "loss": 0.2762, "step": 17636 }, { "epoch": 0.03127363186725994, "grad_norm": 0.490234375, "learning_rate": 0.0018672167245046272, "loss": 0.2803, "step": 17638 }, { "epoch": 0.031277178032569755, "grad_norm": 0.388671875, "learning_rate": 0.0018671856072206426, "loss": 0.179, "step": 17640 }, { "epoch": 0.03128072419787957, "grad_norm": 0.48828125, "learning_rate": 0.0018671544865814338, "loss": 0.2225, "step": 17642 }, { "epoch": 0.031284270363189384, "grad_norm": 2.796875, "learning_rate": 0.0018671233625871368, "loss": 0.2085, "step": 17644 }, { "epoch": 0.0312878165284992, "grad_norm": 5.25, "learning_rate": 0.0018670922352378883, "loss": 0.5095, "step": 17646 }, { "epoch": 0.03129136269380901, "grad_norm": 0.48828125, "learning_rate": 0.0018670611045338236, "loss": 0.2, "step": 17648 }, { "epoch": 0.03129490885911883, "grad_norm": 0.3125, "learning_rate": 0.0018670299704750789, "loss": 0.322, "step": 17650 }, { "epoch": 0.03129845502442865, "grad_norm": 0.5390625, "learning_rate": 0.0018669988330617911, "loss": 0.2108, "step": 17652 }, { "epoch": 0.031302001189738464, "grad_norm": 0.81640625, "learning_rate": 0.001866967692294096, "loss": 0.2359, "step": 17654 }, { "epoch": 0.03130554735504828, "grad_norm": 2.109375, "learning_rate": 0.0018669365481721292, "loss": 0.2707, "step": 17656 }, { "epoch": 0.03130909352035809, "grad_norm": 0.244140625, "learning_rate": 0.0018669054006960279, "loss": 0.1762, "step": 17658 }, { "epoch": 0.03131263968566791, "grad_norm": 0.71484375, "learning_rate": 0.0018668742498659276, "loss": 0.2128, "step": 17660 }, { "epoch": 0.03131618585097772, "grad_norm": 0.75390625, "learning_rate": 0.0018668430956819647, "loss": 0.2031, "step": 17662 }, { "epoch": 0.03131973201628754, "grad_norm": 0.65234375, "learning_rate": 0.001866811938144276, "loss": 0.2487, "step": 17664 }, { "epoch": 0.03132327818159735, "grad_norm": 1.46875, "learning_rate": 0.001866780777252997, "loss": 0.2409, "step": 17666 }, { "epoch": 0.031326824346907166, "grad_norm": 0.423828125, "learning_rate": 0.0018667496130082642, "loss": 0.2776, "step": 17668 }, { "epoch": 0.03133037051221698, "grad_norm": 0.34375, "learning_rate": 0.001866718445410214, "loss": 0.1541, "step": 17670 }, { "epoch": 0.031333916677526795, "grad_norm": 0.8515625, "learning_rate": 0.0018666872744589826, "loss": 0.2181, "step": 17672 }, { "epoch": 0.031337462842836616, "grad_norm": 0.51171875, "learning_rate": 0.0018666561001547068, "loss": 0.3044, "step": 17674 }, { "epoch": 0.03134100900814643, "grad_norm": 0.439453125, "learning_rate": 0.001866624922497522, "loss": 0.1773, "step": 17676 }, { "epoch": 0.031344555173456246, "grad_norm": 0.50390625, "learning_rate": 0.0018665937414875655, "loss": 0.2183, "step": 17678 }, { "epoch": 0.03134810133876606, "grad_norm": 0.48046875, "learning_rate": 0.001866562557124973, "loss": 0.2307, "step": 17680 }, { "epoch": 0.031351647504075875, "grad_norm": 0.9296875, "learning_rate": 0.0018665313694098814, "loss": 0.2377, "step": 17682 }, { "epoch": 0.03135519366938569, "grad_norm": 0.9296875, "learning_rate": 0.0018665001783424267, "loss": 0.2033, "step": 17684 }, { "epoch": 0.031358739834695504, "grad_norm": 1.0234375, "learning_rate": 0.0018664689839227454, "loss": 0.1996, "step": 17686 }, { "epoch": 0.03136228600000532, "grad_norm": 0.3671875, "learning_rate": 0.0018664377861509743, "loss": 0.1965, "step": 17688 }, { "epoch": 0.03136583216531513, "grad_norm": 0.94921875, "learning_rate": 0.0018664065850272492, "loss": 0.1761, "step": 17690 }, { "epoch": 0.03136937833062495, "grad_norm": 0.42578125, "learning_rate": 0.0018663753805517069, "loss": 0.5204, "step": 17692 }, { "epoch": 0.03137292449593476, "grad_norm": 1.5, "learning_rate": 0.001866344172724484, "loss": 0.397, "step": 17694 }, { "epoch": 0.031376470661244577, "grad_norm": 0.36328125, "learning_rate": 0.0018663129615457167, "loss": 0.1731, "step": 17696 }, { "epoch": 0.0313800168265544, "grad_norm": 0.6640625, "learning_rate": 0.0018662817470155415, "loss": 0.3383, "step": 17698 }, { "epoch": 0.03138356299186421, "grad_norm": 1.0, "learning_rate": 0.0018662505291340951, "loss": 0.3073, "step": 17700 }, { "epoch": 0.03138710915717403, "grad_norm": 0.40234375, "learning_rate": 0.0018662193079015143, "loss": 0.2507, "step": 17702 }, { "epoch": 0.03139065532248384, "grad_norm": 0.54296875, "learning_rate": 0.0018661880833179352, "loss": 0.1776, "step": 17704 }, { "epoch": 0.031394201487793656, "grad_norm": 1.9296875, "learning_rate": 0.0018661568553834944, "loss": 0.3084, "step": 17706 }, { "epoch": 0.03139774765310347, "grad_norm": 0.703125, "learning_rate": 0.0018661256240983285, "loss": 0.2354, "step": 17708 }, { "epoch": 0.031401293818413285, "grad_norm": 0.75390625, "learning_rate": 0.0018660943894625744, "loss": 0.1604, "step": 17710 }, { "epoch": 0.0314048399837231, "grad_norm": 1.4296875, "learning_rate": 0.0018660631514763682, "loss": 0.2698, "step": 17712 }, { "epoch": 0.031408386149032914, "grad_norm": 1.328125, "learning_rate": 0.001866031910139847, "loss": 0.2641, "step": 17714 }, { "epoch": 0.03141193231434273, "grad_norm": 0.88671875, "learning_rate": 0.001866000665453147, "loss": 0.1925, "step": 17716 }, { "epoch": 0.031415478479652544, "grad_norm": 0.82421875, "learning_rate": 0.0018659694174164047, "loss": 0.1866, "step": 17718 }, { "epoch": 0.031419024644962365, "grad_norm": 0.435546875, "learning_rate": 0.0018659381660297576, "loss": 0.296, "step": 17720 }, { "epoch": 0.03142257081027218, "grad_norm": 1.7109375, "learning_rate": 0.0018659069112933416, "loss": 0.2878, "step": 17722 }, { "epoch": 0.031426116975581994, "grad_norm": 0.62890625, "learning_rate": 0.0018658756532072937, "loss": 0.1713, "step": 17724 }, { "epoch": 0.03142966314089181, "grad_norm": 1.5703125, "learning_rate": 0.0018658443917717506, "loss": 0.2247, "step": 17726 }, { "epoch": 0.03143320930620162, "grad_norm": 0.412109375, "learning_rate": 0.0018658131269868492, "loss": 0.3071, "step": 17728 }, { "epoch": 0.03143675547151144, "grad_norm": 0.376953125, "learning_rate": 0.0018657818588527259, "loss": 0.2512, "step": 17730 }, { "epoch": 0.03144030163682125, "grad_norm": 0.67578125, "learning_rate": 0.0018657505873695174, "loss": 0.1854, "step": 17732 }, { "epoch": 0.03144384780213107, "grad_norm": 0.71875, "learning_rate": 0.0018657193125373608, "loss": 0.2104, "step": 17734 }, { "epoch": 0.03144739396744088, "grad_norm": 1.203125, "learning_rate": 0.0018656880343563924, "loss": 0.3567, "step": 17736 }, { "epoch": 0.031450940132750696, "grad_norm": 2.765625, "learning_rate": 0.0018656567528267498, "loss": 0.4753, "step": 17738 }, { "epoch": 0.03145448629806051, "grad_norm": 0.462890625, "learning_rate": 0.0018656254679485689, "loss": 0.2237, "step": 17740 }, { "epoch": 0.03145803246337033, "grad_norm": 0.470703125, "learning_rate": 0.001865594179721987, "loss": 0.2699, "step": 17742 }, { "epoch": 0.03146157862868015, "grad_norm": 0.5, "learning_rate": 0.0018655628881471412, "loss": 0.1918, "step": 17744 }, { "epoch": 0.03146512479398996, "grad_norm": 1.9140625, "learning_rate": 0.0018655315932241678, "loss": 0.2191, "step": 17746 }, { "epoch": 0.031468670959299776, "grad_norm": 0.35546875, "learning_rate": 0.0018655002949532035, "loss": 0.2002, "step": 17748 }, { "epoch": 0.03147221712460959, "grad_norm": 18.375, "learning_rate": 0.001865468993334386, "loss": 0.4105, "step": 17750 }, { "epoch": 0.031475763289919405, "grad_norm": 2.640625, "learning_rate": 0.0018654376883678516, "loss": 0.3898, "step": 17752 }, { "epoch": 0.03147930945522922, "grad_norm": 1.8984375, "learning_rate": 0.0018654063800537378, "loss": 0.2361, "step": 17754 }, { "epoch": 0.031482855620539034, "grad_norm": 0.56640625, "learning_rate": 0.0018653750683921804, "loss": 0.2057, "step": 17756 }, { "epoch": 0.03148640178584885, "grad_norm": 0.498046875, "learning_rate": 0.0018653437533833177, "loss": 0.2252, "step": 17758 }, { "epoch": 0.03148994795115866, "grad_norm": 0.62890625, "learning_rate": 0.0018653124350272857, "loss": 0.1853, "step": 17760 }, { "epoch": 0.03149349411646848, "grad_norm": 0.9375, "learning_rate": 0.0018652811133242216, "loss": 0.2732, "step": 17762 }, { "epoch": 0.03149704028177829, "grad_norm": 1.0546875, "learning_rate": 0.0018652497882742625, "loss": 0.1769, "step": 17764 }, { "epoch": 0.031500586447088114, "grad_norm": 0.51953125, "learning_rate": 0.0018652184598775452, "loss": 0.2036, "step": 17766 }, { "epoch": 0.03150413261239793, "grad_norm": 1.1796875, "learning_rate": 0.001865187128134207, "loss": 0.2111, "step": 17768 }, { "epoch": 0.03150767877770774, "grad_norm": 0.34375, "learning_rate": 0.0018651557930443846, "loss": 0.2134, "step": 17770 }, { "epoch": 0.03151122494301756, "grad_norm": 0.4609375, "learning_rate": 0.0018651244546082153, "loss": 0.1905, "step": 17772 }, { "epoch": 0.03151477110832737, "grad_norm": 0.63671875, "learning_rate": 0.001865093112825836, "loss": 0.2593, "step": 17774 }, { "epoch": 0.031518317273637186, "grad_norm": 1.3203125, "learning_rate": 0.0018650617676973843, "loss": 0.2784, "step": 17776 }, { "epoch": 0.031521863438947, "grad_norm": 7.53125, "learning_rate": 0.0018650304192229963, "loss": 0.1985, "step": 17778 }, { "epoch": 0.031525409604256815, "grad_norm": 1.25, "learning_rate": 0.00186499906740281, "loss": 0.2204, "step": 17780 }, { "epoch": 0.03152895576956663, "grad_norm": 0.94921875, "learning_rate": 0.0018649677122369616, "loss": 0.1602, "step": 17782 }, { "epoch": 0.031532501934876445, "grad_norm": 0.408203125, "learning_rate": 0.0018649363537255892, "loss": 0.2226, "step": 17784 }, { "epoch": 0.03153604810018626, "grad_norm": 1.078125, "learning_rate": 0.0018649049918688294, "loss": 0.2038, "step": 17786 }, { "epoch": 0.03153959426549608, "grad_norm": 0.55078125, "learning_rate": 0.0018648736266668197, "loss": 0.1976, "step": 17788 }, { "epoch": 0.031543140430805895, "grad_norm": 0.8046875, "learning_rate": 0.0018648422581196966, "loss": 0.2489, "step": 17790 }, { "epoch": 0.03154668659611571, "grad_norm": 0.361328125, "learning_rate": 0.001864810886227598, "loss": 0.1839, "step": 17792 }, { "epoch": 0.031550232761425524, "grad_norm": 0.310546875, "learning_rate": 0.001864779510990661, "loss": 0.1903, "step": 17794 }, { "epoch": 0.03155377892673534, "grad_norm": 0.24609375, "learning_rate": 0.0018647481324090226, "loss": 0.2307, "step": 17796 }, { "epoch": 0.03155732509204515, "grad_norm": 0.58984375, "learning_rate": 0.00186471675048282, "loss": 0.2603, "step": 17798 }, { "epoch": 0.03156087125735497, "grad_norm": 0.3359375, "learning_rate": 0.0018646853652121904, "loss": 0.2546, "step": 17800 }, { "epoch": 0.03156441742266478, "grad_norm": 0.86328125, "learning_rate": 0.0018646539765972711, "loss": 0.4379, "step": 17802 }, { "epoch": 0.0315679635879746, "grad_norm": 1.3828125, "learning_rate": 0.0018646225846381999, "loss": 0.2227, "step": 17804 }, { "epoch": 0.03157150975328441, "grad_norm": 0.70703125, "learning_rate": 0.0018645911893351134, "loss": 0.3412, "step": 17806 }, { "epoch": 0.031575055918594226, "grad_norm": 2.59375, "learning_rate": 0.0018645597906881493, "loss": 0.336, "step": 17808 }, { "epoch": 0.03157860208390405, "grad_norm": 0.9765625, "learning_rate": 0.0018645283886974447, "loss": 0.3767, "step": 17810 }, { "epoch": 0.03158214824921386, "grad_norm": 0.515625, "learning_rate": 0.0018644969833631368, "loss": 0.1747, "step": 17812 }, { "epoch": 0.03158569441452368, "grad_norm": 0.5703125, "learning_rate": 0.0018644655746853636, "loss": 0.1672, "step": 17814 }, { "epoch": 0.03158924057983349, "grad_norm": 0.98828125, "learning_rate": 0.0018644341626642617, "loss": 0.1931, "step": 17816 }, { "epoch": 0.031592786745143306, "grad_norm": 0.53515625, "learning_rate": 0.001864402747299969, "loss": 0.2153, "step": 17818 }, { "epoch": 0.03159633291045312, "grad_norm": 0.421875, "learning_rate": 0.0018643713285926228, "loss": 0.1417, "step": 17820 }, { "epoch": 0.031599879075762935, "grad_norm": 0.58984375, "learning_rate": 0.00186433990654236, "loss": 0.3583, "step": 17822 }, { "epoch": 0.03160342524107275, "grad_norm": 0.482421875, "learning_rate": 0.0018643084811493188, "loss": 0.2388, "step": 17824 }, { "epoch": 0.031606971406382564, "grad_norm": 1.625, "learning_rate": 0.001864277052413636, "loss": 0.3, "step": 17826 }, { "epoch": 0.03161051757169238, "grad_norm": 1.6875, "learning_rate": 0.0018642456203354495, "loss": 0.2398, "step": 17828 }, { "epoch": 0.03161406373700219, "grad_norm": 0.63671875, "learning_rate": 0.0018642141849148967, "loss": 0.2565, "step": 17830 }, { "epoch": 0.03161760990231201, "grad_norm": 0.240234375, "learning_rate": 0.0018641827461521145, "loss": 0.206, "step": 17832 }, { "epoch": 0.03162115606762183, "grad_norm": 0.341796875, "learning_rate": 0.0018641513040472412, "loss": 0.2475, "step": 17834 }, { "epoch": 0.031624702232931644, "grad_norm": 0.2294921875, "learning_rate": 0.001864119858600414, "loss": 0.2042, "step": 17836 }, { "epoch": 0.03162824839824146, "grad_norm": 0.408203125, "learning_rate": 0.0018640884098117703, "loss": 0.1757, "step": 17838 }, { "epoch": 0.03163179456355127, "grad_norm": 0.5625, "learning_rate": 0.0018640569576814476, "loss": 0.2236, "step": 17840 }, { "epoch": 0.03163534072886109, "grad_norm": 0.8359375, "learning_rate": 0.0018640255022095839, "loss": 0.534, "step": 17842 }, { "epoch": 0.0316388868941709, "grad_norm": 0.2236328125, "learning_rate": 0.0018639940433963164, "loss": 0.2065, "step": 17844 }, { "epoch": 0.03164243305948072, "grad_norm": 0.419921875, "learning_rate": 0.0018639625812417824, "loss": 0.239, "step": 17846 }, { "epoch": 0.03164597922479053, "grad_norm": 0.416015625, "learning_rate": 0.0018639311157461202, "loss": 0.1978, "step": 17848 }, { "epoch": 0.031649525390100346, "grad_norm": 0.41796875, "learning_rate": 0.001863899646909467, "loss": 0.2182, "step": 17850 }, { "epoch": 0.03165307155541016, "grad_norm": 0.458984375, "learning_rate": 0.00186386817473196, "loss": 0.214, "step": 17852 }, { "epoch": 0.031656617720719975, "grad_norm": 0.302734375, "learning_rate": 0.001863836699213738, "loss": 0.228, "step": 17854 }, { "epoch": 0.031660163886029796, "grad_norm": 0.408203125, "learning_rate": 0.0018638052203549375, "loss": 0.1656, "step": 17856 }, { "epoch": 0.03166371005133961, "grad_norm": 0.41015625, "learning_rate": 0.0018637737381556967, "loss": 0.2155, "step": 17858 }, { "epoch": 0.031667256216649425, "grad_norm": 0.37109375, "learning_rate": 0.0018637422526161533, "loss": 0.2515, "step": 17860 }, { "epoch": 0.03167080238195924, "grad_norm": 2.171875, "learning_rate": 0.001863710763736445, "loss": 0.3484, "step": 17862 }, { "epoch": 0.031674348547269054, "grad_norm": 0.306640625, "learning_rate": 0.0018636792715167095, "loss": 0.2331, "step": 17864 }, { "epoch": 0.03167789471257887, "grad_norm": 0.341796875, "learning_rate": 0.001863647775957084, "loss": 0.3115, "step": 17866 }, { "epoch": 0.031681440877888684, "grad_norm": 0.3125, "learning_rate": 0.0018636162770577069, "loss": 0.1633, "step": 17868 }, { "epoch": 0.0316849870431985, "grad_norm": 0.61328125, "learning_rate": 0.001863584774818716, "loss": 0.2658, "step": 17870 }, { "epoch": 0.03168853320850831, "grad_norm": 0.44921875, "learning_rate": 0.0018635532692402488, "loss": 0.218, "step": 17872 }, { "epoch": 0.03169207937381813, "grad_norm": 2.140625, "learning_rate": 0.001863521760322443, "loss": 0.2217, "step": 17874 }, { "epoch": 0.03169562553912794, "grad_norm": 0.6015625, "learning_rate": 0.0018634902480654364, "loss": 0.2117, "step": 17876 }, { "epoch": 0.03169917170443776, "grad_norm": 0.87890625, "learning_rate": 0.0018634587324693672, "loss": 0.1975, "step": 17878 }, { "epoch": 0.03170271786974758, "grad_norm": 0.546875, "learning_rate": 0.0018634272135343733, "loss": 0.3673, "step": 17880 }, { "epoch": 0.03170626403505739, "grad_norm": 0.53125, "learning_rate": 0.0018633956912605916, "loss": 0.4124, "step": 17882 }, { "epoch": 0.03170981020036721, "grad_norm": 1.6796875, "learning_rate": 0.0018633641656481607, "loss": 0.23, "step": 17884 }, { "epoch": 0.03171335636567702, "grad_norm": 0.75390625, "learning_rate": 0.0018633326366972185, "loss": 0.2893, "step": 17886 }, { "epoch": 0.031716902530986836, "grad_norm": 0.58984375, "learning_rate": 0.0018633011044079026, "loss": 0.1991, "step": 17888 }, { "epoch": 0.03172044869629665, "grad_norm": 0.322265625, "learning_rate": 0.0018632695687803512, "loss": 0.2246, "step": 17890 }, { "epoch": 0.031723994861606465, "grad_norm": 1.109375, "learning_rate": 0.001863238029814702, "loss": 0.2162, "step": 17892 }, { "epoch": 0.03172754102691628, "grad_norm": 2.71875, "learning_rate": 0.001863206487511093, "loss": 0.3298, "step": 17894 }, { "epoch": 0.031731087192226094, "grad_norm": 0.6875, "learning_rate": 0.0018631749418696621, "loss": 0.2567, "step": 17896 }, { "epoch": 0.03173463335753591, "grad_norm": 0.49609375, "learning_rate": 0.0018631433928905476, "loss": 0.2203, "step": 17898 }, { "epoch": 0.03173817952284572, "grad_norm": 0.55078125, "learning_rate": 0.0018631118405738867, "loss": 0.2427, "step": 17900 }, { "epoch": 0.031741725688155545, "grad_norm": 0.33203125, "learning_rate": 0.0018630802849198182, "loss": 0.1782, "step": 17902 }, { "epoch": 0.03174527185346536, "grad_norm": 0.5859375, "learning_rate": 0.0018630487259284798, "loss": 0.3068, "step": 17904 }, { "epoch": 0.031748818018775174, "grad_norm": 0.361328125, "learning_rate": 0.0018630171636000094, "loss": 0.3974, "step": 17906 }, { "epoch": 0.03175236418408499, "grad_norm": 0.400390625, "learning_rate": 0.0018629855979345451, "loss": 0.23, "step": 17908 }, { "epoch": 0.0317559103493948, "grad_norm": 0.328125, "learning_rate": 0.0018629540289322249, "loss": 0.2461, "step": 17910 }, { "epoch": 0.03175945651470462, "grad_norm": 0.984375, "learning_rate": 0.0018629224565931872, "loss": 0.2578, "step": 17912 }, { "epoch": 0.03176300268001443, "grad_norm": 0.6796875, "learning_rate": 0.0018628908809175697, "loss": 0.1649, "step": 17914 }, { "epoch": 0.03176654884532425, "grad_norm": 1.4453125, "learning_rate": 0.0018628593019055107, "loss": 0.2234, "step": 17916 }, { "epoch": 0.03177009501063406, "grad_norm": 0.51171875, "learning_rate": 0.001862827719557148, "loss": 0.2182, "step": 17918 }, { "epoch": 0.031773641175943876, "grad_norm": 0.32421875, "learning_rate": 0.0018627961338726204, "loss": 0.1624, "step": 17920 }, { "epoch": 0.03177718734125369, "grad_norm": 0.73046875, "learning_rate": 0.0018627645448520654, "loss": 0.2471, "step": 17922 }, { "epoch": 0.03178073350656351, "grad_norm": 0.455078125, "learning_rate": 0.0018627329524956212, "loss": 0.2871, "step": 17924 }, { "epoch": 0.031784279671873326, "grad_norm": 0.400390625, "learning_rate": 0.0018627013568034262, "loss": 0.197, "step": 17926 }, { "epoch": 0.03178782583718314, "grad_norm": 0.470703125, "learning_rate": 0.0018626697577756185, "loss": 0.2584, "step": 17928 }, { "epoch": 0.031791372002492956, "grad_norm": 0.86328125, "learning_rate": 0.0018626381554123362, "loss": 0.3106, "step": 17930 }, { "epoch": 0.03179491816780277, "grad_norm": 0.58984375, "learning_rate": 0.0018626065497137176, "loss": 0.2439, "step": 17932 }, { "epoch": 0.031798464333112585, "grad_norm": 0.5859375, "learning_rate": 0.001862574940679901, "loss": 0.2669, "step": 17934 }, { "epoch": 0.0318020104984224, "grad_norm": 0.5625, "learning_rate": 0.0018625433283110248, "loss": 0.1524, "step": 17936 }, { "epoch": 0.031805556663732214, "grad_norm": 0.73828125, "learning_rate": 0.0018625117126072266, "loss": 0.2604, "step": 17938 }, { "epoch": 0.03180910282904203, "grad_norm": 1.875, "learning_rate": 0.0018624800935686454, "loss": 0.283, "step": 17940 }, { "epoch": 0.03181264899435184, "grad_norm": 0.419921875, "learning_rate": 0.0018624484711954191, "loss": 0.2197, "step": 17942 }, { "epoch": 0.03181619515966166, "grad_norm": 0.62109375, "learning_rate": 0.001862416845487686, "loss": 0.2309, "step": 17944 }, { "epoch": 0.03181974132497148, "grad_norm": 0.859375, "learning_rate": 0.0018623852164455844, "loss": 0.2321, "step": 17946 }, { "epoch": 0.03182328749028129, "grad_norm": 0.828125, "learning_rate": 0.001862353584069253, "loss": 0.2239, "step": 17948 }, { "epoch": 0.03182683365559111, "grad_norm": 0.71484375, "learning_rate": 0.0018623219483588295, "loss": 0.1956, "step": 17950 }, { "epoch": 0.03183037982090092, "grad_norm": 1.078125, "learning_rate": 0.0018622903093144527, "loss": 0.2365, "step": 17952 }, { "epoch": 0.03183392598621074, "grad_norm": 0.36328125, "learning_rate": 0.001862258666936261, "loss": 0.1912, "step": 17954 }, { "epoch": 0.03183747215152055, "grad_norm": 0.31640625, "learning_rate": 0.0018622270212243928, "loss": 0.2004, "step": 17956 }, { "epoch": 0.031841018316830366, "grad_norm": 0.349609375, "learning_rate": 0.001862195372178986, "loss": 0.2406, "step": 17958 }, { "epoch": 0.03184456448214018, "grad_norm": 0.26171875, "learning_rate": 0.0018621637198001798, "loss": 0.2245, "step": 17960 }, { "epoch": 0.031848110647449995, "grad_norm": 0.4921875, "learning_rate": 0.0018621320640881116, "loss": 0.4642, "step": 17962 }, { "epoch": 0.03185165681275981, "grad_norm": 0.34765625, "learning_rate": 0.001862100405042921, "loss": 0.1687, "step": 17964 }, { "epoch": 0.031855202978069624, "grad_norm": 0.9609375, "learning_rate": 0.0018620687426647458, "loss": 0.2321, "step": 17966 }, { "epoch": 0.03185874914337944, "grad_norm": 0.2451171875, "learning_rate": 0.0018620370769537244, "loss": 0.2305, "step": 17968 }, { "epoch": 0.03186229530868926, "grad_norm": 0.431640625, "learning_rate": 0.0018620054079099956, "loss": 0.2585, "step": 17970 }, { "epoch": 0.031865841473999075, "grad_norm": 0.79296875, "learning_rate": 0.0018619737355336978, "loss": 0.1928, "step": 17972 }, { "epoch": 0.03186938763930889, "grad_norm": 0.384765625, "learning_rate": 0.0018619420598249695, "loss": 0.2071, "step": 17974 }, { "epoch": 0.031872933804618704, "grad_norm": 0.6171875, "learning_rate": 0.0018619103807839491, "loss": 0.2396, "step": 17976 }, { "epoch": 0.03187647996992852, "grad_norm": 0.375, "learning_rate": 0.0018618786984107751, "loss": 0.1904, "step": 17978 }, { "epoch": 0.03188002613523833, "grad_norm": 0.28515625, "learning_rate": 0.0018618470127055868, "loss": 0.2272, "step": 17980 }, { "epoch": 0.03188357230054815, "grad_norm": 0.59375, "learning_rate": 0.0018618153236685218, "loss": 0.1658, "step": 17982 }, { "epoch": 0.03188711846585796, "grad_norm": 0.5625, "learning_rate": 0.0018617836312997192, "loss": 0.1885, "step": 17984 }, { "epoch": 0.03189066463116778, "grad_norm": 0.6484375, "learning_rate": 0.0018617519355993172, "loss": 0.2103, "step": 17986 }, { "epoch": 0.03189421079647759, "grad_norm": 0.234375, "learning_rate": 0.001861720236567455, "loss": 0.2018, "step": 17988 }, { "epoch": 0.031897756961787406, "grad_norm": 0.4140625, "learning_rate": 0.001861688534204271, "loss": 0.1625, "step": 17990 }, { "epoch": 0.03190130312709723, "grad_norm": 2.90625, "learning_rate": 0.0018616568285099036, "loss": 0.2244, "step": 17992 }, { "epoch": 0.03190484929240704, "grad_norm": 0.35546875, "learning_rate": 0.001861625119484492, "loss": 0.2451, "step": 17994 }, { "epoch": 0.03190839545771686, "grad_norm": 0.392578125, "learning_rate": 0.0018615934071281742, "loss": 0.2537, "step": 17996 }, { "epoch": 0.03191194162302667, "grad_norm": 0.35546875, "learning_rate": 0.0018615616914410891, "loss": 0.21, "step": 17998 }, { "epoch": 0.031915487788336486, "grad_norm": 0.7734375, "learning_rate": 0.0018615299724233756, "loss": 0.4146, "step": 18000 }, { "epoch": 0.0319190339536463, "grad_norm": 0.30859375, "learning_rate": 0.0018614982500751724, "loss": 0.2046, "step": 18002 }, { "epoch": 0.031922580118956115, "grad_norm": 1.71875, "learning_rate": 0.0018614665243966183, "loss": 0.5474, "step": 18004 }, { "epoch": 0.03192612628426593, "grad_norm": 0.380859375, "learning_rate": 0.0018614347953878519, "loss": 0.1986, "step": 18006 }, { "epoch": 0.031929672449575744, "grad_norm": 0.337890625, "learning_rate": 0.0018614030630490122, "loss": 0.1813, "step": 18008 }, { "epoch": 0.03193321861488556, "grad_norm": 0.7890625, "learning_rate": 0.0018613713273802374, "loss": 0.2413, "step": 18010 }, { "epoch": 0.03193676478019537, "grad_norm": 0.2158203125, "learning_rate": 0.0018613395883816668, "loss": 0.1492, "step": 18012 }, { "epoch": 0.031940310945505195, "grad_norm": 0.46875, "learning_rate": 0.001861307846053439, "loss": 0.1784, "step": 18014 }, { "epoch": 0.03194385711081501, "grad_norm": 0.337890625, "learning_rate": 0.0018612761003956933, "loss": 0.2061, "step": 18016 }, { "epoch": 0.031947403276124824, "grad_norm": 0.9765625, "learning_rate": 0.0018612443514085678, "loss": 0.2748, "step": 18018 }, { "epoch": 0.03195094944143464, "grad_norm": 0.7890625, "learning_rate": 0.0018612125990922017, "loss": 0.2316, "step": 18020 }, { "epoch": 0.03195449560674445, "grad_norm": 0.70703125, "learning_rate": 0.001861180843446734, "loss": 0.2314, "step": 18022 }, { "epoch": 0.03195804177205427, "grad_norm": 1.3515625, "learning_rate": 0.0018611490844723032, "loss": 0.2928, "step": 18024 }, { "epoch": 0.03196158793736408, "grad_norm": 0.59765625, "learning_rate": 0.0018611173221690484, "loss": 0.2028, "step": 18026 }, { "epoch": 0.031965134102673896, "grad_norm": 0.6484375, "learning_rate": 0.0018610855565371087, "loss": 0.2538, "step": 18028 }, { "epoch": 0.03196868026798371, "grad_norm": 0.26171875, "learning_rate": 0.0018610537875766232, "loss": 0.1566, "step": 18030 }, { "epoch": 0.031972226433293525, "grad_norm": 0.5546875, "learning_rate": 0.0018610220152877302, "loss": 0.2096, "step": 18032 }, { "epoch": 0.03197577259860334, "grad_norm": 0.298828125, "learning_rate": 0.0018609902396705688, "loss": 0.1914, "step": 18034 }, { "epoch": 0.031979318763913155, "grad_norm": 0.78515625, "learning_rate": 0.0018609584607252783, "loss": 0.2491, "step": 18036 }, { "epoch": 0.031982864929222976, "grad_norm": 0.30078125, "learning_rate": 0.0018609266784519977, "loss": 0.2483, "step": 18038 }, { "epoch": 0.03198641109453279, "grad_norm": 0.55078125, "learning_rate": 0.0018608948928508654, "loss": 0.2743, "step": 18040 }, { "epoch": 0.031989957259842605, "grad_norm": 1.078125, "learning_rate": 0.0018608631039220209, "loss": 0.2122, "step": 18042 }, { "epoch": 0.03199350342515242, "grad_norm": 0.2578125, "learning_rate": 0.0018608313116656033, "loss": 0.2182, "step": 18044 }, { "epoch": 0.031997049590462234, "grad_norm": 1.2109375, "learning_rate": 0.0018607995160817516, "loss": 0.239, "step": 18046 }, { "epoch": 0.03200059575577205, "grad_norm": 0.6484375, "learning_rate": 0.0018607677171706045, "loss": 0.4483, "step": 18048 }, { "epoch": 0.03200414192108186, "grad_norm": 0.8359375, "learning_rate": 0.0018607359149323012, "loss": 0.2102, "step": 18050 }, { "epoch": 0.03200768808639168, "grad_norm": 0.4921875, "learning_rate": 0.001860704109366981, "loss": 0.2599, "step": 18052 }, { "epoch": 0.03201123425170149, "grad_norm": 0.41796875, "learning_rate": 0.0018606723004747831, "loss": 0.2488, "step": 18054 }, { "epoch": 0.03201478041701131, "grad_norm": 1.015625, "learning_rate": 0.0018606404882558466, "loss": 0.2863, "step": 18056 }, { "epoch": 0.03201832658232112, "grad_norm": 1.7890625, "learning_rate": 0.00186060867271031, "loss": 0.2543, "step": 18058 }, { "epoch": 0.03202187274763094, "grad_norm": 0.455078125, "learning_rate": 0.001860576853838313, "loss": 0.2351, "step": 18060 }, { "epoch": 0.03202541891294076, "grad_norm": 1.2734375, "learning_rate": 0.0018605450316399948, "loss": 0.2415, "step": 18062 }, { "epoch": 0.03202896507825057, "grad_norm": 0.302734375, "learning_rate": 0.001860513206115494, "loss": 0.253, "step": 18064 }, { "epoch": 0.03203251124356039, "grad_norm": 0.44921875, "learning_rate": 0.0018604813772649504, "loss": 0.2276, "step": 18066 }, { "epoch": 0.0320360574088702, "grad_norm": 0.2578125, "learning_rate": 0.0018604495450885033, "loss": 0.2334, "step": 18068 }, { "epoch": 0.032039603574180016, "grad_norm": 0.2421875, "learning_rate": 0.0018604177095862913, "loss": 0.1755, "step": 18070 }, { "epoch": 0.03204314973948983, "grad_norm": 1.1484375, "learning_rate": 0.001860385870758454, "loss": 0.5021, "step": 18072 }, { "epoch": 0.032046695904799645, "grad_norm": 0.41015625, "learning_rate": 0.0018603540286051307, "loss": 0.1723, "step": 18074 }, { "epoch": 0.03205024207010946, "grad_norm": 0.32421875, "learning_rate": 0.0018603221831264604, "loss": 0.1575, "step": 18076 }, { "epoch": 0.032053788235419274, "grad_norm": 0.58984375, "learning_rate": 0.0018602903343225825, "loss": 0.2388, "step": 18078 }, { "epoch": 0.03205733440072909, "grad_norm": 0.359375, "learning_rate": 0.0018602584821936362, "loss": 0.2637, "step": 18080 }, { "epoch": 0.03206088056603891, "grad_norm": 2.453125, "learning_rate": 0.0018602266267397612, "loss": 0.3091, "step": 18082 }, { "epoch": 0.032064426731348725, "grad_norm": 0.396484375, "learning_rate": 0.0018601947679610963, "loss": 0.2268, "step": 18084 }, { "epoch": 0.03206797289665854, "grad_norm": 0.353515625, "learning_rate": 0.0018601629058577812, "loss": 0.2248, "step": 18086 }, { "epoch": 0.032071519061968354, "grad_norm": 1.8125, "learning_rate": 0.0018601310404299553, "loss": 0.2685, "step": 18088 }, { "epoch": 0.03207506522727817, "grad_norm": 0.34765625, "learning_rate": 0.0018600991716777571, "loss": 0.1575, "step": 18090 }, { "epoch": 0.03207861139258798, "grad_norm": 0.609375, "learning_rate": 0.001860067299601327, "loss": 0.287, "step": 18092 }, { "epoch": 0.0320821575578978, "grad_norm": 1.4609375, "learning_rate": 0.0018600354242008044, "loss": 0.2244, "step": 18094 }, { "epoch": 0.03208570372320761, "grad_norm": 0.3828125, "learning_rate": 0.0018600035454763278, "loss": 0.1829, "step": 18096 }, { "epoch": 0.03208924988851743, "grad_norm": 1.1640625, "learning_rate": 0.0018599716634280377, "loss": 0.328, "step": 18098 }, { "epoch": 0.03209279605382724, "grad_norm": 2.390625, "learning_rate": 0.0018599397780560726, "loss": 0.3586, "step": 18100 }, { "epoch": 0.032096342219137056, "grad_norm": 0.2431640625, "learning_rate": 0.0018599078893605722, "loss": 0.132, "step": 18102 }, { "epoch": 0.03209988838444687, "grad_norm": 2.546875, "learning_rate": 0.0018598759973416762, "loss": 0.3367, "step": 18104 }, { "epoch": 0.03210343454975669, "grad_norm": 0.474609375, "learning_rate": 0.0018598441019995243, "loss": 0.2589, "step": 18106 }, { "epoch": 0.032106980715066506, "grad_norm": 0.2451171875, "learning_rate": 0.0018598122033342554, "loss": 0.4878, "step": 18108 }, { "epoch": 0.03211052688037632, "grad_norm": 0.435546875, "learning_rate": 0.0018597803013460092, "loss": 0.205, "step": 18110 }, { "epoch": 0.032114073045686135, "grad_norm": 0.330078125, "learning_rate": 0.0018597483960349258, "loss": 0.2288, "step": 18112 }, { "epoch": 0.03211761921099595, "grad_norm": 0.58203125, "learning_rate": 0.0018597164874011437, "loss": 0.1929, "step": 18114 }, { "epoch": 0.032121165376305764, "grad_norm": 0.322265625, "learning_rate": 0.001859684575444803, "loss": 0.22, "step": 18116 }, { "epoch": 0.03212471154161558, "grad_norm": 3.140625, "learning_rate": 0.0018596526601660432, "loss": 0.2775, "step": 18118 }, { "epoch": 0.032128257706925394, "grad_norm": 0.98046875, "learning_rate": 0.0018596207415650043, "loss": 0.2005, "step": 18120 }, { "epoch": 0.03213180387223521, "grad_norm": 0.453125, "learning_rate": 0.0018595888196418255, "loss": 0.3944, "step": 18122 }, { "epoch": 0.03213535003754502, "grad_norm": 3.71875, "learning_rate": 0.0018595568943966458, "loss": 0.2905, "step": 18124 }, { "epoch": 0.03213889620285484, "grad_norm": 3.28125, "learning_rate": 0.0018595249658296062, "loss": 0.6198, "step": 18126 }, { "epoch": 0.03214244236816466, "grad_norm": 0.28515625, "learning_rate": 0.0018594930339408454, "loss": 0.1551, "step": 18128 }, { "epoch": 0.03214598853347447, "grad_norm": 0.59765625, "learning_rate": 0.001859461098730503, "loss": 0.2012, "step": 18130 }, { "epoch": 0.03214953469878429, "grad_norm": 0.265625, "learning_rate": 0.001859429160198719, "loss": 0.3668, "step": 18132 }, { "epoch": 0.0321530808640941, "grad_norm": 1.046875, "learning_rate": 0.0018593972183456329, "loss": 0.373, "step": 18134 }, { "epoch": 0.03215662702940392, "grad_norm": 0.5078125, "learning_rate": 0.0018593652731713846, "loss": 0.2481, "step": 18136 }, { "epoch": 0.03216017319471373, "grad_norm": 0.38671875, "learning_rate": 0.0018593333246761138, "loss": 0.3061, "step": 18138 }, { "epoch": 0.032163719360023546, "grad_norm": 0.73046875, "learning_rate": 0.00185930137285996, "loss": 0.1828, "step": 18140 }, { "epoch": 0.03216726552533336, "grad_norm": 0.66796875, "learning_rate": 0.001859269417723063, "loss": 0.1925, "step": 18142 }, { "epoch": 0.032170811690643175, "grad_norm": 2.5, "learning_rate": 0.0018592374592655626, "loss": 0.3239, "step": 18144 }, { "epoch": 0.03217435785595299, "grad_norm": 0.54296875, "learning_rate": 0.0018592054974875985, "loss": 0.187, "step": 18146 }, { "epoch": 0.032177904021262804, "grad_norm": 0.58203125, "learning_rate": 0.0018591735323893106, "loss": 0.2462, "step": 18148 }, { "epoch": 0.032181450186572626, "grad_norm": 0.7890625, "learning_rate": 0.0018591415639708385, "loss": 0.1938, "step": 18150 }, { "epoch": 0.03218499635188244, "grad_norm": 0.41015625, "learning_rate": 0.0018591095922323227, "loss": 0.1855, "step": 18152 }, { "epoch": 0.032188542517192255, "grad_norm": 0.52734375, "learning_rate": 0.001859077617173902, "loss": 0.2045, "step": 18154 }, { "epoch": 0.03219208868250207, "grad_norm": 0.87890625, "learning_rate": 0.001859045638795717, "loss": 0.1768, "step": 18156 }, { "epoch": 0.032195634847811884, "grad_norm": 0.484375, "learning_rate": 0.001859013657097907, "loss": 0.2209, "step": 18158 }, { "epoch": 0.0321991810131217, "grad_norm": 0.64453125, "learning_rate": 0.0018589816720806123, "loss": 0.1719, "step": 18160 }, { "epoch": 0.03220272717843151, "grad_norm": 2.125, "learning_rate": 0.0018589496837439728, "loss": 0.2348, "step": 18162 }, { "epoch": 0.03220627334374133, "grad_norm": 7.96875, "learning_rate": 0.001858917692088128, "loss": 0.2563, "step": 18164 }, { "epoch": 0.03220981950905114, "grad_norm": 0.66796875, "learning_rate": 0.001858885697113218, "loss": 0.2144, "step": 18166 }, { "epoch": 0.03221336567436096, "grad_norm": 0.63671875, "learning_rate": 0.001858853698819383, "loss": 0.1818, "step": 18168 }, { "epoch": 0.03221691183967077, "grad_norm": 0.73828125, "learning_rate": 0.0018588216972067625, "loss": 0.2706, "step": 18170 }, { "epoch": 0.032220458004980586, "grad_norm": 0.6953125, "learning_rate": 0.0018587896922754967, "loss": 0.1309, "step": 18172 }, { "epoch": 0.03222400417029041, "grad_norm": 0.341796875, "learning_rate": 0.0018587576840257255, "loss": 0.1866, "step": 18174 }, { "epoch": 0.03222755033560022, "grad_norm": 0.33984375, "learning_rate": 0.0018587256724575895, "loss": 0.2312, "step": 18176 }, { "epoch": 0.032231096500910036, "grad_norm": 0.68359375, "learning_rate": 0.0018586936575712275, "loss": 0.2164, "step": 18178 }, { "epoch": 0.03223464266621985, "grad_norm": 0.80078125, "learning_rate": 0.0018586616393667804, "loss": 0.22, "step": 18180 }, { "epoch": 0.032238188831529666, "grad_norm": 0.3203125, "learning_rate": 0.0018586296178443877, "loss": 0.1834, "step": 18182 }, { "epoch": 0.03224173499683948, "grad_norm": 0.55859375, "learning_rate": 0.0018585975930041898, "loss": 0.2287, "step": 18184 }, { "epoch": 0.032245281162149295, "grad_norm": 0.482421875, "learning_rate": 0.0018585655648463268, "loss": 0.261, "step": 18186 }, { "epoch": 0.03224882732745911, "grad_norm": 0.50390625, "learning_rate": 0.0018585335333709389, "loss": 0.2358, "step": 18188 }, { "epoch": 0.032252373492768924, "grad_norm": 0.451171875, "learning_rate": 0.0018585014985781653, "loss": 0.1816, "step": 18190 }, { "epoch": 0.03225591965807874, "grad_norm": 0.65234375, "learning_rate": 0.0018584694604681473, "loss": 0.1735, "step": 18192 }, { "epoch": 0.03225946582338855, "grad_norm": 0.28125, "learning_rate": 0.0018584374190410242, "loss": 0.159, "step": 18194 }, { "epoch": 0.032263011988698374, "grad_norm": 0.8515625, "learning_rate": 0.0018584053742969364, "loss": 0.235, "step": 18196 }, { "epoch": 0.03226655815400819, "grad_norm": 1.1484375, "learning_rate": 0.0018583733262360243, "loss": 0.1609, "step": 18198 }, { "epoch": 0.032270104319318, "grad_norm": 0.609375, "learning_rate": 0.0018583412748584274, "loss": 0.1851, "step": 18200 }, { "epoch": 0.03227365048462782, "grad_norm": 0.2578125, "learning_rate": 0.0018583092201642864, "loss": 0.23, "step": 18202 }, { "epoch": 0.03227719664993763, "grad_norm": 1.265625, "learning_rate": 0.0018582771621537414, "loss": 0.1818, "step": 18204 }, { "epoch": 0.03228074281524745, "grad_norm": 0.5546875, "learning_rate": 0.0018582451008269326, "loss": 0.2168, "step": 18206 }, { "epoch": 0.03228428898055726, "grad_norm": 0.45703125, "learning_rate": 0.0018582130361839999, "loss": 0.2498, "step": 18208 }, { "epoch": 0.032287835145867076, "grad_norm": 0.60546875, "learning_rate": 0.0018581809682250842, "loss": 0.2346, "step": 18210 }, { "epoch": 0.03229138131117689, "grad_norm": 0.244140625, "learning_rate": 0.001858148896950325, "loss": 0.2292, "step": 18212 }, { "epoch": 0.032294927476486705, "grad_norm": 1.0625, "learning_rate": 0.0018581168223598633, "loss": 0.2996, "step": 18214 }, { "epoch": 0.03229847364179652, "grad_norm": 0.3203125, "learning_rate": 0.0018580847444538385, "loss": 0.1784, "step": 18216 }, { "epoch": 0.03230201980710634, "grad_norm": 0.294921875, "learning_rate": 0.0018580526632323915, "loss": 0.2367, "step": 18218 }, { "epoch": 0.032305565972416156, "grad_norm": 0.482421875, "learning_rate": 0.0018580205786956627, "loss": 0.4424, "step": 18220 }, { "epoch": 0.03230911213772597, "grad_norm": 12.9375, "learning_rate": 0.0018579884908437921, "loss": 0.3309, "step": 18222 }, { "epoch": 0.032312658303035785, "grad_norm": 0.275390625, "learning_rate": 0.00185795639967692, "loss": 0.2179, "step": 18224 }, { "epoch": 0.0323162044683456, "grad_norm": 0.34375, "learning_rate": 0.0018579243051951872, "loss": 0.2933, "step": 18226 }, { "epoch": 0.032319750633655414, "grad_norm": 0.294921875, "learning_rate": 0.0018578922073987335, "loss": 0.2038, "step": 18228 }, { "epoch": 0.03232329679896523, "grad_norm": 0.1826171875, "learning_rate": 0.0018578601062876996, "loss": 0.163, "step": 18230 }, { "epoch": 0.03232684296427504, "grad_norm": 0.7890625, "learning_rate": 0.0018578280018622256, "loss": 0.2317, "step": 18232 }, { "epoch": 0.03233038912958486, "grad_norm": 0.26953125, "learning_rate": 0.0018577958941224525, "loss": 0.1973, "step": 18234 }, { "epoch": 0.03233393529489467, "grad_norm": 0.353515625, "learning_rate": 0.0018577637830685199, "loss": 0.1978, "step": 18236 }, { "epoch": 0.03233748146020449, "grad_norm": 1.1875, "learning_rate": 0.001857731668700569, "loss": 0.4513, "step": 18238 }, { "epoch": 0.0323410276255143, "grad_norm": 2.140625, "learning_rate": 0.0018576995510187397, "loss": 0.3938, "step": 18240 }, { "epoch": 0.03234457379082412, "grad_norm": 0.486328125, "learning_rate": 0.001857667430023173, "loss": 0.1946, "step": 18242 }, { "epoch": 0.03234811995613394, "grad_norm": 0.36328125, "learning_rate": 0.0018576353057140089, "loss": 0.2113, "step": 18244 }, { "epoch": 0.03235166612144375, "grad_norm": 2.90625, "learning_rate": 0.001857603178091388, "loss": 0.2205, "step": 18246 }, { "epoch": 0.03235521228675357, "grad_norm": 0.61328125, "learning_rate": 0.001857571047155451, "loss": 0.2187, "step": 18248 }, { "epoch": 0.03235875845206338, "grad_norm": 0.375, "learning_rate": 0.001857538912906338, "loss": 0.1649, "step": 18250 }, { "epoch": 0.032362304617373196, "grad_norm": 0.69921875, "learning_rate": 0.0018575067753441904, "loss": 0.2236, "step": 18252 }, { "epoch": 0.03236585078268301, "grad_norm": 0.96875, "learning_rate": 0.0018574746344691476, "loss": 0.2026, "step": 18254 }, { "epoch": 0.032369396947992825, "grad_norm": 0.8046875, "learning_rate": 0.0018574424902813508, "loss": 0.7105, "step": 18256 }, { "epoch": 0.03237294311330264, "grad_norm": 2.125, "learning_rate": 0.0018574103427809409, "loss": 0.3104, "step": 18258 }, { "epoch": 0.032376489278612454, "grad_norm": 0.609375, "learning_rate": 0.0018573781919680576, "loss": 0.1761, "step": 18260 }, { "epoch": 0.03238003544392227, "grad_norm": 0.83984375, "learning_rate": 0.0018573460378428423, "loss": 0.3704, "step": 18262 }, { "epoch": 0.03238358160923209, "grad_norm": 0.60546875, "learning_rate": 0.0018573138804054354, "loss": 0.2325, "step": 18264 }, { "epoch": 0.032387127774541905, "grad_norm": 0.287109375, "learning_rate": 0.0018572817196559775, "loss": 0.1435, "step": 18266 }, { "epoch": 0.03239067393985172, "grad_norm": 0.38671875, "learning_rate": 0.0018572495555946092, "loss": 0.2591, "step": 18268 }, { "epoch": 0.032394220105161534, "grad_norm": 0.6484375, "learning_rate": 0.0018572173882214714, "loss": 0.2314, "step": 18270 }, { "epoch": 0.03239776627047135, "grad_norm": 1.2109375, "learning_rate": 0.0018571852175367043, "loss": 0.2122, "step": 18272 }, { "epoch": 0.03240131243578116, "grad_norm": 0.78125, "learning_rate": 0.001857153043540449, "loss": 0.2188, "step": 18274 }, { "epoch": 0.03240485860109098, "grad_norm": 0.353515625, "learning_rate": 0.0018571208662328457, "loss": 0.2736, "step": 18276 }, { "epoch": 0.03240840476640079, "grad_norm": 0.46484375, "learning_rate": 0.001857088685614036, "loss": 0.1802, "step": 18278 }, { "epoch": 0.032411950931710606, "grad_norm": 1.0390625, "learning_rate": 0.0018570565016841603, "loss": 0.2233, "step": 18280 }, { "epoch": 0.03241549709702042, "grad_norm": 0.29296875, "learning_rate": 0.0018570243144433588, "loss": 0.1998, "step": 18282 }, { "epoch": 0.032419043262330235, "grad_norm": 0.3046875, "learning_rate": 0.001856992123891773, "loss": 0.2229, "step": 18284 }, { "epoch": 0.03242258942764006, "grad_norm": 0.44140625, "learning_rate": 0.0018569599300295432, "loss": 0.1703, "step": 18286 }, { "epoch": 0.03242613559294987, "grad_norm": 0.56640625, "learning_rate": 0.0018569277328568101, "loss": 0.2164, "step": 18288 }, { "epoch": 0.032429681758259686, "grad_norm": 0.6328125, "learning_rate": 0.0018568955323737153, "loss": 0.2132, "step": 18290 }, { "epoch": 0.0324332279235695, "grad_norm": 0.453125, "learning_rate": 0.0018568633285803988, "loss": 0.2348, "step": 18292 }, { "epoch": 0.032436774088879315, "grad_norm": 0.251953125, "learning_rate": 0.0018568311214770018, "loss": 0.2406, "step": 18294 }, { "epoch": 0.03244032025418913, "grad_norm": 0.353515625, "learning_rate": 0.001856798911063665, "loss": 0.1664, "step": 18296 }, { "epoch": 0.032443866419498944, "grad_norm": 1.1171875, "learning_rate": 0.0018567666973405294, "loss": 0.2333, "step": 18298 }, { "epoch": 0.03244741258480876, "grad_norm": 0.8984375, "learning_rate": 0.001856734480307736, "loss": 0.2664, "step": 18300 }, { "epoch": 0.03245095875011857, "grad_norm": 0.357421875, "learning_rate": 0.0018567022599654253, "loss": 0.2371, "step": 18302 }, { "epoch": 0.03245450491542839, "grad_norm": 0.97265625, "learning_rate": 0.0018566700363137388, "loss": 0.2988, "step": 18304 }, { "epoch": 0.0324580510807382, "grad_norm": 0.453125, "learning_rate": 0.0018566378093528167, "loss": 0.2135, "step": 18306 }, { "epoch": 0.03246159724604802, "grad_norm": 0.435546875, "learning_rate": 0.0018566055790828005, "loss": 0.4595, "step": 18308 }, { "epoch": 0.03246514341135784, "grad_norm": 1.46875, "learning_rate": 0.0018565733455038309, "loss": 0.2645, "step": 18310 }, { "epoch": 0.03246868957666765, "grad_norm": 1.109375, "learning_rate": 0.0018565411086160493, "loss": 0.2987, "step": 18312 }, { "epoch": 0.03247223574197747, "grad_norm": 0.419921875, "learning_rate": 0.0018565088684195958, "loss": 0.1973, "step": 18314 }, { "epoch": 0.03247578190728728, "grad_norm": 0.875, "learning_rate": 0.0018564766249146124, "loss": 0.2218, "step": 18316 }, { "epoch": 0.0324793280725971, "grad_norm": 2.984375, "learning_rate": 0.0018564443781012392, "loss": 0.1939, "step": 18318 }, { "epoch": 0.03248287423790691, "grad_norm": 0.29296875, "learning_rate": 0.0018564121279796182, "loss": 0.1703, "step": 18320 }, { "epoch": 0.032486420403216726, "grad_norm": 0.5859375, "learning_rate": 0.0018563798745498896, "loss": 0.2094, "step": 18322 }, { "epoch": 0.03248996656852654, "grad_norm": 1.265625, "learning_rate": 0.001856347617812195, "loss": 0.3317, "step": 18324 }, { "epoch": 0.032493512733836355, "grad_norm": 1.9453125, "learning_rate": 0.001856315357766675, "loss": 0.3003, "step": 18326 }, { "epoch": 0.03249705889914617, "grad_norm": 0.609375, "learning_rate": 0.0018562830944134712, "loss": 0.2795, "step": 18328 }, { "epoch": 0.032500605064455984, "grad_norm": 0.78125, "learning_rate": 0.0018562508277527243, "loss": 0.1912, "step": 18330 }, { "epoch": 0.032504151229765806, "grad_norm": 6.28125, "learning_rate": 0.0018562185577845757, "loss": 0.3172, "step": 18332 }, { "epoch": 0.03250769739507562, "grad_norm": 1.640625, "learning_rate": 0.0018561862845091661, "loss": 0.2308, "step": 18334 }, { "epoch": 0.032511243560385435, "grad_norm": 1.0703125, "learning_rate": 0.0018561540079266373, "loss": 0.2428, "step": 18336 }, { "epoch": 0.03251478972569525, "grad_norm": 0.828125, "learning_rate": 0.0018561217280371298, "loss": 0.6595, "step": 18338 }, { "epoch": 0.032518335891005064, "grad_norm": 0.2578125, "learning_rate": 0.0018560894448407853, "loss": 0.4818, "step": 18340 }, { "epoch": 0.03252188205631488, "grad_norm": 1.1796875, "learning_rate": 0.0018560571583377445, "loss": 0.2607, "step": 18342 }, { "epoch": 0.03252542822162469, "grad_norm": 4.25, "learning_rate": 0.001856024868528149, "loss": 0.4292, "step": 18344 }, { "epoch": 0.03252897438693451, "grad_norm": 0.62890625, "learning_rate": 0.0018559925754121401, "loss": 0.2462, "step": 18346 }, { "epoch": 0.03253252055224432, "grad_norm": 0.296875, "learning_rate": 0.0018559602789898584, "loss": 0.1767, "step": 18348 }, { "epoch": 0.03253606671755414, "grad_norm": 0.283203125, "learning_rate": 0.0018559279792614459, "loss": 0.2148, "step": 18350 }, { "epoch": 0.03253961288286395, "grad_norm": 0.609375, "learning_rate": 0.001855895676227043, "loss": 0.2486, "step": 18352 }, { "epoch": 0.03254315904817377, "grad_norm": 0.703125, "learning_rate": 0.0018558633698867921, "loss": 0.2073, "step": 18354 }, { "epoch": 0.03254670521348359, "grad_norm": 0.828125, "learning_rate": 0.0018558310602408334, "loss": 0.4217, "step": 18356 }, { "epoch": 0.0325502513787934, "grad_norm": 1.6171875, "learning_rate": 0.0018557987472893093, "loss": 0.2194, "step": 18358 }, { "epoch": 0.032553797544103216, "grad_norm": 0.482421875, "learning_rate": 0.0018557664310323598, "loss": 0.2198, "step": 18360 }, { "epoch": 0.03255734370941303, "grad_norm": 0.66796875, "learning_rate": 0.0018557341114701273, "loss": 0.2548, "step": 18362 }, { "epoch": 0.032560889874722845, "grad_norm": 0.90625, "learning_rate": 0.0018557017886027527, "loss": 0.3193, "step": 18364 }, { "epoch": 0.03256443604003266, "grad_norm": 1.0703125, "learning_rate": 0.0018556694624303777, "loss": 0.2035, "step": 18366 }, { "epoch": 0.032567982205342474, "grad_norm": 1.859375, "learning_rate": 0.001855637132953143, "loss": 0.2947, "step": 18368 }, { "epoch": 0.03257152837065229, "grad_norm": 0.412109375, "learning_rate": 0.0018556048001711907, "loss": 0.2424, "step": 18370 }, { "epoch": 0.032575074535962104, "grad_norm": 0.224609375, "learning_rate": 0.0018555724640846618, "loss": 0.3273, "step": 18372 }, { "epoch": 0.03257862070127192, "grad_norm": 0.35546875, "learning_rate": 0.0018555401246936978, "loss": 0.224, "step": 18374 }, { "epoch": 0.03258216686658173, "grad_norm": 0.28125, "learning_rate": 0.0018555077819984403, "loss": 0.1635, "step": 18376 }, { "epoch": 0.032585713031891554, "grad_norm": 0.38671875, "learning_rate": 0.0018554754359990302, "loss": 0.1918, "step": 18378 }, { "epoch": 0.03258925919720137, "grad_norm": 0.275390625, "learning_rate": 0.0018554430866956097, "loss": 0.2205, "step": 18380 }, { "epoch": 0.03259280536251118, "grad_norm": 0.205078125, "learning_rate": 0.0018554107340883201, "loss": 0.222, "step": 18382 }, { "epoch": 0.032596351527821, "grad_norm": 2.625, "learning_rate": 0.0018553783781773026, "loss": 0.2785, "step": 18384 }, { "epoch": 0.03259989769313081, "grad_norm": 0.51171875, "learning_rate": 0.0018553460189626987, "loss": 0.2235, "step": 18386 }, { "epoch": 0.03260344385844063, "grad_norm": 0.35546875, "learning_rate": 0.0018553136564446503, "loss": 0.2468, "step": 18388 }, { "epoch": 0.03260699002375044, "grad_norm": 1.4140625, "learning_rate": 0.0018552812906232985, "loss": 0.3162, "step": 18390 }, { "epoch": 0.032610536189060256, "grad_norm": 1.125, "learning_rate": 0.0018552489214987851, "loss": 0.2348, "step": 18392 }, { "epoch": 0.03261408235437007, "grad_norm": 0.244140625, "learning_rate": 0.0018552165490712518, "loss": 0.1933, "step": 18394 }, { "epoch": 0.032617628519679885, "grad_norm": 1.3046875, "learning_rate": 0.00185518417334084, "loss": 0.1257, "step": 18396 }, { "epoch": 0.0326211746849897, "grad_norm": 0.59375, "learning_rate": 0.001855151794307691, "loss": 0.1953, "step": 18398 }, { "epoch": 0.03262472085029952, "grad_norm": 0.275390625, "learning_rate": 0.0018551194119719467, "loss": 0.2064, "step": 18400 }, { "epoch": 0.032628267015609336, "grad_norm": 0.6484375, "learning_rate": 0.0018550870263337493, "loss": 0.2442, "step": 18402 }, { "epoch": 0.03263181318091915, "grad_norm": 0.380859375, "learning_rate": 0.0018550546373932392, "loss": 0.261, "step": 18404 }, { "epoch": 0.032635359346228965, "grad_norm": 0.66015625, "learning_rate": 0.001855022245150559, "loss": 0.2579, "step": 18406 }, { "epoch": 0.03263890551153878, "grad_norm": 0.6484375, "learning_rate": 0.0018549898496058497, "loss": 0.1617, "step": 18408 }, { "epoch": 0.032642451676848594, "grad_norm": 0.9921875, "learning_rate": 0.0018549574507592537, "loss": 0.3228, "step": 18410 }, { "epoch": 0.03264599784215841, "grad_norm": 0.2353515625, "learning_rate": 0.0018549250486109123, "loss": 0.2052, "step": 18412 }, { "epoch": 0.03264954400746822, "grad_norm": 0.4921875, "learning_rate": 0.0018548926431609673, "loss": 0.2159, "step": 18414 }, { "epoch": 0.03265309017277804, "grad_norm": 0.37109375, "learning_rate": 0.0018548602344095604, "loss": 0.1717, "step": 18416 }, { "epoch": 0.03265663633808785, "grad_norm": 0.76953125, "learning_rate": 0.0018548278223568333, "loss": 0.169, "step": 18418 }, { "epoch": 0.03266018250339767, "grad_norm": 0.5390625, "learning_rate": 0.0018547954070029277, "loss": 0.1941, "step": 18420 }, { "epoch": 0.03266372866870749, "grad_norm": 0.5078125, "learning_rate": 0.0018547629883479853, "loss": 0.2066, "step": 18422 }, { "epoch": 0.0326672748340173, "grad_norm": 2.0, "learning_rate": 0.0018547305663921478, "loss": 0.3647, "step": 18424 }, { "epoch": 0.03267082099932712, "grad_norm": 2.671875, "learning_rate": 0.001854698141135558, "loss": 0.359, "step": 18426 }, { "epoch": 0.03267436716463693, "grad_norm": 0.5, "learning_rate": 0.0018546657125783563, "loss": 0.1781, "step": 18428 }, { "epoch": 0.032677913329946746, "grad_norm": 0.40234375, "learning_rate": 0.0018546332807206853, "loss": 0.198, "step": 18430 }, { "epoch": 0.03268145949525656, "grad_norm": 0.546875, "learning_rate": 0.0018546008455626866, "loss": 0.1844, "step": 18432 }, { "epoch": 0.032685005660566376, "grad_norm": 0.435546875, "learning_rate": 0.001854568407104502, "loss": 0.285, "step": 18434 }, { "epoch": 0.03268855182587619, "grad_norm": 1.2109375, "learning_rate": 0.0018545359653462737, "loss": 0.3232, "step": 18436 }, { "epoch": 0.032692097991186005, "grad_norm": 1.65625, "learning_rate": 0.0018545035202881435, "loss": 0.354, "step": 18438 }, { "epoch": 0.03269564415649582, "grad_norm": 0.90234375, "learning_rate": 0.0018544710719302529, "loss": 0.2312, "step": 18440 }, { "epoch": 0.032699190321805634, "grad_norm": 0.326171875, "learning_rate": 0.0018544386202727441, "loss": 0.2175, "step": 18442 }, { "epoch": 0.03270273648711545, "grad_norm": 0.34765625, "learning_rate": 0.0018544061653157592, "loss": 0.1953, "step": 18444 }, { "epoch": 0.03270628265242527, "grad_norm": 0.51171875, "learning_rate": 0.0018543737070594397, "loss": 0.2682, "step": 18446 }, { "epoch": 0.032709828817735084, "grad_norm": 2.421875, "learning_rate": 0.001854341245503928, "loss": 0.221, "step": 18448 }, { "epoch": 0.0327133749830449, "grad_norm": 0.5390625, "learning_rate": 0.0018543087806493657, "loss": 0.2892, "step": 18450 }, { "epoch": 0.03271692114835471, "grad_norm": 1.8203125, "learning_rate": 0.001854276312495895, "loss": 0.5521, "step": 18452 }, { "epoch": 0.03272046731366453, "grad_norm": 0.56640625, "learning_rate": 0.0018542438410436579, "loss": 0.4132, "step": 18454 }, { "epoch": 0.03272401347897434, "grad_norm": 1.296875, "learning_rate": 0.0018542113662927961, "loss": 0.1982, "step": 18456 }, { "epoch": 0.03272755964428416, "grad_norm": 0.8046875, "learning_rate": 0.0018541788882434523, "loss": 0.2705, "step": 18458 }, { "epoch": 0.03273110580959397, "grad_norm": 0.1591796875, "learning_rate": 0.001854146406895768, "loss": 0.1707, "step": 18460 }, { "epoch": 0.032734651974903786, "grad_norm": 2.015625, "learning_rate": 0.0018541139222498853, "loss": 0.3587, "step": 18462 }, { "epoch": 0.0327381981402136, "grad_norm": 0.2373046875, "learning_rate": 0.0018540814343059465, "loss": 0.2082, "step": 18464 }, { "epoch": 0.032741744305523415, "grad_norm": 0.376953125, "learning_rate": 0.0018540489430640932, "loss": 0.1517, "step": 18466 }, { "epoch": 0.03274529047083324, "grad_norm": 0.53515625, "learning_rate": 0.0018540164485244682, "loss": 0.2393, "step": 18468 }, { "epoch": 0.03274883663614305, "grad_norm": 0.96484375, "learning_rate": 0.001853983950687213, "loss": 0.1924, "step": 18470 }, { "epoch": 0.032752382801452866, "grad_norm": 0.453125, "learning_rate": 0.00185395144955247, "loss": 0.1957, "step": 18472 }, { "epoch": 0.03275592896676268, "grad_norm": 0.40625, "learning_rate": 0.0018539189451203815, "loss": 0.2412, "step": 18474 }, { "epoch": 0.032759475132072495, "grad_norm": 0.63671875, "learning_rate": 0.0018538864373910892, "loss": 0.353, "step": 18476 }, { "epoch": 0.03276302129738231, "grad_norm": 0.462890625, "learning_rate": 0.0018538539263647355, "loss": 0.3638, "step": 18478 }, { "epoch": 0.032766567462692124, "grad_norm": 1.0625, "learning_rate": 0.0018538214120414629, "loss": 0.2229, "step": 18480 }, { "epoch": 0.03277011362800194, "grad_norm": 0.828125, "learning_rate": 0.0018537888944214131, "loss": 0.2285, "step": 18482 }, { "epoch": 0.03277365979331175, "grad_norm": 0.65234375, "learning_rate": 0.0018537563735047287, "loss": 0.2865, "step": 18484 }, { "epoch": 0.03277720595862157, "grad_norm": 0.443359375, "learning_rate": 0.0018537238492915516, "loss": 0.1738, "step": 18486 }, { "epoch": 0.03278075212393138, "grad_norm": 0.6484375, "learning_rate": 0.0018536913217820242, "loss": 0.223, "step": 18488 }, { "epoch": 0.032784298289241204, "grad_norm": 0.279296875, "learning_rate": 0.0018536587909762888, "loss": 0.2491, "step": 18490 }, { "epoch": 0.03278784445455102, "grad_norm": 0.337890625, "learning_rate": 0.0018536262568744878, "loss": 0.3263, "step": 18492 }, { "epoch": 0.03279139061986083, "grad_norm": 0.5078125, "learning_rate": 0.001853593719476763, "loss": 0.2398, "step": 18494 }, { "epoch": 0.03279493678517065, "grad_norm": 1.3984375, "learning_rate": 0.0018535611787832572, "loss": 0.2213, "step": 18496 }, { "epoch": 0.03279848295048046, "grad_norm": 0.4140625, "learning_rate": 0.0018535286347941126, "loss": 0.2515, "step": 18498 }, { "epoch": 0.03280202911579028, "grad_norm": 0.4765625, "learning_rate": 0.001853496087509471, "loss": 0.3427, "step": 18500 }, { "epoch": 0.03280557528110009, "grad_norm": 0.28515625, "learning_rate": 0.0018534635369294756, "loss": 0.2382, "step": 18502 }, { "epoch": 0.032809121446409906, "grad_norm": 0.43359375, "learning_rate": 0.001853430983054268, "loss": 0.1691, "step": 18504 }, { "epoch": 0.03281266761171972, "grad_norm": 0.2734375, "learning_rate": 0.0018533984258839915, "loss": 0.1911, "step": 18506 }, { "epoch": 0.032816213777029535, "grad_norm": 1.1953125, "learning_rate": 0.0018533658654187877, "loss": 0.2575, "step": 18508 }, { "epoch": 0.03281975994233935, "grad_norm": 0.1611328125, "learning_rate": 0.0018533333016587993, "loss": 0.1656, "step": 18510 }, { "epoch": 0.032823306107649164, "grad_norm": 0.65625, "learning_rate": 0.001853300734604168, "loss": 0.2313, "step": 18512 }, { "epoch": 0.032826852272958985, "grad_norm": 0.484375, "learning_rate": 0.0018532681642550375, "loss": 0.25, "step": 18514 }, { "epoch": 0.0328303984382688, "grad_norm": 1.296875, "learning_rate": 0.0018532355906115494, "loss": 0.3014, "step": 18516 }, { "epoch": 0.032833944603578614, "grad_norm": 0.294921875, "learning_rate": 0.0018532030136738464, "loss": 0.4298, "step": 18518 }, { "epoch": 0.03283749076888843, "grad_norm": 0.99609375, "learning_rate": 0.0018531704334420708, "loss": 0.2297, "step": 18520 }, { "epoch": 0.032841036934198244, "grad_norm": 0.26171875, "learning_rate": 0.001853137849916365, "loss": 0.2198, "step": 18522 }, { "epoch": 0.03284458309950806, "grad_norm": 1.203125, "learning_rate": 0.0018531052630968723, "loss": 0.2834, "step": 18524 }, { "epoch": 0.03284812926481787, "grad_norm": 0.69921875, "learning_rate": 0.0018530726729837344, "loss": 0.1838, "step": 18526 }, { "epoch": 0.03285167543012769, "grad_norm": 0.73046875, "learning_rate": 0.001853040079577094, "loss": 0.25, "step": 18528 }, { "epoch": 0.0328552215954375, "grad_norm": 0.474609375, "learning_rate": 0.0018530074828770938, "loss": 0.1778, "step": 18530 }, { "epoch": 0.032858767760747316, "grad_norm": 0.251953125, "learning_rate": 0.001852974882883876, "loss": 0.2248, "step": 18532 }, { "epoch": 0.03286231392605713, "grad_norm": 0.41015625, "learning_rate": 0.0018529422795975836, "loss": 0.1937, "step": 18534 }, { "epoch": 0.03286586009136695, "grad_norm": 0.5625, "learning_rate": 0.0018529096730183592, "loss": 0.5235, "step": 18536 }, { "epoch": 0.03286940625667677, "grad_norm": 0.470703125, "learning_rate": 0.001852877063146345, "loss": 0.2376, "step": 18538 }, { "epoch": 0.03287295242198658, "grad_norm": 0.41796875, "learning_rate": 0.001852844449981684, "loss": 0.2307, "step": 18540 }, { "epoch": 0.032876498587296396, "grad_norm": 0.2265625, "learning_rate": 0.0018528118335245187, "loss": 0.2056, "step": 18542 }, { "epoch": 0.03288004475260621, "grad_norm": 0.333984375, "learning_rate": 0.0018527792137749918, "loss": 0.2216, "step": 18544 }, { "epoch": 0.032883590917916025, "grad_norm": 1.09375, "learning_rate": 0.0018527465907332457, "loss": 0.3162, "step": 18546 }, { "epoch": 0.03288713708322584, "grad_norm": 0.357421875, "learning_rate": 0.0018527139643994234, "loss": 0.2156, "step": 18548 }, { "epoch": 0.032890683248535654, "grad_norm": 0.435546875, "learning_rate": 0.0018526813347736672, "loss": 0.1578, "step": 18550 }, { "epoch": 0.03289422941384547, "grad_norm": 1.390625, "learning_rate": 0.0018526487018561205, "loss": 0.4636, "step": 18552 }, { "epoch": 0.03289777557915528, "grad_norm": 1.5625, "learning_rate": 0.0018526160656469253, "loss": 0.2631, "step": 18554 }, { "epoch": 0.0329013217444651, "grad_norm": 0.298828125, "learning_rate": 0.0018525834261462243, "loss": 0.3989, "step": 18556 }, { "epoch": 0.03290486790977492, "grad_norm": 0.40625, "learning_rate": 0.0018525507833541607, "loss": 0.2258, "step": 18558 }, { "epoch": 0.032908414075084734, "grad_norm": 1.171875, "learning_rate": 0.0018525181372708772, "loss": 0.1785, "step": 18560 }, { "epoch": 0.03291196024039455, "grad_norm": 0.29296875, "learning_rate": 0.0018524854878965169, "loss": 0.1763, "step": 18562 }, { "epoch": 0.03291550640570436, "grad_norm": 0.6328125, "learning_rate": 0.0018524528352312215, "loss": 0.3331, "step": 18564 }, { "epoch": 0.03291905257101418, "grad_norm": 0.7890625, "learning_rate": 0.0018524201792751347, "loss": 0.1974, "step": 18566 }, { "epoch": 0.03292259873632399, "grad_norm": 0.23828125, "learning_rate": 0.0018523875200283993, "loss": 0.1745, "step": 18568 }, { "epoch": 0.03292614490163381, "grad_norm": 0.6875, "learning_rate": 0.0018523548574911578, "loss": 0.2699, "step": 18570 }, { "epoch": 0.03292969106694362, "grad_norm": 0.48046875, "learning_rate": 0.0018523221916635533, "loss": 0.2172, "step": 18572 }, { "epoch": 0.032933237232253436, "grad_norm": 2.46875, "learning_rate": 0.0018522895225457283, "loss": 0.2807, "step": 18574 }, { "epoch": 0.03293678339756325, "grad_norm": 0.46484375, "learning_rate": 0.0018522568501378258, "loss": 0.2051, "step": 18576 }, { "epoch": 0.032940329562873065, "grad_norm": 0.2353515625, "learning_rate": 0.0018522241744399893, "loss": 0.2094, "step": 18578 }, { "epoch": 0.03294387572818288, "grad_norm": 1.3828125, "learning_rate": 0.0018521914954523606, "loss": 0.2467, "step": 18580 }, { "epoch": 0.0329474218934927, "grad_norm": 0.39453125, "learning_rate": 0.0018521588131750835, "loss": 0.2121, "step": 18582 }, { "epoch": 0.032950968058802516, "grad_norm": 0.455078125, "learning_rate": 0.0018521261276083008, "loss": 0.228, "step": 18584 }, { "epoch": 0.03295451422411233, "grad_norm": 0.306640625, "learning_rate": 0.0018520934387521552, "loss": 0.2836, "step": 18586 }, { "epoch": 0.032958060389422145, "grad_norm": 0.283203125, "learning_rate": 0.0018520607466067896, "loss": 0.1817, "step": 18588 }, { "epoch": 0.03296160655473196, "grad_norm": 0.5234375, "learning_rate": 0.0018520280511723475, "loss": 0.193, "step": 18590 }, { "epoch": 0.032965152720041774, "grad_norm": 1.4921875, "learning_rate": 0.0018519953524489712, "loss": 0.2192, "step": 18592 }, { "epoch": 0.03296869888535159, "grad_norm": 0.94140625, "learning_rate": 0.0018519626504368044, "loss": 0.2242, "step": 18594 }, { "epoch": 0.0329722450506614, "grad_norm": 0.578125, "learning_rate": 0.0018519299451359894, "loss": 0.1593, "step": 18596 }, { "epoch": 0.03297579121597122, "grad_norm": 0.546875, "learning_rate": 0.0018518972365466698, "loss": 0.2202, "step": 18598 }, { "epoch": 0.03297933738128103, "grad_norm": 0.318359375, "learning_rate": 0.0018518645246689883, "loss": 0.3765, "step": 18600 }, { "epoch": 0.03298288354659085, "grad_norm": 1.140625, "learning_rate": 0.0018518318095030882, "loss": 0.2371, "step": 18602 }, { "epoch": 0.03298642971190067, "grad_norm": 0.33984375, "learning_rate": 0.0018517990910491126, "loss": 0.1665, "step": 18604 }, { "epoch": 0.03298997587721048, "grad_norm": 0.51171875, "learning_rate": 0.0018517663693072043, "loss": 0.2465, "step": 18606 }, { "epoch": 0.0329935220425203, "grad_norm": 0.375, "learning_rate": 0.0018517336442775065, "loss": 0.214, "step": 18608 }, { "epoch": 0.03299706820783011, "grad_norm": 0.330078125, "learning_rate": 0.0018517009159601623, "loss": 0.217, "step": 18610 }, { "epoch": 0.033000614373139926, "grad_norm": 1.046875, "learning_rate": 0.0018516681843553152, "loss": 0.2101, "step": 18612 }, { "epoch": 0.03300416053844974, "grad_norm": 0.34375, "learning_rate": 0.001851635449463108, "loss": 0.1815, "step": 18614 }, { "epoch": 0.033007706703759555, "grad_norm": 0.54296875, "learning_rate": 0.0018516027112836838, "loss": 0.1736, "step": 18616 }, { "epoch": 0.03301125286906937, "grad_norm": 0.36328125, "learning_rate": 0.0018515699698171862, "loss": 0.19, "step": 18618 }, { "epoch": 0.033014799034379184, "grad_norm": 0.29296875, "learning_rate": 0.0018515372250637579, "loss": 0.2253, "step": 18620 }, { "epoch": 0.033018345199689, "grad_norm": 1.265625, "learning_rate": 0.0018515044770235425, "loss": 0.215, "step": 18622 }, { "epoch": 0.033021891364998814, "grad_norm": 1.03125, "learning_rate": 0.0018514717256966828, "loss": 0.4838, "step": 18624 }, { "epoch": 0.033025437530308635, "grad_norm": 0.83203125, "learning_rate": 0.0018514389710833227, "loss": 0.1887, "step": 18626 }, { "epoch": 0.03302898369561845, "grad_norm": 0.578125, "learning_rate": 0.0018514062131836048, "loss": 0.2011, "step": 18628 }, { "epoch": 0.033032529860928264, "grad_norm": 0.5859375, "learning_rate": 0.0018513734519976723, "loss": 0.1552, "step": 18630 }, { "epoch": 0.03303607602623808, "grad_norm": 0.58203125, "learning_rate": 0.0018513406875256694, "loss": 0.2178, "step": 18632 }, { "epoch": 0.03303962219154789, "grad_norm": 0.734375, "learning_rate": 0.0018513079197677383, "loss": 0.1976, "step": 18634 }, { "epoch": 0.03304316835685771, "grad_norm": 0.76953125, "learning_rate": 0.001851275148724023, "loss": 0.2241, "step": 18636 }, { "epoch": 0.03304671452216752, "grad_norm": 1.25, "learning_rate": 0.0018512423743946664, "loss": 0.2003, "step": 18638 }, { "epoch": 0.03305026068747734, "grad_norm": 1.0703125, "learning_rate": 0.0018512095967798121, "loss": 0.2308, "step": 18640 }, { "epoch": 0.03305380685278715, "grad_norm": 0.765625, "learning_rate": 0.0018511768158796032, "loss": 0.2545, "step": 18642 }, { "epoch": 0.033057353018096966, "grad_norm": 0.41796875, "learning_rate": 0.0018511440316941834, "loss": 0.2928, "step": 18644 }, { "epoch": 0.03306089918340678, "grad_norm": 1.5859375, "learning_rate": 0.0018511112442236959, "loss": 0.2709, "step": 18646 }, { "epoch": 0.033064445348716595, "grad_norm": 0.326171875, "learning_rate": 0.0018510784534682841, "loss": 0.1298, "step": 18648 }, { "epoch": 0.03306799151402642, "grad_norm": 0.349609375, "learning_rate": 0.0018510456594280915, "loss": 0.2339, "step": 18650 }, { "epoch": 0.03307153767933623, "grad_norm": 0.416015625, "learning_rate": 0.0018510128621032616, "loss": 0.151, "step": 18652 }, { "epoch": 0.033075083844646046, "grad_norm": 0.61328125, "learning_rate": 0.0018509800614939374, "loss": 0.2144, "step": 18654 }, { "epoch": 0.03307863000995586, "grad_norm": 0.271484375, "learning_rate": 0.0018509472576002627, "loss": 0.1915, "step": 18656 }, { "epoch": 0.033082176175265675, "grad_norm": 0.76953125, "learning_rate": 0.0018509144504223808, "loss": 0.1852, "step": 18658 }, { "epoch": 0.03308572234057549, "grad_norm": 0.3125, "learning_rate": 0.0018508816399604353, "loss": 0.1801, "step": 18660 }, { "epoch": 0.033089268505885304, "grad_norm": 0.60546875, "learning_rate": 0.0018508488262145703, "loss": 0.2263, "step": 18662 }, { "epoch": 0.03309281467119512, "grad_norm": 0.59765625, "learning_rate": 0.0018508160091849277, "loss": 0.2726, "step": 18664 }, { "epoch": 0.03309636083650493, "grad_norm": 0.58203125, "learning_rate": 0.0018507831888716526, "loss": 0.2189, "step": 18666 }, { "epoch": 0.03309990700181475, "grad_norm": 0.6171875, "learning_rate": 0.0018507503652748878, "loss": 0.2005, "step": 18668 }, { "epoch": 0.03310345316712456, "grad_norm": 0.6484375, "learning_rate": 0.0018507175383947767, "loss": 0.211, "step": 18670 }, { "epoch": 0.033106999332434384, "grad_norm": 0.45703125, "learning_rate": 0.0018506847082314637, "loss": 0.201, "step": 18672 }, { "epoch": 0.0331105454977442, "grad_norm": 0.275390625, "learning_rate": 0.0018506518747850916, "loss": 0.1901, "step": 18674 }, { "epoch": 0.03311409166305401, "grad_norm": 0.38671875, "learning_rate": 0.0018506190380558041, "loss": 0.1831, "step": 18676 }, { "epoch": 0.03311763782836383, "grad_norm": 0.4296875, "learning_rate": 0.001850586198043745, "loss": 0.2073, "step": 18678 }, { "epoch": 0.03312118399367364, "grad_norm": 1.703125, "learning_rate": 0.0018505533547490578, "loss": 0.2433, "step": 18680 }, { "epoch": 0.033124730158983456, "grad_norm": 0.22265625, "learning_rate": 0.0018505205081718865, "loss": 0.2346, "step": 18682 }, { "epoch": 0.03312827632429327, "grad_norm": 0.45703125, "learning_rate": 0.0018504876583123743, "loss": 0.2053, "step": 18684 }, { "epoch": 0.033131822489603086, "grad_norm": 0.390625, "learning_rate": 0.0018504548051706648, "loss": 0.2083, "step": 18686 }, { "epoch": 0.0331353686549129, "grad_norm": 0.470703125, "learning_rate": 0.001850421948746902, "loss": 0.1528, "step": 18688 }, { "epoch": 0.033138914820222715, "grad_norm": 0.341796875, "learning_rate": 0.0018503890890412295, "loss": 0.1885, "step": 18690 }, { "epoch": 0.03314246098553253, "grad_norm": 0.412109375, "learning_rate": 0.0018503562260537914, "loss": 0.227, "step": 18692 }, { "epoch": 0.03314600715084235, "grad_norm": 0.2470703125, "learning_rate": 0.0018503233597847307, "loss": 0.2024, "step": 18694 }, { "epoch": 0.033149553316152165, "grad_norm": 0.373046875, "learning_rate": 0.0018502904902341912, "loss": 0.1938, "step": 18696 }, { "epoch": 0.03315309948146198, "grad_norm": 1.2265625, "learning_rate": 0.001850257617402317, "loss": 0.2967, "step": 18698 }, { "epoch": 0.033156645646771794, "grad_norm": 1.59375, "learning_rate": 0.0018502247412892521, "loss": 0.2787, "step": 18700 }, { "epoch": 0.03316019181208161, "grad_norm": 0.79296875, "learning_rate": 0.0018501918618951398, "loss": 0.2648, "step": 18702 }, { "epoch": 0.03316373797739142, "grad_norm": 1.0078125, "learning_rate": 0.001850158979220124, "loss": 0.189, "step": 18704 }, { "epoch": 0.03316728414270124, "grad_norm": 0.44921875, "learning_rate": 0.0018501260932643488, "loss": 0.1882, "step": 18706 }, { "epoch": 0.03317083030801105, "grad_norm": 0.3515625, "learning_rate": 0.0018500932040279576, "loss": 0.1915, "step": 18708 }, { "epoch": 0.03317437647332087, "grad_norm": 0.5078125, "learning_rate": 0.0018500603115110949, "loss": 0.1916, "step": 18710 }, { "epoch": 0.03317792263863068, "grad_norm": 0.412109375, "learning_rate": 0.0018500274157139032, "loss": 0.2237, "step": 18712 }, { "epoch": 0.033181468803940496, "grad_norm": 0.48046875, "learning_rate": 0.001849994516636528, "loss": 0.2083, "step": 18714 }, { "epoch": 0.03318501496925031, "grad_norm": 0.578125, "learning_rate": 0.0018499616142791117, "loss": 0.2221, "step": 18716 }, { "epoch": 0.03318856113456013, "grad_norm": 0.86328125, "learning_rate": 0.0018499287086417998, "loss": 0.3681, "step": 18718 }, { "epoch": 0.03319210729986995, "grad_norm": 0.369140625, "learning_rate": 0.0018498957997247348, "loss": 0.2953, "step": 18720 }, { "epoch": 0.03319565346517976, "grad_norm": 0.423828125, "learning_rate": 0.001849862887528061, "loss": 0.2633, "step": 18722 }, { "epoch": 0.033199199630489576, "grad_norm": 0.3828125, "learning_rate": 0.001849829972051923, "loss": 0.2133, "step": 18724 }, { "epoch": 0.03320274579579939, "grad_norm": 0.3203125, "learning_rate": 0.001849797053296464, "loss": 0.235, "step": 18726 }, { "epoch": 0.033206291961109205, "grad_norm": 0.2236328125, "learning_rate": 0.0018497641312618283, "loss": 0.2189, "step": 18728 }, { "epoch": 0.03320983812641902, "grad_norm": 0.279296875, "learning_rate": 0.0018497312059481596, "loss": 0.1839, "step": 18730 }, { "epoch": 0.033213384291728834, "grad_norm": 0.7265625, "learning_rate": 0.0018496982773556023, "loss": 0.2953, "step": 18732 }, { "epoch": 0.03321693045703865, "grad_norm": 0.4609375, "learning_rate": 0.0018496653454843004, "loss": 0.1469, "step": 18734 }, { "epoch": 0.03322047662234846, "grad_norm": 3.1875, "learning_rate": 0.0018496324103343972, "loss": 0.3843, "step": 18736 }, { "epoch": 0.03322402278765828, "grad_norm": 0.392578125, "learning_rate": 0.0018495994719060378, "loss": 0.2196, "step": 18738 }, { "epoch": 0.0332275689529681, "grad_norm": 1.7578125, "learning_rate": 0.0018495665301993653, "loss": 0.4872, "step": 18740 }, { "epoch": 0.033231115118277914, "grad_norm": 0.57421875, "learning_rate": 0.0018495335852145246, "loss": 0.2595, "step": 18742 }, { "epoch": 0.03323466128358773, "grad_norm": 0.306640625, "learning_rate": 0.0018495006369516592, "loss": 0.2142, "step": 18744 }, { "epoch": 0.03323820744889754, "grad_norm": 3.171875, "learning_rate": 0.0018494676854109134, "loss": 0.3017, "step": 18746 }, { "epoch": 0.03324175361420736, "grad_norm": 0.37109375, "learning_rate": 0.001849434730592431, "loss": 0.1897, "step": 18748 }, { "epoch": 0.03324529977951717, "grad_norm": 0.5625, "learning_rate": 0.0018494017724963567, "loss": 0.2462, "step": 18750 }, { "epoch": 0.03324884594482699, "grad_norm": 0.546875, "learning_rate": 0.0018493688111228346, "loss": 0.2316, "step": 18752 }, { "epoch": 0.0332523921101368, "grad_norm": 0.380859375, "learning_rate": 0.001849335846472008, "loss": 0.1849, "step": 18754 }, { "epoch": 0.033255938275446616, "grad_norm": 0.62109375, "learning_rate": 0.001849302878544022, "loss": 0.2454, "step": 18756 }, { "epoch": 0.03325948444075643, "grad_norm": 0.84375, "learning_rate": 0.0018492699073390205, "loss": 0.2996, "step": 18758 }, { "epoch": 0.033263030606066245, "grad_norm": 0.416015625, "learning_rate": 0.0018492369328571476, "loss": 0.25, "step": 18760 }, { "epoch": 0.033266576771376066, "grad_norm": 2.78125, "learning_rate": 0.0018492039550985476, "loss": 0.2499, "step": 18762 }, { "epoch": 0.03327012293668588, "grad_norm": 0.734375, "learning_rate": 0.0018491709740633647, "loss": 0.2176, "step": 18764 }, { "epoch": 0.033273669101995695, "grad_norm": 1.046875, "learning_rate": 0.001849137989751743, "loss": 0.2178, "step": 18766 }, { "epoch": 0.03327721526730551, "grad_norm": 1.484375, "learning_rate": 0.001849105002163827, "loss": 0.179, "step": 18768 }, { "epoch": 0.033280761432615324, "grad_norm": 0.9765625, "learning_rate": 0.0018490720112997608, "loss": 0.2716, "step": 18770 }, { "epoch": 0.03328430759792514, "grad_norm": 0.375, "learning_rate": 0.0018490390171596886, "loss": 0.2062, "step": 18772 }, { "epoch": 0.033287853763234954, "grad_norm": 0.6171875, "learning_rate": 0.001849006019743755, "loss": 0.3005, "step": 18774 }, { "epoch": 0.03329139992854477, "grad_norm": 0.44921875, "learning_rate": 0.0018489730190521044, "loss": 0.2306, "step": 18776 }, { "epoch": 0.03329494609385458, "grad_norm": 0.57421875, "learning_rate": 0.0018489400150848805, "loss": 0.1936, "step": 18778 }, { "epoch": 0.0332984922591644, "grad_norm": 0.60546875, "learning_rate": 0.001848907007842228, "loss": 0.2347, "step": 18780 }, { "epoch": 0.03330203842447421, "grad_norm": 0.29296875, "learning_rate": 0.0018488739973242915, "loss": 0.1813, "step": 18782 }, { "epoch": 0.033305584589784026, "grad_norm": 1.015625, "learning_rate": 0.001848840983531215, "loss": 0.2902, "step": 18784 }, { "epoch": 0.03330913075509385, "grad_norm": 0.353515625, "learning_rate": 0.001848807966463143, "loss": 0.2607, "step": 18786 }, { "epoch": 0.03331267692040366, "grad_norm": 0.74609375, "learning_rate": 0.00184877494612022, "loss": 0.4356, "step": 18788 }, { "epoch": 0.03331622308571348, "grad_norm": 0.6015625, "learning_rate": 0.0018487419225025903, "loss": 0.1875, "step": 18790 }, { "epoch": 0.03331976925102329, "grad_norm": 0.4296875, "learning_rate": 0.0018487088956103982, "loss": 0.1902, "step": 18792 }, { "epoch": 0.033323315416333106, "grad_norm": 0.3203125, "learning_rate": 0.0018486758654437886, "loss": 0.2309, "step": 18794 }, { "epoch": 0.03332686158164292, "grad_norm": 0.462890625, "learning_rate": 0.0018486428320029053, "loss": 0.2175, "step": 18796 }, { "epoch": 0.033330407746952735, "grad_norm": 0.85546875, "learning_rate": 0.0018486097952878932, "loss": 0.2416, "step": 18798 }, { "epoch": 0.03333395391226255, "grad_norm": 0.62109375, "learning_rate": 0.0018485767552988968, "loss": 0.2062, "step": 18800 }, { "epoch": 0.033337500077572364, "grad_norm": 0.4453125, "learning_rate": 0.0018485437120360603, "loss": 0.2121, "step": 18802 }, { "epoch": 0.03334104624288218, "grad_norm": 0.5703125, "learning_rate": 0.0018485106654995288, "loss": 0.2597, "step": 18804 }, { "epoch": 0.03334459240819199, "grad_norm": 0.25390625, "learning_rate": 0.0018484776156894463, "loss": 0.3383, "step": 18806 }, { "epoch": 0.033348138573501815, "grad_norm": 0.72265625, "learning_rate": 0.0018484445626059573, "loss": 0.2085, "step": 18808 }, { "epoch": 0.03335168473881163, "grad_norm": 0.60546875, "learning_rate": 0.0018484115062492067, "loss": 0.2383, "step": 18810 }, { "epoch": 0.033355230904121444, "grad_norm": 0.7265625, "learning_rate": 0.0018483784466193386, "loss": 0.1899, "step": 18812 }, { "epoch": 0.03335877706943126, "grad_norm": 0.2177734375, "learning_rate": 0.0018483453837164984, "loss": 0.2314, "step": 18814 }, { "epoch": 0.03336232323474107, "grad_norm": 0.306640625, "learning_rate": 0.0018483123175408295, "loss": 0.1807, "step": 18816 }, { "epoch": 0.03336586940005089, "grad_norm": 1.3046875, "learning_rate": 0.001848279248092478, "loss": 0.1743, "step": 18818 }, { "epoch": 0.0333694155653607, "grad_norm": 1.015625, "learning_rate": 0.001848246175371587, "loss": 0.2333, "step": 18820 }, { "epoch": 0.03337296173067052, "grad_norm": 0.392578125, "learning_rate": 0.0018482130993783025, "loss": 0.188, "step": 18822 }, { "epoch": 0.03337650789598033, "grad_norm": 0.296875, "learning_rate": 0.001848180020112768, "loss": 0.2444, "step": 18824 }, { "epoch": 0.033380054061290146, "grad_norm": 4.96875, "learning_rate": 0.001848146937575129, "loss": 0.4252, "step": 18826 }, { "epoch": 0.03338360022659996, "grad_norm": 2.515625, "learning_rate": 0.0018481138517655296, "loss": 0.1596, "step": 18828 }, { "epoch": 0.03338714639190978, "grad_norm": 0.404296875, "learning_rate": 0.0018480807626841149, "loss": 0.2782, "step": 18830 }, { "epoch": 0.033390692557219596, "grad_norm": 0.318359375, "learning_rate": 0.0018480476703310296, "loss": 0.3535, "step": 18832 }, { "epoch": 0.03339423872252941, "grad_norm": 0.51171875, "learning_rate": 0.0018480145747064181, "loss": 0.1631, "step": 18834 }, { "epoch": 0.033397784887839226, "grad_norm": 0.33984375, "learning_rate": 0.0018479814758104256, "loss": 0.1894, "step": 18836 }, { "epoch": 0.03340133105314904, "grad_norm": 0.51171875, "learning_rate": 0.0018479483736431963, "loss": 0.2116, "step": 18838 }, { "epoch": 0.033404877218458855, "grad_norm": 1.1328125, "learning_rate": 0.0018479152682048757, "loss": 0.2601, "step": 18840 }, { "epoch": 0.03340842338376867, "grad_norm": 0.6171875, "learning_rate": 0.0018478821594956076, "loss": 0.2387, "step": 18842 }, { "epoch": 0.033411969549078484, "grad_norm": 0.50390625, "learning_rate": 0.0018478490475155379, "loss": 0.1727, "step": 18844 }, { "epoch": 0.0334155157143883, "grad_norm": 0.458984375, "learning_rate": 0.0018478159322648105, "loss": 0.1703, "step": 18846 }, { "epoch": 0.03341906187969811, "grad_norm": 19.25, "learning_rate": 0.0018477828137435706, "loss": 0.2858, "step": 18848 }, { "epoch": 0.03342260804500793, "grad_norm": 0.44921875, "learning_rate": 0.0018477496919519633, "loss": 0.1944, "step": 18850 }, { "epoch": 0.03342615421031774, "grad_norm": 0.53125, "learning_rate": 0.0018477165668901328, "loss": 0.2487, "step": 18852 }, { "epoch": 0.03342970037562756, "grad_norm": 0.6484375, "learning_rate": 0.0018476834385582246, "loss": 0.2345, "step": 18854 }, { "epoch": 0.03343324654093738, "grad_norm": 1.3515625, "learning_rate": 0.0018476503069563834, "loss": 0.3625, "step": 18856 }, { "epoch": 0.03343679270624719, "grad_norm": 0.67578125, "learning_rate": 0.001847617172084754, "loss": 0.1696, "step": 18858 }, { "epoch": 0.03344033887155701, "grad_norm": 0.89453125, "learning_rate": 0.0018475840339434813, "loss": 0.221, "step": 18860 }, { "epoch": 0.03344388503686682, "grad_norm": 1.25, "learning_rate": 0.0018475508925327104, "loss": 0.2301, "step": 18862 }, { "epoch": 0.033447431202176636, "grad_norm": 0.8671875, "learning_rate": 0.0018475177478525862, "loss": 0.2339, "step": 18864 }, { "epoch": 0.03345097736748645, "grad_norm": 1.0234375, "learning_rate": 0.0018474845999032533, "loss": 0.2813, "step": 18866 }, { "epoch": 0.033454523532796265, "grad_norm": 1.34375, "learning_rate": 0.001847451448684857, "loss": 0.2237, "step": 18868 }, { "epoch": 0.03345806969810608, "grad_norm": 0.92578125, "learning_rate": 0.0018474182941975424, "loss": 0.3002, "step": 18870 }, { "epoch": 0.033461615863415894, "grad_norm": 0.52734375, "learning_rate": 0.0018473851364414543, "loss": 0.192, "step": 18872 }, { "epoch": 0.03346516202872571, "grad_norm": 0.34765625, "learning_rate": 0.0018473519754167375, "loss": 0.2454, "step": 18874 }, { "epoch": 0.03346870819403553, "grad_norm": 0.69140625, "learning_rate": 0.0018473188111235374, "loss": 0.195, "step": 18876 }, { "epoch": 0.033472254359345345, "grad_norm": 0.28125, "learning_rate": 0.0018472856435619994, "loss": 0.1908, "step": 18878 }, { "epoch": 0.03347580052465516, "grad_norm": 0.5078125, "learning_rate": 0.0018472524727322676, "loss": 0.2444, "step": 18880 }, { "epoch": 0.033479346689964974, "grad_norm": 0.478515625, "learning_rate": 0.0018472192986344876, "loss": 0.2898, "step": 18882 }, { "epoch": 0.03348289285527479, "grad_norm": 0.2890625, "learning_rate": 0.0018471861212688045, "loss": 0.2057, "step": 18884 }, { "epoch": 0.0334864390205846, "grad_norm": 0.25, "learning_rate": 0.0018471529406353631, "loss": 0.246, "step": 18886 }, { "epoch": 0.03348998518589442, "grad_norm": 0.470703125, "learning_rate": 0.0018471197567343088, "loss": 0.2827, "step": 18888 }, { "epoch": 0.03349353135120423, "grad_norm": 0.392578125, "learning_rate": 0.0018470865695657868, "loss": 0.2161, "step": 18890 }, { "epoch": 0.03349707751651405, "grad_norm": 0.2431640625, "learning_rate": 0.001847053379129942, "loss": 0.2115, "step": 18892 }, { "epoch": 0.03350062368182386, "grad_norm": 0.357421875, "learning_rate": 0.0018470201854269197, "loss": 0.1322, "step": 18894 }, { "epoch": 0.033504169847133676, "grad_norm": 1.0859375, "learning_rate": 0.0018469869884568649, "loss": 0.1865, "step": 18896 }, { "epoch": 0.0335077160124435, "grad_norm": 0.341796875, "learning_rate": 0.0018469537882199233, "loss": 0.225, "step": 18898 }, { "epoch": 0.03351126217775331, "grad_norm": 0.8125, "learning_rate": 0.0018469205847162393, "loss": 0.435, "step": 18900 }, { "epoch": 0.03351480834306313, "grad_norm": 0.453125, "learning_rate": 0.0018468873779459588, "loss": 0.3007, "step": 18902 }, { "epoch": 0.03351835450837294, "grad_norm": 0.63671875, "learning_rate": 0.0018468541679092267, "loss": 0.3188, "step": 18904 }, { "epoch": 0.033521900673682756, "grad_norm": 0.3515625, "learning_rate": 0.001846820954606188, "loss": 0.2082, "step": 18906 }, { "epoch": 0.03352544683899257, "grad_norm": 1.515625, "learning_rate": 0.0018467877380369886, "loss": 0.2822, "step": 18908 }, { "epoch": 0.033528993004302385, "grad_norm": 0.53515625, "learning_rate": 0.001846754518201773, "loss": 0.2131, "step": 18910 }, { "epoch": 0.0335325391696122, "grad_norm": 1.328125, "learning_rate": 0.0018467212951006873, "loss": 0.3754, "step": 18912 }, { "epoch": 0.033536085334922014, "grad_norm": 0.515625, "learning_rate": 0.0018466880687338764, "loss": 0.2043, "step": 18914 }, { "epoch": 0.03353963150023183, "grad_norm": 0.3046875, "learning_rate": 0.0018466548391014857, "loss": 0.1718, "step": 18916 }, { "epoch": 0.03354317766554164, "grad_norm": 0.443359375, "learning_rate": 0.00184662160620366, "loss": 0.2072, "step": 18918 }, { "epoch": 0.03354672383085146, "grad_norm": 0.53125, "learning_rate": 0.0018465883700405456, "loss": 0.2001, "step": 18920 }, { "epoch": 0.03355026999616128, "grad_norm": 0.64453125, "learning_rate": 0.001846555130612287, "loss": 0.2529, "step": 18922 }, { "epoch": 0.033553816161471094, "grad_norm": 0.421875, "learning_rate": 0.00184652188791903, "loss": 0.2507, "step": 18924 }, { "epoch": 0.03355736232678091, "grad_norm": 0.390625, "learning_rate": 0.0018464886419609198, "loss": 0.2161, "step": 18926 }, { "epoch": 0.03356090849209072, "grad_norm": 0.30078125, "learning_rate": 0.0018464553927381023, "loss": 0.2447, "step": 18928 }, { "epoch": 0.03356445465740054, "grad_norm": 0.462890625, "learning_rate": 0.0018464221402507222, "loss": 0.1866, "step": 18930 }, { "epoch": 0.03356800082271035, "grad_norm": 0.4453125, "learning_rate": 0.0018463888844989249, "loss": 0.2096, "step": 18932 }, { "epoch": 0.033571546988020166, "grad_norm": 0.390625, "learning_rate": 0.0018463556254828565, "loss": 0.2082, "step": 18934 }, { "epoch": 0.03357509315332998, "grad_norm": 0.6953125, "learning_rate": 0.0018463223632026625, "loss": 0.2534, "step": 18936 }, { "epoch": 0.033578639318639796, "grad_norm": 2.140625, "learning_rate": 0.0018462890976584877, "loss": 0.2559, "step": 18938 }, { "epoch": 0.03358218548394961, "grad_norm": 0.38671875, "learning_rate": 0.0018462558288504774, "loss": 0.2209, "step": 18940 }, { "epoch": 0.033585731649259425, "grad_norm": 1.21875, "learning_rate": 0.001846222556778778, "loss": 0.2767, "step": 18942 }, { "epoch": 0.033589277814569246, "grad_norm": 1.21875, "learning_rate": 0.0018461892814435345, "loss": 0.252, "step": 18944 }, { "epoch": 0.03359282397987906, "grad_norm": 0.8828125, "learning_rate": 0.001846156002844893, "loss": 0.1962, "step": 18946 }, { "epoch": 0.033596370145188875, "grad_norm": 0.59765625, "learning_rate": 0.001846122720982998, "loss": 0.2091, "step": 18948 }, { "epoch": 0.03359991631049869, "grad_norm": 0.416015625, "learning_rate": 0.0018460894358579957, "loss": 0.1944, "step": 18950 }, { "epoch": 0.033603462475808504, "grad_norm": 0.3671875, "learning_rate": 0.0018460561474700316, "loss": 0.2193, "step": 18952 }, { "epoch": 0.03360700864111832, "grad_norm": 0.376953125, "learning_rate": 0.0018460228558192515, "loss": 0.2845, "step": 18954 }, { "epoch": 0.03361055480642813, "grad_norm": 0.6640625, "learning_rate": 0.0018459895609058005, "loss": 0.1916, "step": 18956 }, { "epoch": 0.03361410097173795, "grad_norm": 0.24609375, "learning_rate": 0.0018459562627298248, "loss": 0.2705, "step": 18958 }, { "epoch": 0.03361764713704776, "grad_norm": 0.287109375, "learning_rate": 0.0018459229612914694, "loss": 0.1657, "step": 18960 }, { "epoch": 0.03362119330235758, "grad_norm": 0.490234375, "learning_rate": 0.0018458896565908803, "loss": 0.193, "step": 18962 }, { "epoch": 0.03362473946766739, "grad_norm": 0.66015625, "learning_rate": 0.0018458563486282034, "loss": 0.2113, "step": 18964 }, { "epoch": 0.03362828563297721, "grad_norm": 0.87890625, "learning_rate": 0.0018458230374035838, "loss": 0.1834, "step": 18966 }, { "epoch": 0.03363183179828703, "grad_norm": 0.314453125, "learning_rate": 0.0018457897229171673, "loss": 0.1338, "step": 18968 }, { "epoch": 0.03363537796359684, "grad_norm": 1.859375, "learning_rate": 0.0018457564051691001, "loss": 0.4528, "step": 18970 }, { "epoch": 0.03363892412890666, "grad_norm": 0.36328125, "learning_rate": 0.0018457230841595273, "loss": 0.3895, "step": 18972 }, { "epoch": 0.03364247029421647, "grad_norm": 2.625, "learning_rate": 0.0018456897598885952, "loss": 0.2551, "step": 18974 }, { "epoch": 0.033646016459526286, "grad_norm": 0.59765625, "learning_rate": 0.0018456564323564488, "loss": 0.2106, "step": 18976 }, { "epoch": 0.0336495626248361, "grad_norm": 0.3515625, "learning_rate": 0.0018456231015632348, "loss": 0.1826, "step": 18978 }, { "epoch": 0.033653108790145915, "grad_norm": 0.55078125, "learning_rate": 0.001845589767509098, "loss": 0.2581, "step": 18980 }, { "epoch": 0.03365665495545573, "grad_norm": 0.298828125, "learning_rate": 0.001845556430194185, "loss": 0.2521, "step": 18982 }, { "epoch": 0.033660201120765544, "grad_norm": 6.65625, "learning_rate": 0.001845523089618641, "loss": 0.3095, "step": 18984 }, { "epoch": 0.03366374728607536, "grad_norm": 6.46875, "learning_rate": 0.0018454897457826124, "loss": 0.3289, "step": 18986 }, { "epoch": 0.03366729345138517, "grad_norm": 0.390625, "learning_rate": 0.0018454563986862445, "loss": 0.2168, "step": 18988 }, { "epoch": 0.033670839616694995, "grad_norm": 0.240234375, "learning_rate": 0.0018454230483296833, "loss": 0.2024, "step": 18990 }, { "epoch": 0.03367438578200481, "grad_norm": 0.302734375, "learning_rate": 0.0018453896947130746, "loss": 0.1981, "step": 18992 }, { "epoch": 0.033677931947314624, "grad_norm": 1.375, "learning_rate": 0.0018453563378365645, "loss": 0.2182, "step": 18994 }, { "epoch": 0.03368147811262444, "grad_norm": 0.78515625, "learning_rate": 0.0018453229777002987, "loss": 0.1984, "step": 18996 }, { "epoch": 0.03368502427793425, "grad_norm": 0.1982421875, "learning_rate": 0.0018452896143044228, "loss": 0.1803, "step": 18998 }, { "epoch": 0.03368857044324407, "grad_norm": 0.55859375, "learning_rate": 0.0018452562476490835, "loss": 0.1515, "step": 19000 }, { "epoch": 0.03369211660855388, "grad_norm": 0.306640625, "learning_rate": 0.001845222877734426, "loss": 0.2303, "step": 19002 }, { "epoch": 0.0336956627738637, "grad_norm": 0.609375, "learning_rate": 0.0018451895045605963, "loss": 0.1867, "step": 19004 }, { "epoch": 0.03369920893917351, "grad_norm": 0.4609375, "learning_rate": 0.0018451561281277412, "loss": 0.2536, "step": 19006 }, { "epoch": 0.033702755104483326, "grad_norm": 0.369140625, "learning_rate": 0.0018451227484360057, "loss": 0.1953, "step": 19008 }, { "epoch": 0.03370630126979314, "grad_norm": 0.640625, "learning_rate": 0.001845089365485536, "loss": 0.2006, "step": 19010 }, { "epoch": 0.03370984743510296, "grad_norm": 0.32421875, "learning_rate": 0.0018450559792764784, "loss": 0.2292, "step": 19012 }, { "epoch": 0.033713393600412776, "grad_norm": 0.53515625, "learning_rate": 0.0018450225898089785, "loss": 0.2359, "step": 19014 }, { "epoch": 0.03371693976572259, "grad_norm": 0.287109375, "learning_rate": 0.0018449891970831827, "loss": 0.2856, "step": 19016 }, { "epoch": 0.033720485931032405, "grad_norm": 0.42578125, "learning_rate": 0.0018449558010992372, "loss": 0.2128, "step": 19018 }, { "epoch": 0.03372403209634222, "grad_norm": 0.484375, "learning_rate": 0.0018449224018572872, "loss": 0.301, "step": 19020 }, { "epoch": 0.033727578261652034, "grad_norm": 0.625, "learning_rate": 0.0018448889993574796, "loss": 0.2252, "step": 19022 }, { "epoch": 0.03373112442696185, "grad_norm": 0.65625, "learning_rate": 0.0018448555935999603, "loss": 0.3472, "step": 19024 }, { "epoch": 0.033734670592271664, "grad_norm": 2.5625, "learning_rate": 0.0018448221845848752, "loss": 0.3541, "step": 19026 }, { "epoch": 0.03373821675758148, "grad_norm": 0.33984375, "learning_rate": 0.0018447887723123705, "loss": 0.1741, "step": 19028 }, { "epoch": 0.03374176292289129, "grad_norm": 0.74609375, "learning_rate": 0.0018447553567825922, "loss": 0.4304, "step": 19030 }, { "epoch": 0.03374530908820111, "grad_norm": 1.2578125, "learning_rate": 0.0018447219379956867, "loss": 0.2288, "step": 19032 }, { "epoch": 0.03374885525351093, "grad_norm": 0.796875, "learning_rate": 0.0018446885159517999, "loss": 0.264, "step": 19034 }, { "epoch": 0.03375240141882074, "grad_norm": 0.44140625, "learning_rate": 0.0018446550906510782, "loss": 0.1917, "step": 19036 }, { "epoch": 0.03375594758413056, "grad_norm": 0.43359375, "learning_rate": 0.0018446216620936675, "loss": 0.2097, "step": 19038 }, { "epoch": 0.03375949374944037, "grad_norm": 1.4609375, "learning_rate": 0.0018445882302797147, "loss": 0.4741, "step": 19040 }, { "epoch": 0.03376303991475019, "grad_norm": 0.296875, "learning_rate": 0.0018445547952093651, "loss": 0.2618, "step": 19042 }, { "epoch": 0.03376658608006, "grad_norm": 1.2890625, "learning_rate": 0.0018445213568827653, "loss": 0.2557, "step": 19044 }, { "epoch": 0.033770132245369816, "grad_norm": 0.515625, "learning_rate": 0.0018444879153000616, "loss": 0.2402, "step": 19046 }, { "epoch": 0.03377367841067963, "grad_norm": 0.232421875, "learning_rate": 0.0018444544704614002, "loss": 0.1612, "step": 19048 }, { "epoch": 0.033777224575989445, "grad_norm": 0.44140625, "learning_rate": 0.0018444210223669275, "loss": 0.1946, "step": 19050 }, { "epoch": 0.03378077074129926, "grad_norm": 0.89453125, "learning_rate": 0.0018443875710167897, "loss": 0.2281, "step": 19052 }, { "epoch": 0.033784316906609074, "grad_norm": 0.91796875, "learning_rate": 0.0018443541164111326, "loss": 0.1907, "step": 19054 }, { "epoch": 0.03378786307191889, "grad_norm": 0.427734375, "learning_rate": 0.0018443206585501033, "loss": 0.2029, "step": 19056 }, { "epoch": 0.03379140923722871, "grad_norm": 0.89453125, "learning_rate": 0.001844287197433848, "loss": 0.223, "step": 19058 }, { "epoch": 0.033794955402538525, "grad_norm": 0.453125, "learning_rate": 0.0018442537330625127, "loss": 0.226, "step": 19060 }, { "epoch": 0.03379850156784834, "grad_norm": 0.36328125, "learning_rate": 0.0018442202654362437, "loss": 0.227, "step": 19062 }, { "epoch": 0.033802047733158154, "grad_norm": 0.97265625, "learning_rate": 0.0018441867945551877, "loss": 0.2429, "step": 19064 }, { "epoch": 0.03380559389846797, "grad_norm": 0.5, "learning_rate": 0.0018441533204194907, "loss": 0.2129, "step": 19066 }, { "epoch": 0.03380914006377778, "grad_norm": 0.4375, "learning_rate": 0.0018441198430292995, "loss": 0.1582, "step": 19068 }, { "epoch": 0.0338126862290876, "grad_norm": 0.435546875, "learning_rate": 0.0018440863623847604, "loss": 0.2068, "step": 19070 }, { "epoch": 0.03381623239439741, "grad_norm": 1.28125, "learning_rate": 0.00184405287848602, "loss": 0.221, "step": 19072 }, { "epoch": 0.03381977855970723, "grad_norm": 0.61328125, "learning_rate": 0.001844019391333224, "loss": 0.2143, "step": 19074 }, { "epoch": 0.03382332472501704, "grad_norm": 0.46484375, "learning_rate": 0.0018439859009265196, "loss": 0.3348, "step": 19076 }, { "epoch": 0.033826870890326856, "grad_norm": 0.6328125, "learning_rate": 0.001843952407266053, "loss": 0.1941, "step": 19078 }, { "epoch": 0.03383041705563668, "grad_norm": 0.1669921875, "learning_rate": 0.0018439189103519708, "loss": 0.2115, "step": 19080 }, { "epoch": 0.03383396322094649, "grad_norm": 0.51953125, "learning_rate": 0.0018438854101844194, "loss": 0.2639, "step": 19082 }, { "epoch": 0.033837509386256306, "grad_norm": 0.703125, "learning_rate": 0.0018438519067635454, "loss": 0.2026, "step": 19084 }, { "epoch": 0.03384105555156612, "grad_norm": 0.41015625, "learning_rate": 0.0018438184000894948, "loss": 0.2551, "step": 19086 }, { "epoch": 0.033844601716875936, "grad_norm": 0.69921875, "learning_rate": 0.001843784890162415, "loss": 0.2381, "step": 19088 }, { "epoch": 0.03384814788218575, "grad_norm": 0.85546875, "learning_rate": 0.001843751376982452, "loss": 0.2712, "step": 19090 }, { "epoch": 0.033851694047495565, "grad_norm": 3.984375, "learning_rate": 0.0018437178605497525, "loss": 0.3249, "step": 19092 }, { "epoch": 0.03385524021280538, "grad_norm": 2.578125, "learning_rate": 0.0018436843408644631, "loss": 0.2692, "step": 19094 }, { "epoch": 0.033858786378115194, "grad_norm": 0.4453125, "learning_rate": 0.0018436508179267303, "loss": 0.2199, "step": 19096 }, { "epoch": 0.03386233254342501, "grad_norm": 0.81640625, "learning_rate": 0.0018436172917367009, "loss": 0.2365, "step": 19098 }, { "epoch": 0.03386587870873482, "grad_norm": 3.09375, "learning_rate": 0.0018435837622945213, "loss": 0.3604, "step": 19100 }, { "epoch": 0.033869424874044644, "grad_norm": 0.3984375, "learning_rate": 0.0018435502296003383, "loss": 0.2479, "step": 19102 }, { "epoch": 0.03387297103935446, "grad_norm": 0.703125, "learning_rate": 0.0018435166936542986, "loss": 0.1854, "step": 19104 }, { "epoch": 0.03387651720466427, "grad_norm": 0.1845703125, "learning_rate": 0.0018434831544565486, "loss": 0.1999, "step": 19106 }, { "epoch": 0.03388006336997409, "grad_norm": 0.5859375, "learning_rate": 0.0018434496120072353, "loss": 0.1936, "step": 19108 }, { "epoch": 0.0338836095352839, "grad_norm": 1.2109375, "learning_rate": 0.0018434160663065053, "loss": 0.3086, "step": 19110 }, { "epoch": 0.03388715570059372, "grad_norm": 1.1171875, "learning_rate": 0.001843382517354505, "loss": 0.3781, "step": 19112 }, { "epoch": 0.03389070186590353, "grad_norm": 3.4375, "learning_rate": 0.0018433489651513814, "loss": 0.2386, "step": 19114 }, { "epoch": 0.033894248031213346, "grad_norm": 0.2275390625, "learning_rate": 0.0018433154096972814, "loss": 0.2564, "step": 19116 }, { "epoch": 0.03389779419652316, "grad_norm": 3.125, "learning_rate": 0.0018432818509923515, "loss": 0.1917, "step": 19118 }, { "epoch": 0.033901340361832975, "grad_norm": 0.33203125, "learning_rate": 0.0018432482890367384, "loss": 0.2098, "step": 19120 }, { "epoch": 0.03390488652714279, "grad_norm": 0.3046875, "learning_rate": 0.0018432147238305891, "loss": 0.1995, "step": 19122 }, { "epoch": 0.033908432692452604, "grad_norm": 1.609375, "learning_rate": 0.0018431811553740508, "loss": 0.2775, "step": 19124 }, { "epoch": 0.033911978857762426, "grad_norm": 0.478515625, "learning_rate": 0.0018431475836672689, "loss": 0.2088, "step": 19126 }, { "epoch": 0.03391552502307224, "grad_norm": 1.265625, "learning_rate": 0.0018431140087103919, "loss": 0.228, "step": 19128 }, { "epoch": 0.033919071188382055, "grad_norm": 1.640625, "learning_rate": 0.0018430804305035653, "loss": 0.2621, "step": 19130 }, { "epoch": 0.03392261735369187, "grad_norm": 0.470703125, "learning_rate": 0.001843046849046937, "loss": 0.2344, "step": 19132 }, { "epoch": 0.033926163519001684, "grad_norm": 0.267578125, "learning_rate": 0.0018430132643406534, "loss": 0.169, "step": 19134 }, { "epoch": 0.0339297096843115, "grad_norm": 2.65625, "learning_rate": 0.001842979676384861, "loss": 0.3076, "step": 19136 }, { "epoch": 0.03393325584962131, "grad_norm": 1.609375, "learning_rate": 0.0018429460851797073, "loss": 0.225, "step": 19138 }, { "epoch": 0.03393680201493113, "grad_norm": 0.71875, "learning_rate": 0.001842912490725339, "loss": 0.2014, "step": 19140 }, { "epoch": 0.03394034818024094, "grad_norm": 0.318359375, "learning_rate": 0.0018428788930219028, "loss": 0.2189, "step": 19142 }, { "epoch": 0.03394389434555076, "grad_norm": 1.1015625, "learning_rate": 0.0018428452920695457, "loss": 0.2543, "step": 19144 }, { "epoch": 0.03394744051086057, "grad_norm": 0.984375, "learning_rate": 0.0018428116878684154, "loss": 0.2473, "step": 19146 }, { "epoch": 0.03395098667617039, "grad_norm": 1.3828125, "learning_rate": 0.0018427780804186578, "loss": 0.3891, "step": 19148 }, { "epoch": 0.03395453284148021, "grad_norm": 0.87890625, "learning_rate": 0.0018427444697204202, "loss": 0.1533, "step": 19150 }, { "epoch": 0.03395807900679002, "grad_norm": 0.62109375, "learning_rate": 0.00184271085577385, "loss": 0.2407, "step": 19152 }, { "epoch": 0.03396162517209984, "grad_norm": 0.46484375, "learning_rate": 0.001842677238579094, "loss": 0.219, "step": 19154 }, { "epoch": 0.03396517133740965, "grad_norm": 0.302734375, "learning_rate": 0.001842643618136299, "loss": 0.2856, "step": 19156 }, { "epoch": 0.033968717502719466, "grad_norm": 2.5, "learning_rate": 0.0018426099944456123, "loss": 0.3575, "step": 19158 }, { "epoch": 0.03397226366802928, "grad_norm": 0.47265625, "learning_rate": 0.001842576367507181, "loss": 0.2355, "step": 19160 }, { "epoch": 0.033975809833339095, "grad_norm": 0.6796875, "learning_rate": 0.0018425427373211518, "loss": 0.1953, "step": 19162 }, { "epoch": 0.03397935599864891, "grad_norm": 1.7421875, "learning_rate": 0.001842509103887672, "loss": 0.3155, "step": 19164 }, { "epoch": 0.033982902163958724, "grad_norm": 0.34375, "learning_rate": 0.0018424754672068885, "loss": 0.2594, "step": 19166 }, { "epoch": 0.03398644832926854, "grad_norm": 0.57421875, "learning_rate": 0.0018424418272789487, "loss": 0.2617, "step": 19168 }, { "epoch": 0.03398999449457836, "grad_norm": 0.84375, "learning_rate": 0.0018424081841039996, "loss": 0.2766, "step": 19170 }, { "epoch": 0.033993540659888175, "grad_norm": 0.95703125, "learning_rate": 0.0018423745376821886, "loss": 0.3368, "step": 19172 }, { "epoch": 0.03399708682519799, "grad_norm": 0.5546875, "learning_rate": 0.0018423408880136622, "loss": 0.1981, "step": 19174 }, { "epoch": 0.034000632990507804, "grad_norm": 1.09375, "learning_rate": 0.0018423072350985683, "loss": 0.2789, "step": 19176 }, { "epoch": 0.03400417915581762, "grad_norm": 0.65625, "learning_rate": 0.0018422735789370534, "loss": 0.1432, "step": 19178 }, { "epoch": 0.03400772532112743, "grad_norm": 0.6171875, "learning_rate": 0.0018422399195292655, "loss": 0.2191, "step": 19180 }, { "epoch": 0.03401127148643725, "grad_norm": 0.6875, "learning_rate": 0.001842206256875351, "loss": 0.2134, "step": 19182 }, { "epoch": 0.03401481765174706, "grad_norm": 0.291015625, "learning_rate": 0.0018421725909754574, "loss": 0.2076, "step": 19184 }, { "epoch": 0.034018363817056876, "grad_norm": 0.32421875, "learning_rate": 0.0018421389218297324, "loss": 0.1866, "step": 19186 }, { "epoch": 0.03402190998236669, "grad_norm": 0.76953125, "learning_rate": 0.0018421052494383223, "loss": 0.2017, "step": 19188 }, { "epoch": 0.034025456147676506, "grad_norm": 0.43359375, "learning_rate": 0.0018420715738013754, "loss": 0.2194, "step": 19190 }, { "epoch": 0.03402900231298632, "grad_norm": 1.578125, "learning_rate": 0.0018420378949190379, "loss": 0.2218, "step": 19192 }, { "epoch": 0.03403254847829614, "grad_norm": 0.32421875, "learning_rate": 0.0018420042127914582, "loss": 0.2075, "step": 19194 }, { "epoch": 0.034036094643605956, "grad_norm": 1.0859375, "learning_rate": 0.001841970527418783, "loss": 0.2269, "step": 19196 }, { "epoch": 0.03403964080891577, "grad_norm": 0.9765625, "learning_rate": 0.0018419368388011596, "loss": 0.1838, "step": 19198 }, { "epoch": 0.034043186974225585, "grad_norm": 0.6328125, "learning_rate": 0.0018419031469387355, "loss": 0.2427, "step": 19200 }, { "epoch": 0.0340467331395354, "grad_norm": 0.5625, "learning_rate": 0.0018418694518316577, "loss": 0.2295, "step": 19202 }, { "epoch": 0.034050279304845214, "grad_norm": 1.1328125, "learning_rate": 0.0018418357534800743, "loss": 0.4088, "step": 19204 }, { "epoch": 0.03405382547015503, "grad_norm": 0.26171875, "learning_rate": 0.0018418020518841318, "loss": 0.1708, "step": 19206 }, { "epoch": 0.03405737163546484, "grad_norm": 0.609375, "learning_rate": 0.0018417683470439781, "loss": 0.213, "step": 19208 }, { "epoch": 0.03406091780077466, "grad_norm": 0.4921875, "learning_rate": 0.0018417346389597606, "loss": 0.2157, "step": 19210 }, { "epoch": 0.03406446396608447, "grad_norm": 0.357421875, "learning_rate": 0.0018417009276316268, "loss": 0.2962, "step": 19212 }, { "epoch": 0.03406801013139429, "grad_norm": 1.1875, "learning_rate": 0.001841667213059724, "loss": 0.2374, "step": 19214 }, { "epoch": 0.03407155629670411, "grad_norm": 1.078125, "learning_rate": 0.0018416334952441992, "loss": 0.4036, "step": 19216 }, { "epoch": 0.03407510246201392, "grad_norm": 0.392578125, "learning_rate": 0.0018415997741852007, "loss": 0.1918, "step": 19218 }, { "epoch": 0.03407864862732374, "grad_norm": 0.490234375, "learning_rate": 0.0018415660498828752, "loss": 0.1809, "step": 19220 }, { "epoch": 0.03408219479263355, "grad_norm": 0.4140625, "learning_rate": 0.001841532322337371, "loss": 0.238, "step": 19222 }, { "epoch": 0.03408574095794337, "grad_norm": 0.8046875, "learning_rate": 0.0018414985915488347, "loss": 0.2962, "step": 19224 }, { "epoch": 0.03408928712325318, "grad_norm": 0.2421875, "learning_rate": 0.0018414648575174144, "loss": 0.1616, "step": 19226 }, { "epoch": 0.034092833288562996, "grad_norm": 0.5, "learning_rate": 0.0018414311202432576, "loss": 0.2683, "step": 19228 }, { "epoch": 0.03409637945387281, "grad_norm": 0.294921875, "learning_rate": 0.001841397379726512, "loss": 0.2012, "step": 19230 }, { "epoch": 0.034099925619182625, "grad_norm": 0.33203125, "learning_rate": 0.0018413636359673245, "loss": 0.2255, "step": 19232 }, { "epoch": 0.03410347178449244, "grad_norm": 0.67578125, "learning_rate": 0.0018413298889658435, "loss": 0.1975, "step": 19234 }, { "epoch": 0.034107017949802254, "grad_norm": 0.2734375, "learning_rate": 0.0018412961387222159, "loss": 0.2286, "step": 19236 }, { "epoch": 0.034110564115112076, "grad_norm": 2.1875, "learning_rate": 0.0018412623852365896, "loss": 0.4569, "step": 19238 }, { "epoch": 0.03411411028042189, "grad_norm": 1.0625, "learning_rate": 0.0018412286285091123, "loss": 0.2914, "step": 19240 }, { "epoch": 0.034117656445731705, "grad_norm": 2.203125, "learning_rate": 0.0018411948685399312, "loss": 0.1773, "step": 19242 }, { "epoch": 0.03412120261104152, "grad_norm": 0.65625, "learning_rate": 0.0018411611053291946, "loss": 0.2066, "step": 19244 }, { "epoch": 0.034124748776351334, "grad_norm": 0.5703125, "learning_rate": 0.0018411273388770498, "loss": 0.3644, "step": 19246 }, { "epoch": 0.03412829494166115, "grad_norm": 0.341796875, "learning_rate": 0.0018410935691836443, "loss": 0.1911, "step": 19248 }, { "epoch": 0.03413184110697096, "grad_norm": 0.75390625, "learning_rate": 0.0018410597962491265, "loss": 0.2419, "step": 19250 }, { "epoch": 0.03413538727228078, "grad_norm": 0.50390625, "learning_rate": 0.0018410260200736432, "loss": 0.2272, "step": 19252 }, { "epoch": 0.03413893343759059, "grad_norm": 0.57421875, "learning_rate": 0.0018409922406573426, "loss": 0.1901, "step": 19254 }, { "epoch": 0.03414247960290041, "grad_norm": 0.1689453125, "learning_rate": 0.0018409584580003724, "loss": 0.1967, "step": 19256 }, { "epoch": 0.03414602576821022, "grad_norm": 3.609375, "learning_rate": 0.0018409246721028806, "loss": 0.1886, "step": 19258 }, { "epoch": 0.034149571933520036, "grad_norm": 1.2265625, "learning_rate": 0.0018408908829650142, "loss": 0.2766, "step": 19260 }, { "epoch": 0.03415311809882986, "grad_norm": 0.375, "learning_rate": 0.0018408570905869214, "loss": 0.1666, "step": 19262 }, { "epoch": 0.03415666426413967, "grad_norm": 0.92578125, "learning_rate": 0.0018408232949687505, "loss": 0.5652, "step": 19264 }, { "epoch": 0.034160210429449486, "grad_norm": 0.369140625, "learning_rate": 0.0018407894961106487, "loss": 0.2625, "step": 19266 }, { "epoch": 0.0341637565947593, "grad_norm": 0.40625, "learning_rate": 0.0018407556940127636, "loss": 0.2398, "step": 19268 }, { "epoch": 0.034167302760069115, "grad_norm": 0.50390625, "learning_rate": 0.0018407218886752434, "loss": 0.4504, "step": 19270 }, { "epoch": 0.03417084892537893, "grad_norm": 0.56640625, "learning_rate": 0.0018406880800982365, "loss": 0.2331, "step": 19272 }, { "epoch": 0.034174395090688744, "grad_norm": 0.7578125, "learning_rate": 0.0018406542682818897, "loss": 0.2426, "step": 19274 }, { "epoch": 0.03417794125599856, "grad_norm": 0.43359375, "learning_rate": 0.0018406204532263515, "loss": 0.2185, "step": 19276 }, { "epoch": 0.034181487421308374, "grad_norm": 0.91796875, "learning_rate": 0.0018405866349317694, "loss": 0.212, "step": 19278 }, { "epoch": 0.03418503358661819, "grad_norm": 1.328125, "learning_rate": 0.001840552813398292, "loss": 0.2749, "step": 19280 }, { "epoch": 0.034188579751928, "grad_norm": 1.125, "learning_rate": 0.0018405189886260662, "loss": 0.4763, "step": 19282 }, { "epoch": 0.034192125917237824, "grad_norm": 0.93359375, "learning_rate": 0.001840485160615241, "loss": 0.1994, "step": 19284 }, { "epoch": 0.03419567208254764, "grad_norm": 0.373046875, "learning_rate": 0.0018404513293659632, "loss": 0.2514, "step": 19286 }, { "epoch": 0.03419921824785745, "grad_norm": 0.5625, "learning_rate": 0.001840417494878382, "loss": 0.2071, "step": 19288 }, { "epoch": 0.03420276441316727, "grad_norm": 0.353515625, "learning_rate": 0.0018403836571526447, "loss": 0.1594, "step": 19290 }, { "epoch": 0.03420631057847708, "grad_norm": 0.48828125, "learning_rate": 0.001840349816188899, "loss": 0.1771, "step": 19292 }, { "epoch": 0.0342098567437869, "grad_norm": 0.67578125, "learning_rate": 0.0018403159719872935, "loss": 0.2512, "step": 19294 }, { "epoch": 0.03421340290909671, "grad_norm": 0.578125, "learning_rate": 0.001840282124547976, "loss": 0.3169, "step": 19296 }, { "epoch": 0.034216949074406526, "grad_norm": 0.8828125, "learning_rate": 0.0018402482738710945, "loss": 0.2098, "step": 19298 }, { "epoch": 0.03422049523971634, "grad_norm": 0.38671875, "learning_rate": 0.001840214419956797, "loss": 0.255, "step": 19300 }, { "epoch": 0.034224041405026155, "grad_norm": 0.376953125, "learning_rate": 0.0018401805628052315, "loss": 0.1598, "step": 19302 }, { "epoch": 0.03422758757033597, "grad_norm": 0.373046875, "learning_rate": 0.0018401467024165461, "loss": 0.1736, "step": 19304 }, { "epoch": 0.034231133735645784, "grad_norm": 0.5234375, "learning_rate": 0.001840112838790889, "loss": 0.185, "step": 19306 }, { "epoch": 0.034234679900955606, "grad_norm": 1.34375, "learning_rate": 0.0018400789719284086, "loss": 0.2465, "step": 19308 }, { "epoch": 0.03423822606626542, "grad_norm": 3.328125, "learning_rate": 0.0018400451018292523, "loss": 0.197, "step": 19310 }, { "epoch": 0.034241772231575235, "grad_norm": 0.578125, "learning_rate": 0.0018400112284935688, "loss": 0.2497, "step": 19312 }, { "epoch": 0.03424531839688505, "grad_norm": 0.419921875, "learning_rate": 0.0018399773519215054, "loss": 0.2632, "step": 19314 }, { "epoch": 0.034248864562194864, "grad_norm": 0.45703125, "learning_rate": 0.0018399434721132117, "loss": 0.2231, "step": 19316 }, { "epoch": 0.03425241072750468, "grad_norm": 0.83984375, "learning_rate": 0.001839909589068835, "loss": 0.5745, "step": 19318 }, { "epoch": 0.03425595689281449, "grad_norm": 2.40625, "learning_rate": 0.001839875702788523, "loss": 0.2247, "step": 19320 }, { "epoch": 0.03425950305812431, "grad_norm": 0.431640625, "learning_rate": 0.0018398418132724251, "loss": 0.1917, "step": 19322 }, { "epoch": 0.03426304922343412, "grad_norm": 0.671875, "learning_rate": 0.001839807920520688, "loss": 0.3159, "step": 19324 }, { "epoch": 0.03426659538874394, "grad_norm": 0.423828125, "learning_rate": 0.0018397740245334616, "loss": 0.1965, "step": 19326 }, { "epoch": 0.03427014155405375, "grad_norm": 0.54296875, "learning_rate": 0.001839740125310893, "loss": 0.2511, "step": 19328 }, { "epoch": 0.03427368771936357, "grad_norm": 1.2109375, "learning_rate": 0.001839706222853131, "loss": 0.3093, "step": 19330 }, { "epoch": 0.03427723388467339, "grad_norm": 0.69140625, "learning_rate": 0.0018396723171603233, "loss": 0.2201, "step": 19332 }, { "epoch": 0.0342807800499832, "grad_norm": 1.3828125, "learning_rate": 0.0018396384082326187, "loss": 0.3054, "step": 19334 }, { "epoch": 0.034284326215293016, "grad_norm": 0.251953125, "learning_rate": 0.0018396044960701655, "loss": 0.2889, "step": 19336 }, { "epoch": 0.03428787238060283, "grad_norm": 0.6484375, "learning_rate": 0.0018395705806731118, "loss": 0.2257, "step": 19338 }, { "epoch": 0.034291418545912646, "grad_norm": 0.5234375, "learning_rate": 0.001839536662041606, "loss": 0.1897, "step": 19340 }, { "epoch": 0.03429496471122246, "grad_norm": 0.56640625, "learning_rate": 0.001839502740175796, "loss": 0.2285, "step": 19342 }, { "epoch": 0.034298510876532275, "grad_norm": 0.5390625, "learning_rate": 0.001839468815075831, "loss": 0.2467, "step": 19344 }, { "epoch": 0.03430205704184209, "grad_norm": 0.404296875, "learning_rate": 0.0018394348867418588, "loss": 0.204, "step": 19346 }, { "epoch": 0.034305603207151904, "grad_norm": 0.310546875, "learning_rate": 0.001839400955174028, "loss": 0.2198, "step": 19348 }, { "epoch": 0.03430914937246172, "grad_norm": 0.64453125, "learning_rate": 0.0018393670203724869, "loss": 0.243, "step": 19350 }, { "epoch": 0.03431269553777154, "grad_norm": 0.2451171875, "learning_rate": 0.0018393330823373835, "loss": 0.1733, "step": 19352 }, { "epoch": 0.034316241703081354, "grad_norm": 1.7109375, "learning_rate": 0.001839299141068867, "loss": 0.2794, "step": 19354 }, { "epoch": 0.03431978786839117, "grad_norm": 1.0859375, "learning_rate": 0.0018392651965670856, "loss": 0.2212, "step": 19356 }, { "epoch": 0.03432333403370098, "grad_norm": 2.703125, "learning_rate": 0.0018392312488321877, "loss": 0.3003, "step": 19358 }, { "epoch": 0.0343268801990108, "grad_norm": 0.703125, "learning_rate": 0.0018391972978643214, "loss": 0.2195, "step": 19360 }, { "epoch": 0.03433042636432061, "grad_norm": 0.3046875, "learning_rate": 0.001839163343663636, "loss": 0.2255, "step": 19362 }, { "epoch": 0.03433397252963043, "grad_norm": 0.83203125, "learning_rate": 0.0018391293862302792, "loss": 0.2213, "step": 19364 }, { "epoch": 0.03433751869494024, "grad_norm": 0.447265625, "learning_rate": 0.0018390954255643997, "loss": 0.194, "step": 19366 }, { "epoch": 0.034341064860250056, "grad_norm": 0.453125, "learning_rate": 0.0018390614616661458, "loss": 0.2491, "step": 19368 }, { "epoch": 0.03434461102555987, "grad_norm": 0.357421875, "learning_rate": 0.001839027494535667, "loss": 0.2536, "step": 19370 }, { "epoch": 0.034348157190869685, "grad_norm": 0.6875, "learning_rate": 0.0018389935241731109, "loss": 0.222, "step": 19372 }, { "epoch": 0.0343517033561795, "grad_norm": 0.52734375, "learning_rate": 0.0018389595505786266, "loss": 0.1888, "step": 19374 }, { "epoch": 0.03435524952148932, "grad_norm": 0.375, "learning_rate": 0.0018389255737523622, "loss": 0.2139, "step": 19376 }, { "epoch": 0.034358795686799136, "grad_norm": 0.45703125, "learning_rate": 0.0018388915936944667, "loss": 0.2227, "step": 19378 }, { "epoch": 0.03436234185210895, "grad_norm": 1.625, "learning_rate": 0.0018388576104050884, "loss": 0.2889, "step": 19380 }, { "epoch": 0.034365888017418765, "grad_norm": 0.33984375, "learning_rate": 0.0018388236238843763, "loss": 0.2093, "step": 19382 }, { "epoch": 0.03436943418272858, "grad_norm": 2.234375, "learning_rate": 0.0018387896341324787, "loss": 0.3643, "step": 19384 }, { "epoch": 0.034372980348038394, "grad_norm": 0.2119140625, "learning_rate": 0.0018387556411495448, "loss": 0.1925, "step": 19386 }, { "epoch": 0.03437652651334821, "grad_norm": 0.4296875, "learning_rate": 0.0018387216449357222, "loss": 0.183, "step": 19388 }, { "epoch": 0.03438007267865802, "grad_norm": 0.455078125, "learning_rate": 0.0018386876454911606, "loss": 0.2109, "step": 19390 }, { "epoch": 0.03438361884396784, "grad_norm": 0.92578125, "learning_rate": 0.001838653642816008, "loss": 0.3275, "step": 19392 }, { "epoch": 0.03438716500927765, "grad_norm": 0.625, "learning_rate": 0.001838619636910414, "loss": 0.2296, "step": 19394 }, { "epoch": 0.03439071117458747, "grad_norm": 0.51953125, "learning_rate": 0.0018385856277745264, "loss": 0.147, "step": 19396 }, { "epoch": 0.03439425733989729, "grad_norm": 0.255859375, "learning_rate": 0.001838551615408494, "loss": 0.1728, "step": 19398 }, { "epoch": 0.0343978035052071, "grad_norm": 1.4609375, "learning_rate": 0.0018385175998124664, "loss": 0.2031, "step": 19400 }, { "epoch": 0.03440134967051692, "grad_norm": 0.44140625, "learning_rate": 0.0018384835809865914, "loss": 0.1995, "step": 19402 }, { "epoch": 0.03440489583582673, "grad_norm": 0.80859375, "learning_rate": 0.0018384495589310185, "loss": 0.2261, "step": 19404 }, { "epoch": 0.03440844200113655, "grad_norm": 0.494140625, "learning_rate": 0.0018384155336458958, "loss": 0.2119, "step": 19406 }, { "epoch": 0.03441198816644636, "grad_norm": 0.61328125, "learning_rate": 0.0018383815051313728, "loss": 0.2988, "step": 19408 }, { "epoch": 0.034415534331756176, "grad_norm": 0.275390625, "learning_rate": 0.0018383474733875977, "loss": 0.176, "step": 19410 }, { "epoch": 0.03441908049706599, "grad_norm": 0.1640625, "learning_rate": 0.0018383134384147197, "loss": 0.2196, "step": 19412 }, { "epoch": 0.034422626662375805, "grad_norm": 0.248046875, "learning_rate": 0.001838279400212888, "loss": 0.2307, "step": 19414 }, { "epoch": 0.03442617282768562, "grad_norm": 2.9375, "learning_rate": 0.0018382453587822505, "loss": 0.3128, "step": 19416 }, { "epoch": 0.034429718992995434, "grad_norm": 0.56640625, "learning_rate": 0.0018382113141229568, "loss": 0.1993, "step": 19418 }, { "epoch": 0.034433265158305255, "grad_norm": 1.03125, "learning_rate": 0.0018381772662351557, "loss": 0.3101, "step": 19420 }, { "epoch": 0.03443681132361507, "grad_norm": 4.15625, "learning_rate": 0.0018381432151189964, "loss": 0.471, "step": 19422 }, { "epoch": 0.034440357488924885, "grad_norm": 0.369140625, "learning_rate": 0.0018381091607746268, "loss": 0.1733, "step": 19424 }, { "epoch": 0.0344439036542347, "grad_norm": 0.298828125, "learning_rate": 0.0018380751032021968, "loss": 0.2283, "step": 19426 }, { "epoch": 0.034447449819544514, "grad_norm": 0.5546875, "learning_rate": 0.001838041042401855, "loss": 0.2391, "step": 19428 }, { "epoch": 0.03445099598485433, "grad_norm": 0.859375, "learning_rate": 0.0018380069783737502, "loss": 0.4435, "step": 19430 }, { "epoch": 0.03445454215016414, "grad_norm": 1.421875, "learning_rate": 0.0018379729111180315, "loss": 0.3458, "step": 19432 }, { "epoch": 0.03445808831547396, "grad_norm": 0.474609375, "learning_rate": 0.0018379388406348485, "loss": 0.1678, "step": 19434 }, { "epoch": 0.03446163448078377, "grad_norm": 1.6953125, "learning_rate": 0.0018379047669243491, "loss": 0.2759, "step": 19436 }, { "epoch": 0.034465180646093586, "grad_norm": 1.0078125, "learning_rate": 0.0018378706899866834, "loss": 0.1661, "step": 19438 }, { "epoch": 0.0344687268114034, "grad_norm": 0.3984375, "learning_rate": 0.0018378366098219993, "loss": 0.2248, "step": 19440 }, { "epoch": 0.034472272976713215, "grad_norm": 0.6328125, "learning_rate": 0.0018378025264304465, "loss": 0.2803, "step": 19442 }, { "epoch": 0.03447581914202304, "grad_norm": 0.76171875, "learning_rate": 0.0018377684398121744, "loss": 0.2816, "step": 19444 }, { "epoch": 0.03447936530733285, "grad_norm": 0.365234375, "learning_rate": 0.0018377343499673315, "loss": 0.1521, "step": 19446 }, { "epoch": 0.034482911472642666, "grad_norm": 0.609375, "learning_rate": 0.0018377002568960672, "loss": 0.3504, "step": 19448 }, { "epoch": 0.03448645763795248, "grad_norm": 0.2119140625, "learning_rate": 0.0018376661605985303, "loss": 0.1611, "step": 19450 }, { "epoch": 0.034490003803262295, "grad_norm": 1.015625, "learning_rate": 0.0018376320610748703, "loss": 0.214, "step": 19452 }, { "epoch": 0.03449354996857211, "grad_norm": 0.2578125, "learning_rate": 0.001837597958325236, "loss": 0.1684, "step": 19454 }, { "epoch": 0.034497096133881924, "grad_norm": 0.470703125, "learning_rate": 0.0018375638523497767, "loss": 0.2457, "step": 19456 }, { "epoch": 0.03450064229919174, "grad_norm": 0.78125, "learning_rate": 0.0018375297431486414, "loss": 0.1863, "step": 19458 }, { "epoch": 0.03450418846450155, "grad_norm": 0.353515625, "learning_rate": 0.0018374956307219796, "loss": 0.2677, "step": 19460 }, { "epoch": 0.03450773462981137, "grad_norm": 0.314453125, "learning_rate": 0.0018374615150699406, "loss": 0.2287, "step": 19462 }, { "epoch": 0.03451128079512118, "grad_norm": 1.375, "learning_rate": 0.0018374273961926728, "loss": 0.203, "step": 19464 }, { "epoch": 0.034514826960431004, "grad_norm": 0.337890625, "learning_rate": 0.0018373932740903258, "loss": 0.1679, "step": 19466 }, { "epoch": 0.03451837312574082, "grad_norm": 0.263671875, "learning_rate": 0.0018373591487630496, "loss": 0.1568, "step": 19468 }, { "epoch": 0.03452191929105063, "grad_norm": 0.62890625, "learning_rate": 0.0018373250202109923, "loss": 0.1573, "step": 19470 }, { "epoch": 0.03452546545636045, "grad_norm": 0.3203125, "learning_rate": 0.0018372908884343039, "loss": 0.211, "step": 19472 }, { "epoch": 0.03452901162167026, "grad_norm": 0.2578125, "learning_rate": 0.0018372567534331332, "loss": 0.2539, "step": 19474 }, { "epoch": 0.03453255778698008, "grad_norm": 0.64453125, "learning_rate": 0.00183722261520763, "loss": 0.1686, "step": 19476 }, { "epoch": 0.03453610395228989, "grad_norm": 0.5625, "learning_rate": 0.001837188473757943, "loss": 0.1906, "step": 19478 }, { "epoch": 0.034539650117599706, "grad_norm": 0.69921875, "learning_rate": 0.0018371543290842218, "loss": 0.1984, "step": 19480 }, { "epoch": 0.03454319628290952, "grad_norm": 0.515625, "learning_rate": 0.001837120181186616, "loss": 0.2214, "step": 19482 }, { "epoch": 0.034546742448219335, "grad_norm": 0.443359375, "learning_rate": 0.0018370860300652746, "loss": 0.1957, "step": 19484 }, { "epoch": 0.03455028861352915, "grad_norm": 0.314453125, "learning_rate": 0.001837051875720347, "loss": 0.2128, "step": 19486 }, { "epoch": 0.03455383477883897, "grad_norm": 1.234375, "learning_rate": 0.0018370177181519828, "loss": 0.2483, "step": 19488 }, { "epoch": 0.034557380944148786, "grad_norm": 0.48046875, "learning_rate": 0.001836983557360331, "loss": 0.2041, "step": 19490 }, { "epoch": 0.0345609271094586, "grad_norm": 0.80859375, "learning_rate": 0.0018369493933455413, "loss": 0.1802, "step": 19492 }, { "epoch": 0.034564473274768415, "grad_norm": 0.423828125, "learning_rate": 0.0018369152261077629, "loss": 0.1584, "step": 19494 }, { "epoch": 0.03456801944007823, "grad_norm": 0.59765625, "learning_rate": 0.0018368810556471456, "loss": 0.2182, "step": 19496 }, { "epoch": 0.034571565605388044, "grad_norm": 1.640625, "learning_rate": 0.0018368468819638384, "loss": 0.254, "step": 19498 }, { "epoch": 0.03457511177069786, "grad_norm": 0.302734375, "learning_rate": 0.001836812705057991, "loss": 0.2093, "step": 19500 }, { "epoch": 0.03457865793600767, "grad_norm": 1.5625, "learning_rate": 0.001836778524929753, "loss": 0.2199, "step": 19502 }, { "epoch": 0.03458220410131749, "grad_norm": 0.515625, "learning_rate": 0.0018367443415792735, "loss": 0.2821, "step": 19504 }, { "epoch": 0.0345857502666273, "grad_norm": 0.31640625, "learning_rate": 0.001836710155006702, "loss": 0.1945, "step": 19506 }, { "epoch": 0.03458929643193712, "grad_norm": 0.294921875, "learning_rate": 0.0018366759652121887, "loss": 0.1777, "step": 19508 }, { "epoch": 0.03459284259724693, "grad_norm": 0.462890625, "learning_rate": 0.0018366417721958823, "loss": 0.2308, "step": 19510 }, { "epoch": 0.03459638876255675, "grad_norm": 0.4453125, "learning_rate": 0.001836607575957933, "loss": 0.2335, "step": 19512 }, { "epoch": 0.03459993492786657, "grad_norm": 1.0546875, "learning_rate": 0.0018365733764984897, "loss": 0.3872, "step": 19514 }, { "epoch": 0.03460348109317638, "grad_norm": 0.6015625, "learning_rate": 0.0018365391738177027, "loss": 0.209, "step": 19516 }, { "epoch": 0.034607027258486196, "grad_norm": 0.2734375, "learning_rate": 0.0018365049679157205, "loss": 0.2127, "step": 19518 }, { "epoch": 0.03461057342379601, "grad_norm": 0.474609375, "learning_rate": 0.0018364707587926938, "loss": 0.4515, "step": 19520 }, { "epoch": 0.034614119589105825, "grad_norm": 1.0703125, "learning_rate": 0.0018364365464487718, "loss": 0.2133, "step": 19522 }, { "epoch": 0.03461766575441564, "grad_norm": 7.46875, "learning_rate": 0.0018364023308841042, "loss": 0.3535, "step": 19524 }, { "epoch": 0.034621211919725454, "grad_norm": 0.345703125, "learning_rate": 0.0018363681120988407, "loss": 0.2294, "step": 19526 }, { "epoch": 0.03462475808503527, "grad_norm": 2.171875, "learning_rate": 0.0018363338900931306, "loss": 0.3578, "step": 19528 }, { "epoch": 0.034628304250345084, "grad_norm": 0.53515625, "learning_rate": 0.0018362996648671238, "loss": 0.2449, "step": 19530 }, { "epoch": 0.0346318504156549, "grad_norm": 0.52734375, "learning_rate": 0.0018362654364209696, "loss": 0.1809, "step": 19532 }, { "epoch": 0.03463539658096472, "grad_norm": 0.490234375, "learning_rate": 0.0018362312047548184, "loss": 0.1666, "step": 19534 }, { "epoch": 0.034638942746274534, "grad_norm": 0.58203125, "learning_rate": 0.0018361969698688194, "loss": 0.1788, "step": 19536 }, { "epoch": 0.03464248891158435, "grad_norm": 0.302734375, "learning_rate": 0.0018361627317631227, "loss": 0.1806, "step": 19538 }, { "epoch": 0.03464603507689416, "grad_norm": 0.267578125, "learning_rate": 0.0018361284904378778, "loss": 0.1997, "step": 19540 }, { "epoch": 0.03464958124220398, "grad_norm": 0.75390625, "learning_rate": 0.0018360942458932343, "loss": 0.1637, "step": 19542 }, { "epoch": 0.03465312740751379, "grad_norm": 1.1875, "learning_rate": 0.0018360599981293423, "loss": 0.2992, "step": 19544 }, { "epoch": 0.03465667357282361, "grad_norm": 0.353515625, "learning_rate": 0.0018360257471463515, "loss": 0.1982, "step": 19546 }, { "epoch": 0.03466021973813342, "grad_norm": 0.427734375, "learning_rate": 0.0018359914929444118, "loss": 0.2741, "step": 19548 }, { "epoch": 0.034663765903443236, "grad_norm": 0.984375, "learning_rate": 0.0018359572355236722, "loss": 0.3033, "step": 19550 }, { "epoch": 0.03466731206875305, "grad_norm": 0.357421875, "learning_rate": 0.0018359229748842836, "loss": 0.1823, "step": 19552 }, { "epoch": 0.034670858234062865, "grad_norm": 0.6796875, "learning_rate": 0.001835888711026395, "loss": 0.3342, "step": 19554 }, { "epoch": 0.03467440439937269, "grad_norm": 0.33984375, "learning_rate": 0.001835854443950157, "loss": 0.1951, "step": 19556 }, { "epoch": 0.0346779505646825, "grad_norm": 0.75, "learning_rate": 0.0018358201736557189, "loss": 0.1605, "step": 19558 }, { "epoch": 0.034681496729992316, "grad_norm": 0.87109375, "learning_rate": 0.001835785900143231, "loss": 0.1565, "step": 19560 }, { "epoch": 0.03468504289530213, "grad_norm": 1.3515625, "learning_rate": 0.0018357516234128429, "loss": 0.2595, "step": 19562 }, { "epoch": 0.034688589060611945, "grad_norm": 0.44140625, "learning_rate": 0.0018357173434647043, "loss": 0.2521, "step": 19564 }, { "epoch": 0.03469213522592176, "grad_norm": 1.5078125, "learning_rate": 0.0018356830602989658, "loss": 0.1983, "step": 19566 }, { "epoch": 0.034695681391231574, "grad_norm": 0.77734375, "learning_rate": 0.0018356487739157768, "loss": 0.2048, "step": 19568 }, { "epoch": 0.03469922755654139, "grad_norm": 3.40625, "learning_rate": 0.0018356144843152873, "loss": 0.2228, "step": 19570 }, { "epoch": 0.0347027737218512, "grad_norm": 0.72265625, "learning_rate": 0.0018355801914976472, "loss": 0.2046, "step": 19572 }, { "epoch": 0.03470631988716102, "grad_norm": 0.486328125, "learning_rate": 0.0018355458954630068, "loss": 0.2267, "step": 19574 }, { "epoch": 0.03470986605247083, "grad_norm": 0.59375, "learning_rate": 0.0018355115962115159, "loss": 0.2311, "step": 19576 }, { "epoch": 0.03471341221778065, "grad_norm": 1.2265625, "learning_rate": 0.0018354772937433247, "loss": 0.2378, "step": 19578 }, { "epoch": 0.03471695838309047, "grad_norm": 0.4375, "learning_rate": 0.0018354429880585825, "loss": 0.2148, "step": 19580 }, { "epoch": 0.03472050454840028, "grad_norm": 0.265625, "learning_rate": 0.0018354086791574405, "loss": 0.2161, "step": 19582 }, { "epoch": 0.0347240507137101, "grad_norm": 0.373046875, "learning_rate": 0.0018353743670400475, "loss": 0.227, "step": 19584 }, { "epoch": 0.03472759687901991, "grad_norm": 1.3671875, "learning_rate": 0.0018353400517065546, "loss": 0.3436, "step": 19586 }, { "epoch": 0.034731143044329726, "grad_norm": 0.412109375, "learning_rate": 0.0018353057331571112, "loss": 0.2, "step": 19588 }, { "epoch": 0.03473468920963954, "grad_norm": 0.392578125, "learning_rate": 0.0018352714113918681, "loss": 0.209, "step": 19590 }, { "epoch": 0.034738235374949356, "grad_norm": 0.53515625, "learning_rate": 0.0018352370864109747, "loss": 0.1817, "step": 19592 }, { "epoch": 0.03474178154025917, "grad_norm": 0.4765625, "learning_rate": 0.0018352027582145812, "loss": 0.1742, "step": 19594 }, { "epoch": 0.034745327705568985, "grad_norm": 0.71484375, "learning_rate": 0.0018351684268028377, "loss": 0.339, "step": 19596 }, { "epoch": 0.0347488738708788, "grad_norm": 0.57421875, "learning_rate": 0.0018351340921758947, "loss": 0.2354, "step": 19598 }, { "epoch": 0.034752420036188614, "grad_norm": 0.427734375, "learning_rate": 0.0018350997543339026, "loss": 0.2815, "step": 19600 }, { "epoch": 0.034755966201498435, "grad_norm": 2.453125, "learning_rate": 0.0018350654132770108, "loss": 0.3986, "step": 19602 }, { "epoch": 0.03475951236680825, "grad_norm": 0.5234375, "learning_rate": 0.0018350310690053698, "loss": 0.1667, "step": 19604 }, { "epoch": 0.034763058532118064, "grad_norm": 0.369140625, "learning_rate": 0.0018349967215191302, "loss": 0.351, "step": 19606 }, { "epoch": 0.03476660469742788, "grad_norm": 0.412109375, "learning_rate": 0.0018349623708184416, "loss": 0.1907, "step": 19608 }, { "epoch": 0.03477015086273769, "grad_norm": 0.44921875, "learning_rate": 0.0018349280169034545, "loss": 0.1646, "step": 19610 }, { "epoch": 0.03477369702804751, "grad_norm": 0.39453125, "learning_rate": 0.0018348936597743194, "loss": 0.2475, "step": 19612 }, { "epoch": 0.03477724319335732, "grad_norm": 0.82421875, "learning_rate": 0.001834859299431186, "loss": 0.1876, "step": 19614 }, { "epoch": 0.03478078935866714, "grad_norm": 0.4140625, "learning_rate": 0.0018348249358742053, "loss": 0.237, "step": 19616 }, { "epoch": 0.03478433552397695, "grad_norm": 1.7421875, "learning_rate": 0.0018347905691035267, "loss": 0.2735, "step": 19618 }, { "epoch": 0.034787881689286766, "grad_norm": 0.56640625, "learning_rate": 0.0018347561991193012, "loss": 0.2037, "step": 19620 }, { "epoch": 0.03479142785459658, "grad_norm": 0.7734375, "learning_rate": 0.0018347218259216786, "loss": 0.2271, "step": 19622 }, { "epoch": 0.0347949740199064, "grad_norm": 0.9375, "learning_rate": 0.0018346874495108099, "loss": 0.258, "step": 19624 }, { "epoch": 0.03479852018521622, "grad_norm": 1.3671875, "learning_rate": 0.001834653069886845, "loss": 0.248, "step": 19626 }, { "epoch": 0.03480206635052603, "grad_norm": 0.69921875, "learning_rate": 0.0018346186870499342, "loss": 0.1886, "step": 19628 }, { "epoch": 0.034805612515835846, "grad_norm": 1.53125, "learning_rate": 0.0018345843010002278, "loss": 0.3499, "step": 19630 }, { "epoch": 0.03480915868114566, "grad_norm": 0.71875, "learning_rate": 0.0018345499117378766, "loss": 0.2092, "step": 19632 }, { "epoch": 0.034812704846455475, "grad_norm": 0.59765625, "learning_rate": 0.0018345155192630307, "loss": 0.1688, "step": 19634 }, { "epoch": 0.03481625101176529, "grad_norm": 0.66796875, "learning_rate": 0.0018344811235758408, "loss": 0.2282, "step": 19636 }, { "epoch": 0.034819797177075104, "grad_norm": 1.1875, "learning_rate": 0.0018344467246764569, "loss": 0.2666, "step": 19638 }, { "epoch": 0.03482334334238492, "grad_norm": 0.66015625, "learning_rate": 0.0018344123225650295, "loss": 0.261, "step": 19640 }, { "epoch": 0.03482688950769473, "grad_norm": 0.494140625, "learning_rate": 0.0018343779172417091, "loss": 0.1862, "step": 19642 }, { "epoch": 0.03483043567300455, "grad_norm": 2.1875, "learning_rate": 0.0018343435087066467, "loss": 0.2536, "step": 19644 }, { "epoch": 0.03483398183831436, "grad_norm": 0.28125, "learning_rate": 0.001834309096959992, "loss": 0.1949, "step": 19646 }, { "epoch": 0.034837528003624184, "grad_norm": 0.5703125, "learning_rate": 0.0018342746820018963, "loss": 0.1547, "step": 19648 }, { "epoch": 0.034841074168934, "grad_norm": 0.28515625, "learning_rate": 0.0018342402638325092, "loss": 0.196, "step": 19650 }, { "epoch": 0.03484462033424381, "grad_norm": 1.03125, "learning_rate": 0.001834205842451982, "loss": 0.145, "step": 19652 }, { "epoch": 0.03484816649955363, "grad_norm": 0.32421875, "learning_rate": 0.0018341714178604647, "loss": 0.1616, "step": 19654 }, { "epoch": 0.03485171266486344, "grad_norm": 0.263671875, "learning_rate": 0.0018341369900581079, "loss": 0.2392, "step": 19656 }, { "epoch": 0.03485525883017326, "grad_norm": 0.88671875, "learning_rate": 0.001834102559045063, "loss": 0.3104, "step": 19658 }, { "epoch": 0.03485880499548307, "grad_norm": 0.71484375, "learning_rate": 0.0018340681248214792, "loss": 0.198, "step": 19660 }, { "epoch": 0.034862351160792886, "grad_norm": 0.40625, "learning_rate": 0.0018340336873875083, "loss": 0.1792, "step": 19662 }, { "epoch": 0.0348658973261027, "grad_norm": 0.578125, "learning_rate": 0.0018339992467433003, "loss": 0.292, "step": 19664 }, { "epoch": 0.034869443491412515, "grad_norm": 0.66015625, "learning_rate": 0.001833964802889006, "loss": 0.1743, "step": 19666 }, { "epoch": 0.03487298965672233, "grad_norm": 0.181640625, "learning_rate": 0.0018339303558247758, "loss": 0.1609, "step": 19668 }, { "epoch": 0.03487653582203215, "grad_norm": 1.5625, "learning_rate": 0.0018338959055507606, "loss": 0.2623, "step": 19670 }, { "epoch": 0.034880081987341965, "grad_norm": 0.216796875, "learning_rate": 0.0018338614520671112, "loss": 0.1844, "step": 19672 }, { "epoch": 0.03488362815265178, "grad_norm": 0.56640625, "learning_rate": 0.0018338269953739777, "loss": 0.255, "step": 19674 }, { "epoch": 0.034887174317961595, "grad_norm": 1.1640625, "learning_rate": 0.0018337925354715114, "loss": 0.2012, "step": 19676 }, { "epoch": 0.03489072048327141, "grad_norm": 1.25, "learning_rate": 0.0018337580723598631, "loss": 0.1693, "step": 19678 }, { "epoch": 0.034894266648581224, "grad_norm": 4.0625, "learning_rate": 0.0018337236060391826, "loss": 0.5154, "step": 19680 }, { "epoch": 0.03489781281389104, "grad_norm": 0.408203125, "learning_rate": 0.0018336891365096216, "loss": 0.2416, "step": 19682 }, { "epoch": 0.03490135897920085, "grad_norm": 0.357421875, "learning_rate": 0.0018336546637713305, "loss": 0.2266, "step": 19684 }, { "epoch": 0.03490490514451067, "grad_norm": 0.36328125, "learning_rate": 0.00183362018782446, "loss": 0.2055, "step": 19686 }, { "epoch": 0.03490845130982048, "grad_norm": 1.3828125, "learning_rate": 0.001833585708669161, "loss": 0.256, "step": 19688 }, { "epoch": 0.034911997475130296, "grad_norm": 0.1875, "learning_rate": 0.0018335512263055842, "loss": 0.1809, "step": 19690 }, { "epoch": 0.03491554364044012, "grad_norm": 0.6015625, "learning_rate": 0.0018335167407338804, "loss": 0.1392, "step": 19692 }, { "epoch": 0.03491908980574993, "grad_norm": 1.1171875, "learning_rate": 0.0018334822519542006, "loss": 0.2004, "step": 19694 }, { "epoch": 0.03492263597105975, "grad_norm": 1.6484375, "learning_rate": 0.0018334477599666952, "loss": 0.2829, "step": 19696 }, { "epoch": 0.03492618213636956, "grad_norm": 0.419921875, "learning_rate": 0.0018334132647715156, "loss": 0.1818, "step": 19698 }, { "epoch": 0.034929728301679376, "grad_norm": 2.515625, "learning_rate": 0.0018333787663688123, "loss": 0.3024, "step": 19700 }, { "epoch": 0.03493327446698919, "grad_norm": 1.2421875, "learning_rate": 0.0018333442647587362, "loss": 0.2631, "step": 19702 }, { "epoch": 0.034936820632299005, "grad_norm": 0.9140625, "learning_rate": 0.001833309759941438, "loss": 0.1393, "step": 19704 }, { "epoch": 0.03494036679760882, "grad_norm": 0.98046875, "learning_rate": 0.0018332752519170693, "loss": 0.2472, "step": 19706 }, { "epoch": 0.034943912962918634, "grad_norm": 2.5, "learning_rate": 0.0018332407406857806, "loss": 0.2105, "step": 19708 }, { "epoch": 0.03494745912822845, "grad_norm": 1.140625, "learning_rate": 0.0018332062262477228, "loss": 0.1693, "step": 19710 }, { "epoch": 0.03495100529353826, "grad_norm": 1.15625, "learning_rate": 0.0018331717086030467, "loss": 0.1999, "step": 19712 }, { "epoch": 0.03495455145884808, "grad_norm": 1.8828125, "learning_rate": 0.0018331371877519033, "loss": 0.1967, "step": 19714 }, { "epoch": 0.0349580976241579, "grad_norm": 0.5390625, "learning_rate": 0.0018331026636944439, "loss": 0.2034, "step": 19716 }, { "epoch": 0.034961643789467714, "grad_norm": 1.140625, "learning_rate": 0.0018330681364308195, "loss": 0.1513, "step": 19718 }, { "epoch": 0.03496518995477753, "grad_norm": 0.205078125, "learning_rate": 0.0018330336059611805, "loss": 0.1445, "step": 19720 }, { "epoch": 0.03496873612008734, "grad_norm": 0.80078125, "learning_rate": 0.0018329990722856786, "loss": 0.2039, "step": 19722 }, { "epoch": 0.03497228228539716, "grad_norm": 1.234375, "learning_rate": 0.001832964535404464, "loss": 0.3179, "step": 19724 }, { "epoch": 0.03497582845070697, "grad_norm": 0.361328125, "learning_rate": 0.001832929995317689, "loss": 0.292, "step": 19726 }, { "epoch": 0.03497937461601679, "grad_norm": 0.3984375, "learning_rate": 0.0018328954520255034, "loss": 0.1945, "step": 19728 }, { "epoch": 0.0349829207813266, "grad_norm": 0.51953125, "learning_rate": 0.0018328609055280592, "loss": 0.2892, "step": 19730 }, { "epoch": 0.034986466946636416, "grad_norm": 0.400390625, "learning_rate": 0.0018328263558255067, "loss": 0.1896, "step": 19732 }, { "epoch": 0.03499001311194623, "grad_norm": 0.1904296875, "learning_rate": 0.0018327918029179976, "loss": 0.1612, "step": 19734 }, { "epoch": 0.034993559277256045, "grad_norm": 0.470703125, "learning_rate": 0.0018327572468056828, "loss": 0.2013, "step": 19736 }, { "epoch": 0.034997105442565866, "grad_norm": 0.32421875, "learning_rate": 0.0018327226874887135, "loss": 0.1796, "step": 19738 }, { "epoch": 0.03500065160787568, "grad_norm": 0.302734375, "learning_rate": 0.0018326881249672408, "loss": 0.2448, "step": 19740 }, { "epoch": 0.035004197773185496, "grad_norm": 0.5859375, "learning_rate": 0.0018326535592414156, "loss": 0.1752, "step": 19742 }, { "epoch": 0.03500774393849531, "grad_norm": 0.4140625, "learning_rate": 0.0018326189903113894, "loss": 0.216, "step": 19744 }, { "epoch": 0.035011290103805125, "grad_norm": 8.375, "learning_rate": 0.0018325844181773135, "loss": 0.2106, "step": 19746 }, { "epoch": 0.03501483626911494, "grad_norm": 0.64453125, "learning_rate": 0.0018325498428393388, "loss": 0.2093, "step": 19748 }, { "epoch": 0.035018382434424754, "grad_norm": 0.59765625, "learning_rate": 0.0018325152642976166, "loss": 0.2355, "step": 19750 }, { "epoch": 0.03502192859973457, "grad_norm": 6.6875, "learning_rate": 0.001832480682552298, "loss": 0.3573, "step": 19752 }, { "epoch": 0.03502547476504438, "grad_norm": 0.466796875, "learning_rate": 0.0018324460976035344, "loss": 0.3623, "step": 19754 }, { "epoch": 0.0350290209303542, "grad_norm": 0.400390625, "learning_rate": 0.001832411509451477, "loss": 0.1807, "step": 19756 }, { "epoch": 0.03503256709566401, "grad_norm": 0.75, "learning_rate": 0.0018323769180962773, "loss": 0.22, "step": 19758 }, { "epoch": 0.035036113260973833, "grad_norm": 2.140625, "learning_rate": 0.0018323423235380863, "loss": 0.4485, "step": 19760 }, { "epoch": 0.03503965942628365, "grad_norm": 1.7421875, "learning_rate": 0.0018323077257770555, "loss": 0.3063, "step": 19762 }, { "epoch": 0.03504320559159346, "grad_norm": 0.86328125, "learning_rate": 0.0018322731248133356, "loss": 0.1914, "step": 19764 }, { "epoch": 0.03504675175690328, "grad_norm": 1.0078125, "learning_rate": 0.0018322385206470788, "loss": 0.2252, "step": 19766 }, { "epoch": 0.03505029792221309, "grad_norm": 0.515625, "learning_rate": 0.001832203913278436, "loss": 0.1631, "step": 19768 }, { "epoch": 0.035053844087522906, "grad_norm": 1.328125, "learning_rate": 0.0018321693027075587, "loss": 0.2854, "step": 19770 }, { "epoch": 0.03505739025283272, "grad_norm": 0.40625, "learning_rate": 0.001832134688934598, "loss": 0.191, "step": 19772 }, { "epoch": 0.035060936418142535, "grad_norm": 0.83984375, "learning_rate": 0.0018321000719597055, "loss": 0.2428, "step": 19774 }, { "epoch": 0.03506448258345235, "grad_norm": 0.921875, "learning_rate": 0.0018320654517830323, "loss": 0.197, "step": 19776 }, { "epoch": 0.035068028748762164, "grad_norm": 1.0234375, "learning_rate": 0.0018320308284047303, "loss": 0.1741, "step": 19778 }, { "epoch": 0.03507157491407198, "grad_norm": 0.73828125, "learning_rate": 0.0018319962018249504, "loss": 0.2697, "step": 19780 }, { "epoch": 0.035075121079381794, "grad_norm": 4.09375, "learning_rate": 0.0018319615720438448, "loss": 0.2906, "step": 19782 }, { "epoch": 0.035078667244691615, "grad_norm": 0.443359375, "learning_rate": 0.0018319269390615642, "loss": 0.2275, "step": 19784 }, { "epoch": 0.03508221341000143, "grad_norm": 0.443359375, "learning_rate": 0.0018318923028782603, "loss": 0.189, "step": 19786 }, { "epoch": 0.035085759575311244, "grad_norm": 0.6484375, "learning_rate": 0.0018318576634940844, "loss": 0.2052, "step": 19788 }, { "epoch": 0.03508930574062106, "grad_norm": 0.375, "learning_rate": 0.0018318230209091887, "loss": 0.4515, "step": 19790 }, { "epoch": 0.03509285190593087, "grad_norm": 1.8125, "learning_rate": 0.0018317883751237237, "loss": 0.2496, "step": 19792 }, { "epoch": 0.03509639807124069, "grad_norm": 0.359375, "learning_rate": 0.0018317537261378417, "loss": 0.1834, "step": 19794 }, { "epoch": 0.0350999442365505, "grad_norm": 0.578125, "learning_rate": 0.0018317190739516938, "loss": 0.4138, "step": 19796 }, { "epoch": 0.03510349040186032, "grad_norm": 0.3203125, "learning_rate": 0.0018316844185654318, "loss": 0.2385, "step": 19798 }, { "epoch": 0.03510703656717013, "grad_norm": 0.7265625, "learning_rate": 0.001831649759979207, "loss": 0.2178, "step": 19800 }, { "epoch": 0.035110582732479946, "grad_norm": 0.25, "learning_rate": 0.0018316150981931715, "loss": 0.1591, "step": 19802 }, { "epoch": 0.03511412889778976, "grad_norm": 0.458984375, "learning_rate": 0.0018315804332074762, "loss": 0.2301, "step": 19804 }, { "epoch": 0.03511767506309958, "grad_norm": 0.67578125, "learning_rate": 0.0018315457650222731, "loss": 0.2197, "step": 19806 }, { "epoch": 0.0351212212284094, "grad_norm": 0.671875, "learning_rate": 0.001831511093637714, "loss": 0.2655, "step": 19808 }, { "epoch": 0.03512476739371921, "grad_norm": 1.40625, "learning_rate": 0.00183147641905395, "loss": 0.231, "step": 19810 }, { "epoch": 0.035128313559029026, "grad_norm": 0.83984375, "learning_rate": 0.001831441741271133, "loss": 0.2396, "step": 19812 }, { "epoch": 0.03513185972433884, "grad_norm": 0.52734375, "learning_rate": 0.001831407060289415, "loss": 0.1695, "step": 19814 }, { "epoch": 0.035135405889648655, "grad_norm": 3.0625, "learning_rate": 0.001831372376108947, "loss": 0.2298, "step": 19816 }, { "epoch": 0.03513895205495847, "grad_norm": 0.431640625, "learning_rate": 0.0018313376887298817, "loss": 0.342, "step": 19818 }, { "epoch": 0.035142498220268284, "grad_norm": 0.5703125, "learning_rate": 0.0018313029981523695, "loss": 0.2779, "step": 19820 }, { "epoch": 0.0351460443855781, "grad_norm": 0.40625, "learning_rate": 0.001831268304376563, "loss": 0.1999, "step": 19822 }, { "epoch": 0.03514959055088791, "grad_norm": 0.60546875, "learning_rate": 0.0018312336074026137, "loss": 0.2246, "step": 19824 }, { "epoch": 0.03515313671619773, "grad_norm": 0.63671875, "learning_rate": 0.0018311989072306734, "loss": 0.1251, "step": 19826 }, { "epoch": 0.03515668288150755, "grad_norm": 0.38671875, "learning_rate": 0.0018311642038608938, "loss": 0.2344, "step": 19828 }, { "epoch": 0.035160229046817364, "grad_norm": 0.328125, "learning_rate": 0.0018311294972934266, "loss": 0.1566, "step": 19830 }, { "epoch": 0.03516377521212718, "grad_norm": 1.53125, "learning_rate": 0.0018310947875284239, "loss": 0.3336, "step": 19832 }, { "epoch": 0.03516732137743699, "grad_norm": 1.0859375, "learning_rate": 0.0018310600745660367, "loss": 0.307, "step": 19834 }, { "epoch": 0.03517086754274681, "grad_norm": 0.5078125, "learning_rate": 0.001831025358406418, "loss": 0.2103, "step": 19836 }, { "epoch": 0.03517441370805662, "grad_norm": 4.9375, "learning_rate": 0.0018309906390497187, "loss": 0.2092, "step": 19838 }, { "epoch": 0.035177959873366436, "grad_norm": 0.453125, "learning_rate": 0.0018309559164960913, "loss": 0.2123, "step": 19840 }, { "epoch": 0.03518150603867625, "grad_norm": 0.474609375, "learning_rate": 0.001830921190745687, "loss": 0.3171, "step": 19842 }, { "epoch": 0.035185052203986066, "grad_norm": 0.310546875, "learning_rate": 0.0018308864617986582, "loss": 0.2297, "step": 19844 }, { "epoch": 0.03518859836929588, "grad_norm": 0.76171875, "learning_rate": 0.001830851729655156, "loss": 0.2246, "step": 19846 }, { "epoch": 0.035192144534605695, "grad_norm": 1.828125, "learning_rate": 0.0018308169943153335, "loss": 0.286, "step": 19848 }, { "epoch": 0.03519569069991551, "grad_norm": 0.73828125, "learning_rate": 0.0018307822557793417, "loss": 0.1627, "step": 19850 }, { "epoch": 0.03519923686522533, "grad_norm": 0.62890625, "learning_rate": 0.0018307475140473329, "loss": 0.2125, "step": 19852 }, { "epoch": 0.035202783030535145, "grad_norm": 1.28125, "learning_rate": 0.0018307127691194588, "loss": 0.191, "step": 19854 }, { "epoch": 0.03520632919584496, "grad_norm": 0.57421875, "learning_rate": 0.0018306780209958718, "loss": 0.2788, "step": 19856 }, { "epoch": 0.035209875361154774, "grad_norm": 0.70703125, "learning_rate": 0.0018306432696767233, "loss": 0.1968, "step": 19858 }, { "epoch": 0.03521342152646459, "grad_norm": 0.494140625, "learning_rate": 0.0018306085151621656, "loss": 0.1506, "step": 19860 }, { "epoch": 0.0352169676917744, "grad_norm": 1.8203125, "learning_rate": 0.0018305737574523505, "loss": 0.2261, "step": 19862 }, { "epoch": 0.03522051385708422, "grad_norm": 0.478515625, "learning_rate": 0.0018305389965474303, "loss": 0.1966, "step": 19864 }, { "epoch": 0.03522406002239403, "grad_norm": 3.4375, "learning_rate": 0.001830504232447557, "loss": 0.227, "step": 19866 }, { "epoch": 0.03522760618770385, "grad_norm": 2.484375, "learning_rate": 0.0018304694651528823, "loss": 0.3059, "step": 19868 }, { "epoch": 0.03523115235301366, "grad_norm": 0.5625, "learning_rate": 0.0018304346946635586, "loss": 0.1407, "step": 19870 }, { "epoch": 0.035234698518323476, "grad_norm": 0.3046875, "learning_rate": 0.0018303999209797377, "loss": 0.2046, "step": 19872 }, { "epoch": 0.0352382446836333, "grad_norm": 0.59765625, "learning_rate": 0.0018303651441015719, "loss": 0.1782, "step": 19874 }, { "epoch": 0.03524179084894311, "grad_norm": 0.46875, "learning_rate": 0.0018303303640292133, "loss": 0.2057, "step": 19876 }, { "epoch": 0.03524533701425293, "grad_norm": 1.5703125, "learning_rate": 0.0018302955807628137, "loss": 0.2064, "step": 19878 }, { "epoch": 0.03524888317956274, "grad_norm": 1.3203125, "learning_rate": 0.0018302607943025259, "loss": 0.2405, "step": 19880 }, { "epoch": 0.035252429344872556, "grad_norm": 2.140625, "learning_rate": 0.0018302260046485009, "loss": 0.2072, "step": 19882 }, { "epoch": 0.03525597551018237, "grad_norm": 2.546875, "learning_rate": 0.0018301912118008918, "loss": 0.389, "step": 19884 }, { "epoch": 0.035259521675492185, "grad_norm": 0.34765625, "learning_rate": 0.0018301564157598507, "loss": 0.2596, "step": 19886 }, { "epoch": 0.035263067840802, "grad_norm": 1.3671875, "learning_rate": 0.0018301216165255293, "loss": 0.1424, "step": 19888 }, { "epoch": 0.035266614006111814, "grad_norm": 0.9375, "learning_rate": 0.0018300868140980803, "loss": 0.2065, "step": 19890 }, { "epoch": 0.03527016017142163, "grad_norm": 0.8203125, "learning_rate": 0.001830052008477655, "loss": 0.2469, "step": 19892 }, { "epoch": 0.03527370633673144, "grad_norm": 1.015625, "learning_rate": 0.0018300171996644072, "loss": 0.2107, "step": 19894 }, { "epoch": 0.035277252502041265, "grad_norm": 0.4453125, "learning_rate": 0.0018299823876584875, "loss": 0.1964, "step": 19896 }, { "epoch": 0.03528079866735108, "grad_norm": 0.310546875, "learning_rate": 0.0018299475724600492, "loss": 0.1779, "step": 19898 }, { "epoch": 0.035284344832660894, "grad_norm": 0.9296875, "learning_rate": 0.0018299127540692445, "loss": 0.2099, "step": 19900 }, { "epoch": 0.03528789099797071, "grad_norm": 0.83984375, "learning_rate": 0.0018298779324862249, "loss": 0.2107, "step": 19902 }, { "epoch": 0.03529143716328052, "grad_norm": 0.6328125, "learning_rate": 0.0018298431077111432, "loss": 0.2127, "step": 19904 }, { "epoch": 0.03529498332859034, "grad_norm": 0.65234375, "learning_rate": 0.001829808279744152, "loss": 0.3013, "step": 19906 }, { "epoch": 0.03529852949390015, "grad_norm": 0.92578125, "learning_rate": 0.0018297734485854032, "loss": 0.2168, "step": 19908 }, { "epoch": 0.03530207565920997, "grad_norm": 0.421875, "learning_rate": 0.0018297386142350493, "loss": 0.2195, "step": 19910 }, { "epoch": 0.03530562182451978, "grad_norm": 0.28515625, "learning_rate": 0.0018297037766932425, "loss": 0.2853, "step": 19912 }, { "epoch": 0.035309167989829596, "grad_norm": 0.40234375, "learning_rate": 0.0018296689359601351, "loss": 0.2064, "step": 19914 }, { "epoch": 0.03531271415513941, "grad_norm": 0.46875, "learning_rate": 0.00182963409203588, "loss": 0.2045, "step": 19916 }, { "epoch": 0.035316260320449225, "grad_norm": 0.97265625, "learning_rate": 0.0018295992449206286, "loss": 0.2106, "step": 19918 }, { "epoch": 0.035319806485759046, "grad_norm": 0.38671875, "learning_rate": 0.0018295643946145343, "loss": 0.1845, "step": 19920 }, { "epoch": 0.03532335265106886, "grad_norm": 0.43359375, "learning_rate": 0.001829529541117749, "loss": 0.1987, "step": 19922 }, { "epoch": 0.035326898816378675, "grad_norm": 0.83984375, "learning_rate": 0.0018294946844304254, "loss": 0.2617, "step": 19924 }, { "epoch": 0.03533044498168849, "grad_norm": 2.25, "learning_rate": 0.0018294598245527157, "loss": 0.3284, "step": 19926 }, { "epoch": 0.035333991146998305, "grad_norm": 0.3125, "learning_rate": 0.0018294249614847727, "loss": 0.2725, "step": 19928 }, { "epoch": 0.03533753731230812, "grad_norm": 2.890625, "learning_rate": 0.0018293900952267483, "loss": 0.1963, "step": 19930 }, { "epoch": 0.035341083477617934, "grad_norm": 0.56640625, "learning_rate": 0.0018293552257787954, "loss": 0.2567, "step": 19932 }, { "epoch": 0.03534462964292775, "grad_norm": 1.015625, "learning_rate": 0.0018293203531410663, "loss": 0.274, "step": 19934 }, { "epoch": 0.03534817580823756, "grad_norm": 3.40625, "learning_rate": 0.0018292854773137137, "loss": 0.2324, "step": 19936 }, { "epoch": 0.03535172197354738, "grad_norm": 2.484375, "learning_rate": 0.00182925059829689, "loss": 0.4741, "step": 19938 }, { "epoch": 0.03535526813885719, "grad_norm": 0.6875, "learning_rate": 0.0018292157160907478, "loss": 0.2537, "step": 19940 }, { "epoch": 0.03535881430416701, "grad_norm": 1.3046875, "learning_rate": 0.0018291808306954396, "loss": 0.2346, "step": 19942 }, { "epoch": 0.03536236046947683, "grad_norm": 0.47265625, "learning_rate": 0.001829145942111118, "loss": 0.1706, "step": 19944 }, { "epoch": 0.03536590663478664, "grad_norm": 0.6640625, "learning_rate": 0.0018291110503379359, "loss": 0.208, "step": 19946 }, { "epoch": 0.03536945280009646, "grad_norm": 0.66796875, "learning_rate": 0.0018290761553760454, "loss": 0.2336, "step": 19948 }, { "epoch": 0.03537299896540627, "grad_norm": 0.49609375, "learning_rate": 0.0018290412572255993, "loss": 0.2171, "step": 19950 }, { "epoch": 0.035376545130716086, "grad_norm": 0.83203125, "learning_rate": 0.00182900635588675, "loss": 0.192, "step": 19952 }, { "epoch": 0.0353800912960259, "grad_norm": 1.3125, "learning_rate": 0.0018289714513596502, "loss": 0.3025, "step": 19954 }, { "epoch": 0.035383637461335715, "grad_norm": 0.50390625, "learning_rate": 0.0018289365436444532, "loss": 0.3591, "step": 19956 }, { "epoch": 0.03538718362664553, "grad_norm": 0.466796875, "learning_rate": 0.0018289016327413111, "loss": 0.28, "step": 19958 }, { "epoch": 0.035390729791955344, "grad_norm": 0.5078125, "learning_rate": 0.0018288667186503764, "loss": 0.1875, "step": 19960 }, { "epoch": 0.03539427595726516, "grad_norm": 0.451171875, "learning_rate": 0.0018288318013718024, "loss": 0.1637, "step": 19962 }, { "epoch": 0.03539782212257498, "grad_norm": 4.0, "learning_rate": 0.001828796880905741, "loss": 0.4094, "step": 19964 }, { "epoch": 0.035401368287884795, "grad_norm": 0.59765625, "learning_rate": 0.0018287619572523457, "loss": 0.2104, "step": 19966 }, { "epoch": 0.03540491445319461, "grad_norm": 0.44140625, "learning_rate": 0.0018287270304117688, "loss": 0.2464, "step": 19968 }, { "epoch": 0.035408460618504424, "grad_norm": 0.43359375, "learning_rate": 0.0018286921003841633, "loss": 0.2715, "step": 19970 }, { "epoch": 0.03541200678381424, "grad_norm": 0.6328125, "learning_rate": 0.0018286571671696818, "loss": 0.2143, "step": 19972 }, { "epoch": 0.03541555294912405, "grad_norm": 4.0625, "learning_rate": 0.0018286222307684773, "loss": 0.2641, "step": 19974 }, { "epoch": 0.03541909911443387, "grad_norm": 0.3515625, "learning_rate": 0.0018285872911807017, "loss": 0.2092, "step": 19976 }, { "epoch": 0.03542264527974368, "grad_norm": 0.34375, "learning_rate": 0.0018285523484065092, "loss": 0.2434, "step": 19978 }, { "epoch": 0.0354261914450535, "grad_norm": 0.87890625, "learning_rate": 0.0018285174024460513, "loss": 0.1773, "step": 19980 }, { "epoch": 0.03542973761036331, "grad_norm": 1.453125, "learning_rate": 0.001828482453299482, "loss": 0.5136, "step": 19982 }, { "epoch": 0.035433283775673126, "grad_norm": 0.90625, "learning_rate": 0.0018284475009669537, "loss": 0.1881, "step": 19984 }, { "epoch": 0.03543682994098294, "grad_norm": 0.44140625, "learning_rate": 0.0018284125454486185, "loss": 0.3386, "step": 19986 }, { "epoch": 0.03544037610629276, "grad_norm": 0.54296875, "learning_rate": 0.0018283775867446305, "loss": 0.1808, "step": 19988 }, { "epoch": 0.035443922271602576, "grad_norm": 0.515625, "learning_rate": 0.0018283426248551417, "loss": 0.1709, "step": 19990 }, { "epoch": 0.03544746843691239, "grad_norm": 0.78515625, "learning_rate": 0.0018283076597803054, "loss": 0.2594, "step": 19992 }, { "epoch": 0.035451014602222206, "grad_norm": 0.384765625, "learning_rate": 0.0018282726915202748, "loss": 0.1844, "step": 19994 }, { "epoch": 0.03545456076753202, "grad_norm": 0.69921875, "learning_rate": 0.001828237720075202, "loss": 0.2046, "step": 19996 }, { "epoch": 0.035458106932841835, "grad_norm": 0.625, "learning_rate": 0.0018282027454452408, "loss": 0.1854, "step": 19998 }, { "epoch": 0.03546165309815165, "grad_norm": 0.53515625, "learning_rate": 0.0018281677676305434, "loss": 0.1838, "step": 20000 }, { "epoch": 0.035465199263461464, "grad_norm": 0.24609375, "learning_rate": 0.0018281327866312635, "loss": 0.2353, "step": 20002 }, { "epoch": 0.03546874542877128, "grad_norm": 0.341796875, "learning_rate": 0.0018280978024475538, "loss": 0.1868, "step": 20004 }, { "epoch": 0.03547229159408109, "grad_norm": 0.447265625, "learning_rate": 0.001828062815079567, "loss": 0.2402, "step": 20006 }, { "epoch": 0.03547583775939091, "grad_norm": 0.5546875, "learning_rate": 0.0018280278245274562, "loss": 0.1438, "step": 20008 }, { "epoch": 0.03547938392470073, "grad_norm": 1.3984375, "learning_rate": 0.0018279928307913748, "loss": 0.2108, "step": 20010 }, { "epoch": 0.035482930090010543, "grad_norm": 1.21875, "learning_rate": 0.0018279578338714758, "loss": 0.2302, "step": 20012 }, { "epoch": 0.03548647625532036, "grad_norm": 0.376953125, "learning_rate": 0.0018279228337679118, "loss": 0.2388, "step": 20014 }, { "epoch": 0.03549002242063017, "grad_norm": 1.0625, "learning_rate": 0.0018278878304808363, "loss": 0.2008, "step": 20016 }, { "epoch": 0.03549356858593999, "grad_norm": 1.0703125, "learning_rate": 0.001827852824010402, "loss": 0.184, "step": 20018 }, { "epoch": 0.0354971147512498, "grad_norm": 0.443359375, "learning_rate": 0.0018278178143567626, "loss": 0.2505, "step": 20020 }, { "epoch": 0.035500660916559616, "grad_norm": 0.78515625, "learning_rate": 0.0018277828015200706, "loss": 0.2271, "step": 20022 }, { "epoch": 0.03550420708186943, "grad_norm": 0.353515625, "learning_rate": 0.0018277477855004793, "loss": 0.1955, "step": 20024 }, { "epoch": 0.035507753247179245, "grad_norm": 0.375, "learning_rate": 0.0018277127662981421, "loss": 0.1941, "step": 20026 }, { "epoch": 0.03551129941248906, "grad_norm": 0.4296875, "learning_rate": 0.001827677743913212, "loss": 0.1778, "step": 20028 }, { "epoch": 0.035514845577798874, "grad_norm": 1.0390625, "learning_rate": 0.001827642718345842, "loss": 0.2736, "step": 20030 }, { "epoch": 0.035518391743108696, "grad_norm": 0.40625, "learning_rate": 0.0018276076895961855, "loss": 0.2164, "step": 20032 }, { "epoch": 0.03552193790841851, "grad_norm": 2.828125, "learning_rate": 0.0018275726576643956, "loss": 0.237, "step": 20034 }, { "epoch": 0.035525484073728325, "grad_norm": 0.5390625, "learning_rate": 0.0018275376225506256, "loss": 0.2322, "step": 20036 }, { "epoch": 0.03552903023903814, "grad_norm": 1.234375, "learning_rate": 0.0018275025842550284, "loss": 0.2477, "step": 20038 }, { "epoch": 0.035532576404347954, "grad_norm": 0.447265625, "learning_rate": 0.0018274675427777576, "loss": 0.1645, "step": 20040 }, { "epoch": 0.03553612256965777, "grad_norm": 0.5546875, "learning_rate": 0.0018274324981189664, "loss": 0.2012, "step": 20042 }, { "epoch": 0.03553966873496758, "grad_norm": 0.259765625, "learning_rate": 0.0018273974502788079, "loss": 0.2016, "step": 20044 }, { "epoch": 0.0355432149002774, "grad_norm": 0.7421875, "learning_rate": 0.0018273623992574358, "loss": 0.2397, "step": 20046 }, { "epoch": 0.03554676106558721, "grad_norm": 1.0, "learning_rate": 0.0018273273450550025, "loss": 0.2838, "step": 20048 }, { "epoch": 0.03555030723089703, "grad_norm": 0.5703125, "learning_rate": 0.0018272922876716622, "loss": 0.3161, "step": 20050 }, { "epoch": 0.03555385339620684, "grad_norm": 2.40625, "learning_rate": 0.0018272572271075679, "loss": 0.302, "step": 20052 }, { "epoch": 0.035557399561516656, "grad_norm": 2.5625, "learning_rate": 0.001827222163362873, "loss": 0.3404, "step": 20054 }, { "epoch": 0.03556094572682648, "grad_norm": 1.1015625, "learning_rate": 0.0018271870964377305, "loss": 0.2398, "step": 20056 }, { "epoch": 0.03556449189213629, "grad_norm": 0.470703125, "learning_rate": 0.0018271520263322943, "loss": 0.1816, "step": 20058 }, { "epoch": 0.03556803805744611, "grad_norm": 2.984375, "learning_rate": 0.0018271169530467172, "loss": 0.2055, "step": 20060 }, { "epoch": 0.03557158422275592, "grad_norm": 0.5546875, "learning_rate": 0.0018270818765811535, "loss": 0.1773, "step": 20062 }, { "epoch": 0.035575130388065736, "grad_norm": 1.40625, "learning_rate": 0.0018270467969357554, "loss": 0.3101, "step": 20064 }, { "epoch": 0.03557867655337555, "grad_norm": 0.546875, "learning_rate": 0.0018270117141106772, "loss": 0.2072, "step": 20066 }, { "epoch": 0.035582222718685365, "grad_norm": 2.265625, "learning_rate": 0.0018269766281060723, "loss": 0.2914, "step": 20068 }, { "epoch": 0.03558576888399518, "grad_norm": 0.671875, "learning_rate": 0.0018269415389220935, "loss": 0.2349, "step": 20070 }, { "epoch": 0.035589315049304994, "grad_norm": 4.3125, "learning_rate": 0.0018269064465588948, "loss": 0.5016, "step": 20072 }, { "epoch": 0.03559286121461481, "grad_norm": 0.5390625, "learning_rate": 0.0018268713510166295, "loss": 0.1555, "step": 20074 }, { "epoch": 0.03559640737992462, "grad_norm": 2.078125, "learning_rate": 0.0018268362522954511, "loss": 0.3287, "step": 20076 }, { "epoch": 0.035599953545234445, "grad_norm": 0.515625, "learning_rate": 0.0018268011503955134, "loss": 0.1804, "step": 20078 }, { "epoch": 0.03560349971054426, "grad_norm": 0.5859375, "learning_rate": 0.0018267660453169694, "loss": 0.1981, "step": 20080 }, { "epoch": 0.035607045875854074, "grad_norm": 0.7265625, "learning_rate": 0.0018267309370599727, "loss": 0.2257, "step": 20082 }, { "epoch": 0.03561059204116389, "grad_norm": 0.79296875, "learning_rate": 0.0018266958256246773, "loss": 0.1887, "step": 20084 }, { "epoch": 0.0356141382064737, "grad_norm": 0.6484375, "learning_rate": 0.0018266607110112366, "loss": 0.1587, "step": 20086 }, { "epoch": 0.03561768437178352, "grad_norm": 0.796875, "learning_rate": 0.0018266255932198037, "loss": 0.2306, "step": 20088 }, { "epoch": 0.03562123053709333, "grad_norm": 0.7421875, "learning_rate": 0.0018265904722505328, "loss": 0.1625, "step": 20090 }, { "epoch": 0.035624776702403146, "grad_norm": 0.6484375, "learning_rate": 0.0018265553481035773, "loss": 0.2199, "step": 20092 }, { "epoch": 0.03562832286771296, "grad_norm": 0.482421875, "learning_rate": 0.0018265202207790904, "loss": 0.1625, "step": 20094 }, { "epoch": 0.035631869033022776, "grad_norm": 11.6875, "learning_rate": 0.001826485090277226, "loss": 0.4761, "step": 20096 }, { "epoch": 0.03563541519833259, "grad_norm": 1.015625, "learning_rate": 0.0018264499565981383, "loss": 0.2255, "step": 20098 }, { "epoch": 0.03563896136364241, "grad_norm": 0.62890625, "learning_rate": 0.0018264148197419804, "loss": 0.2157, "step": 20100 }, { "epoch": 0.035642507528952226, "grad_norm": 1.1171875, "learning_rate": 0.0018263796797089056, "loss": 0.1731, "step": 20102 }, { "epoch": 0.03564605369426204, "grad_norm": 0.5234375, "learning_rate": 0.0018263445364990682, "loss": 0.2737, "step": 20104 }, { "epoch": 0.035649599859571855, "grad_norm": 2.828125, "learning_rate": 0.0018263093901126218, "loss": 0.494, "step": 20106 }, { "epoch": 0.03565314602488167, "grad_norm": 1.390625, "learning_rate": 0.0018262742405497198, "loss": 0.1488, "step": 20108 }, { "epoch": 0.035656692190191484, "grad_norm": 1.2890625, "learning_rate": 0.0018262390878105164, "loss": 0.2315, "step": 20110 }, { "epoch": 0.0356602383555013, "grad_norm": 1.9609375, "learning_rate": 0.0018262039318951652, "loss": 0.4986, "step": 20112 }, { "epoch": 0.03566378452081111, "grad_norm": 0.71484375, "learning_rate": 0.0018261687728038194, "loss": 0.2758, "step": 20114 }, { "epoch": 0.03566733068612093, "grad_norm": 0.5234375, "learning_rate": 0.0018261336105366336, "loss": 0.1304, "step": 20116 }, { "epoch": 0.03567087685143074, "grad_norm": 1.015625, "learning_rate": 0.001826098445093761, "loss": 0.2241, "step": 20118 }, { "epoch": 0.03567442301674056, "grad_norm": 0.6171875, "learning_rate": 0.0018260632764753556, "loss": 0.2542, "step": 20120 }, { "epoch": 0.03567796918205037, "grad_norm": 1.234375, "learning_rate": 0.0018260281046815709, "loss": 0.2207, "step": 20122 }, { "epoch": 0.03568151534736019, "grad_norm": 0.80859375, "learning_rate": 0.0018259929297125613, "loss": 0.1875, "step": 20124 }, { "epoch": 0.03568506151267001, "grad_norm": 2.953125, "learning_rate": 0.0018259577515684804, "loss": 0.2535, "step": 20126 }, { "epoch": 0.03568860767797982, "grad_norm": 5.9375, "learning_rate": 0.0018259225702494818, "loss": 0.2641, "step": 20128 }, { "epoch": 0.03569215384328964, "grad_norm": 0.68359375, "learning_rate": 0.0018258873857557195, "loss": 0.2122, "step": 20130 }, { "epoch": 0.03569570000859945, "grad_norm": 0.8515625, "learning_rate": 0.0018258521980873475, "loss": 0.2815, "step": 20132 }, { "epoch": 0.035699246173909266, "grad_norm": 0.65234375, "learning_rate": 0.0018258170072445196, "loss": 0.1924, "step": 20134 }, { "epoch": 0.03570279233921908, "grad_norm": 0.6015625, "learning_rate": 0.0018257818132273898, "loss": 0.3822, "step": 20136 }, { "epoch": 0.035706338504528895, "grad_norm": 1.75, "learning_rate": 0.0018257466160361117, "loss": 0.3125, "step": 20138 }, { "epoch": 0.03570988466983871, "grad_norm": 0.5234375, "learning_rate": 0.0018257114156708398, "loss": 0.2066, "step": 20140 }, { "epoch": 0.035713430835148524, "grad_norm": 1.0390625, "learning_rate": 0.0018256762121317276, "loss": 0.197, "step": 20142 }, { "epoch": 0.03571697700045834, "grad_norm": 0.369140625, "learning_rate": 0.001825641005418929, "loss": 0.4264, "step": 20144 }, { "epoch": 0.03572052316576816, "grad_norm": 2.359375, "learning_rate": 0.0018256057955325985, "loss": 0.2233, "step": 20146 }, { "epoch": 0.035724069331077975, "grad_norm": 1.171875, "learning_rate": 0.0018255705824728892, "loss": 0.2092, "step": 20148 }, { "epoch": 0.03572761549638779, "grad_norm": 0.96484375, "learning_rate": 0.001825535366239956, "loss": 0.1493, "step": 20150 }, { "epoch": 0.035731161661697604, "grad_norm": 0.376953125, "learning_rate": 0.0018255001468339524, "loss": 0.2141, "step": 20152 }, { "epoch": 0.03573470782700742, "grad_norm": 1.234375, "learning_rate": 0.0018254649242550327, "loss": 0.3326, "step": 20154 }, { "epoch": 0.03573825399231723, "grad_norm": 0.427734375, "learning_rate": 0.001825429698503351, "loss": 0.2103, "step": 20156 }, { "epoch": 0.03574180015762705, "grad_norm": 0.5390625, "learning_rate": 0.001825394469579061, "loss": 0.1697, "step": 20158 }, { "epoch": 0.03574534632293686, "grad_norm": 0.734375, "learning_rate": 0.001825359237482317, "loss": 0.3109, "step": 20160 }, { "epoch": 0.03574889248824668, "grad_norm": 0.9375, "learning_rate": 0.0018253240022132727, "loss": 0.2036, "step": 20162 }, { "epoch": 0.03575243865355649, "grad_norm": 8.1875, "learning_rate": 0.001825288763772083, "loss": 0.237, "step": 20164 }, { "epoch": 0.035755984818866306, "grad_norm": 2.59375, "learning_rate": 0.0018252535221589012, "loss": 0.2523, "step": 20166 }, { "epoch": 0.03575953098417613, "grad_norm": 1.109375, "learning_rate": 0.0018252182773738819, "loss": 0.1467, "step": 20168 }, { "epoch": 0.03576307714948594, "grad_norm": 0.66796875, "learning_rate": 0.001825183029417179, "loss": 0.4145, "step": 20170 }, { "epoch": 0.035766623314795756, "grad_norm": 2.515625, "learning_rate": 0.001825147778288947, "loss": 0.3125, "step": 20172 }, { "epoch": 0.03577016948010557, "grad_norm": 0.80859375, "learning_rate": 0.0018251125239893398, "loss": 0.2468, "step": 20174 }, { "epoch": 0.035773715645415385, "grad_norm": 0.306640625, "learning_rate": 0.001825077266518511, "loss": 0.1606, "step": 20176 }, { "epoch": 0.0357772618107252, "grad_norm": 0.73828125, "learning_rate": 0.0018250420058766163, "loss": 0.2095, "step": 20178 }, { "epoch": 0.035780807976035015, "grad_norm": 0.75390625, "learning_rate": 0.0018250067420638087, "loss": 0.2235, "step": 20180 }, { "epoch": 0.03578435414134483, "grad_norm": 0.5859375, "learning_rate": 0.0018249714750802426, "loss": 0.2626, "step": 20182 }, { "epoch": 0.035787900306654644, "grad_norm": 0.91015625, "learning_rate": 0.0018249362049260723, "loss": 0.186, "step": 20184 }, { "epoch": 0.03579144647196446, "grad_norm": 0.296875, "learning_rate": 0.0018249009316014524, "loss": 0.1928, "step": 20186 }, { "epoch": 0.03579499263727427, "grad_norm": 0.60546875, "learning_rate": 0.001824865655106537, "loss": 0.2129, "step": 20188 }, { "epoch": 0.03579853880258409, "grad_norm": 0.8046875, "learning_rate": 0.0018248303754414801, "loss": 0.1755, "step": 20190 }, { "epoch": 0.03580208496789391, "grad_norm": 0.296875, "learning_rate": 0.0018247950926064361, "loss": 0.2808, "step": 20192 }, { "epoch": 0.03580563113320372, "grad_norm": 0.53125, "learning_rate": 0.0018247598066015594, "loss": 0.2269, "step": 20194 }, { "epoch": 0.03580917729851354, "grad_norm": 2.375, "learning_rate": 0.0018247245174270042, "loss": 0.5249, "step": 20196 }, { "epoch": 0.03581272346382335, "grad_norm": 1.3203125, "learning_rate": 0.001824689225082925, "loss": 0.2022, "step": 20198 }, { "epoch": 0.03581626962913317, "grad_norm": 0.9140625, "learning_rate": 0.0018246539295694762, "loss": 0.5507, "step": 20200 }, { "epoch": 0.03581981579444298, "grad_norm": 0.80078125, "learning_rate": 0.0018246186308868123, "loss": 0.262, "step": 20202 }, { "epoch": 0.035823361959752796, "grad_norm": 0.384765625, "learning_rate": 0.0018245833290350868, "loss": 0.1989, "step": 20204 }, { "epoch": 0.03582690812506261, "grad_norm": 0.259765625, "learning_rate": 0.001824548024014455, "loss": 0.1892, "step": 20206 }, { "epoch": 0.035830454290372425, "grad_norm": 0.275390625, "learning_rate": 0.0018245127158250712, "loss": 0.2029, "step": 20208 }, { "epoch": 0.03583400045568224, "grad_norm": 1.484375, "learning_rate": 0.0018244774044670893, "loss": 0.3161, "step": 20210 }, { "epoch": 0.035837546620992054, "grad_norm": 0.71484375, "learning_rate": 0.0018244420899406642, "loss": 0.2452, "step": 20212 }, { "epoch": 0.035841092786301876, "grad_norm": 0.75390625, "learning_rate": 0.0018244067722459501, "loss": 0.1898, "step": 20214 }, { "epoch": 0.03584463895161169, "grad_norm": 0.318359375, "learning_rate": 0.0018243714513831019, "loss": 0.1896, "step": 20216 }, { "epoch": 0.035848185116921505, "grad_norm": 1.703125, "learning_rate": 0.0018243361273522734, "loss": 0.1791, "step": 20218 }, { "epoch": 0.03585173128223132, "grad_norm": 0.9140625, "learning_rate": 0.0018243008001536194, "loss": 0.1824, "step": 20220 }, { "epoch": 0.035855277447541134, "grad_norm": 0.69140625, "learning_rate": 0.0018242654697872944, "loss": 0.2069, "step": 20222 }, { "epoch": 0.03585882361285095, "grad_norm": 1.6875, "learning_rate": 0.0018242301362534532, "loss": 0.251, "step": 20224 }, { "epoch": 0.03586236977816076, "grad_norm": 0.255859375, "learning_rate": 0.00182419479955225, "loss": 0.1677, "step": 20226 }, { "epoch": 0.03586591594347058, "grad_norm": 0.294921875, "learning_rate": 0.0018241594596838393, "loss": 0.1602, "step": 20228 }, { "epoch": 0.03586946210878039, "grad_norm": 1.2734375, "learning_rate": 0.001824124116648376, "loss": 0.1487, "step": 20230 }, { "epoch": 0.03587300827409021, "grad_norm": 1.1484375, "learning_rate": 0.0018240887704460142, "loss": 0.2511, "step": 20232 }, { "epoch": 0.03587655443940002, "grad_norm": 0.458984375, "learning_rate": 0.0018240534210769088, "loss": 0.2312, "step": 20234 }, { "epoch": 0.03588010060470984, "grad_norm": 0.52734375, "learning_rate": 0.0018240180685412143, "loss": 0.1966, "step": 20236 }, { "epoch": 0.03588364677001966, "grad_norm": 0.337890625, "learning_rate": 0.0018239827128390853, "loss": 0.2074, "step": 20238 }, { "epoch": 0.03588719293532947, "grad_norm": 2.453125, "learning_rate": 0.0018239473539706766, "loss": 0.2629, "step": 20240 }, { "epoch": 0.035890739100639286, "grad_norm": 0.51171875, "learning_rate": 0.0018239119919361425, "loss": 0.2351, "step": 20242 }, { "epoch": 0.0358942852659491, "grad_norm": 0.68359375, "learning_rate": 0.0018238766267356379, "loss": 0.2104, "step": 20244 }, { "epoch": 0.035897831431258916, "grad_norm": 0.828125, "learning_rate": 0.0018238412583693174, "loss": 0.1781, "step": 20246 }, { "epoch": 0.03590137759656873, "grad_norm": 0.361328125, "learning_rate": 0.0018238058868373357, "loss": 0.2221, "step": 20248 }, { "epoch": 0.035904923761878545, "grad_norm": 0.97265625, "learning_rate": 0.0018237705121398476, "loss": 0.2552, "step": 20250 }, { "epoch": 0.03590846992718836, "grad_norm": 0.7265625, "learning_rate": 0.0018237351342770074, "loss": 0.2013, "step": 20252 }, { "epoch": 0.035912016092498174, "grad_norm": 1.7265625, "learning_rate": 0.0018236997532489706, "loss": 0.2035, "step": 20254 }, { "epoch": 0.03591556225780799, "grad_norm": 0.640625, "learning_rate": 0.0018236643690558911, "loss": 0.1907, "step": 20256 }, { "epoch": 0.0359191084231178, "grad_norm": 0.95703125, "learning_rate": 0.0018236289816979242, "loss": 0.2263, "step": 20258 }, { "epoch": 0.035922654588427624, "grad_norm": 0.30859375, "learning_rate": 0.001823593591175224, "loss": 0.1817, "step": 20260 }, { "epoch": 0.03592620075373744, "grad_norm": 4.34375, "learning_rate": 0.0018235581974879464, "loss": 0.3063, "step": 20262 }, { "epoch": 0.035929746919047253, "grad_norm": 0.58984375, "learning_rate": 0.001823522800636245, "loss": 0.1946, "step": 20264 }, { "epoch": 0.03593329308435707, "grad_norm": 0.4921875, "learning_rate": 0.0018234874006202758, "loss": 0.1798, "step": 20266 }, { "epoch": 0.03593683924966688, "grad_norm": 0.5234375, "learning_rate": 0.0018234519974401923, "loss": 0.1849, "step": 20268 }, { "epoch": 0.0359403854149767, "grad_norm": 0.32421875, "learning_rate": 0.0018234165910961501, "loss": 0.1966, "step": 20270 }, { "epoch": 0.03594393158028651, "grad_norm": 0.953125, "learning_rate": 0.001823381181588304, "loss": 0.2543, "step": 20272 }, { "epoch": 0.035947477745596326, "grad_norm": 0.353515625, "learning_rate": 0.0018233457689168092, "loss": 0.2512, "step": 20274 }, { "epoch": 0.03595102391090614, "grad_norm": 0.22265625, "learning_rate": 0.0018233103530818197, "loss": 0.2635, "step": 20276 }, { "epoch": 0.035954570076215955, "grad_norm": 1.90625, "learning_rate": 0.001823274934083491, "loss": 0.1846, "step": 20278 }, { "epoch": 0.03595811624152577, "grad_norm": 0.6171875, "learning_rate": 0.001823239511921978, "loss": 0.2407, "step": 20280 }, { "epoch": 0.03596166240683559, "grad_norm": 0.515625, "learning_rate": 0.0018232040865974353, "loss": 0.2686, "step": 20282 }, { "epoch": 0.035965208572145406, "grad_norm": 0.75390625, "learning_rate": 0.001823168658110018, "loss": 0.2196, "step": 20284 }, { "epoch": 0.03596875473745522, "grad_norm": 0.8671875, "learning_rate": 0.0018231332264598812, "loss": 0.25, "step": 20286 }, { "epoch": 0.035972300902765035, "grad_norm": 0.8125, "learning_rate": 0.0018230977916471795, "loss": 0.2642, "step": 20288 }, { "epoch": 0.03597584706807485, "grad_norm": 0.93359375, "learning_rate": 0.0018230623536720683, "loss": 0.4423, "step": 20290 }, { "epoch": 0.035979393233384664, "grad_norm": 0.294921875, "learning_rate": 0.0018230269125347023, "loss": 0.1808, "step": 20292 }, { "epoch": 0.03598293939869448, "grad_norm": 2.109375, "learning_rate": 0.0018229914682352365, "loss": 0.3624, "step": 20294 }, { "epoch": 0.03598648556400429, "grad_norm": 0.3203125, "learning_rate": 0.0018229560207738264, "loss": 0.1932, "step": 20296 }, { "epoch": 0.03599003172931411, "grad_norm": 0.490234375, "learning_rate": 0.0018229205701506262, "loss": 0.4172, "step": 20298 }, { "epoch": 0.03599357789462392, "grad_norm": 0.48046875, "learning_rate": 0.0018228851163657911, "loss": 0.1616, "step": 20300 }, { "epoch": 0.03599712405993374, "grad_norm": 0.2255859375, "learning_rate": 0.0018228496594194768, "loss": 0.3895, "step": 20302 }, { "epoch": 0.03600067022524356, "grad_norm": 0.2734375, "learning_rate": 0.001822814199311838, "loss": 0.195, "step": 20304 }, { "epoch": 0.03600421639055337, "grad_norm": 1.875, "learning_rate": 0.0018227787360430294, "loss": 0.3931, "step": 20306 }, { "epoch": 0.03600776255586319, "grad_norm": 0.80078125, "learning_rate": 0.001822743269613207, "loss": 0.2296, "step": 20308 }, { "epoch": 0.036011308721173, "grad_norm": 0.5, "learning_rate": 0.0018227078000225248, "loss": 0.2144, "step": 20310 }, { "epoch": 0.03601485488648282, "grad_norm": 0.31640625, "learning_rate": 0.0018226723272711388, "loss": 0.2374, "step": 20312 }, { "epoch": 0.03601840105179263, "grad_norm": 1.5703125, "learning_rate": 0.0018226368513592037, "loss": 0.1881, "step": 20314 }, { "epoch": 0.036021947217102446, "grad_norm": 0.859375, "learning_rate": 0.0018226013722868748, "loss": 0.2212, "step": 20316 }, { "epoch": 0.03602549338241226, "grad_norm": 0.50390625, "learning_rate": 0.0018225658900543073, "loss": 0.5096, "step": 20318 }, { "epoch": 0.036029039547722075, "grad_norm": 0.6328125, "learning_rate": 0.0018225304046616561, "loss": 0.2779, "step": 20320 }, { "epoch": 0.03603258571303189, "grad_norm": 0.54296875, "learning_rate": 0.0018224949161090769, "loss": 0.2251, "step": 20322 }, { "epoch": 0.036036131878341704, "grad_norm": 0.416015625, "learning_rate": 0.0018224594243967246, "loss": 0.1936, "step": 20324 }, { "epoch": 0.03603967804365152, "grad_norm": 0.9453125, "learning_rate": 0.0018224239295247541, "loss": 0.2438, "step": 20326 }, { "epoch": 0.03604322420896134, "grad_norm": 0.58203125, "learning_rate": 0.0018223884314933212, "loss": 0.2021, "step": 20328 }, { "epoch": 0.036046770374271155, "grad_norm": 0.58203125, "learning_rate": 0.001822352930302581, "loss": 0.1562, "step": 20330 }, { "epoch": 0.03605031653958097, "grad_norm": 0.47265625, "learning_rate": 0.0018223174259526886, "loss": 0.338, "step": 20332 }, { "epoch": 0.036053862704890784, "grad_norm": 0.5078125, "learning_rate": 0.0018222819184437993, "loss": 0.1898, "step": 20334 }, { "epoch": 0.0360574088702006, "grad_norm": 1.5546875, "learning_rate": 0.0018222464077760684, "loss": 0.2265, "step": 20336 }, { "epoch": 0.03606095503551041, "grad_norm": 0.6484375, "learning_rate": 0.0018222108939496514, "loss": 0.252, "step": 20338 }, { "epoch": 0.03606450120082023, "grad_norm": 4.1875, "learning_rate": 0.001822175376964703, "loss": 0.194, "step": 20340 }, { "epoch": 0.03606804736613004, "grad_norm": 0.53515625, "learning_rate": 0.0018221398568213797, "loss": 0.2426, "step": 20342 }, { "epoch": 0.036071593531439856, "grad_norm": 0.2294921875, "learning_rate": 0.0018221043335198356, "loss": 0.2203, "step": 20344 }, { "epoch": 0.03607513969674967, "grad_norm": 0.318359375, "learning_rate": 0.0018220688070602268, "loss": 0.222, "step": 20346 }, { "epoch": 0.036078685862059486, "grad_norm": 1.28125, "learning_rate": 0.0018220332774427084, "loss": 0.5303, "step": 20348 }, { "epoch": 0.03608223202736931, "grad_norm": 0.546875, "learning_rate": 0.001821997744667436, "loss": 0.1951, "step": 20350 }, { "epoch": 0.03608577819267912, "grad_norm": 1.015625, "learning_rate": 0.001821962208734565, "loss": 0.2364, "step": 20352 }, { "epoch": 0.036089324357988936, "grad_norm": 1.0625, "learning_rate": 0.0018219266696442502, "loss": 0.2128, "step": 20354 }, { "epoch": 0.03609287052329875, "grad_norm": 0.4375, "learning_rate": 0.0018218911273966474, "loss": 0.1906, "step": 20356 }, { "epoch": 0.036096416688608565, "grad_norm": 0.69921875, "learning_rate": 0.0018218555819919126, "loss": 0.2462, "step": 20358 }, { "epoch": 0.03609996285391838, "grad_norm": 0.3671875, "learning_rate": 0.0018218200334302007, "loss": 0.1718, "step": 20360 }, { "epoch": 0.036103509019228194, "grad_norm": 0.279296875, "learning_rate": 0.0018217844817116672, "loss": 0.2305, "step": 20362 }, { "epoch": 0.03610705518453801, "grad_norm": 0.7578125, "learning_rate": 0.0018217489268364676, "loss": 0.1791, "step": 20364 }, { "epoch": 0.03611060134984782, "grad_norm": 0.255859375, "learning_rate": 0.0018217133688047573, "loss": 0.2277, "step": 20366 }, { "epoch": 0.03611414751515764, "grad_norm": 1.1171875, "learning_rate": 0.0018216778076166921, "loss": 0.2123, "step": 20368 }, { "epoch": 0.03611769368046745, "grad_norm": 0.5078125, "learning_rate": 0.0018216422432724274, "loss": 0.2252, "step": 20370 }, { "epoch": 0.036121239845777274, "grad_norm": 0.373046875, "learning_rate": 0.0018216066757721185, "loss": 0.1478, "step": 20372 }, { "epoch": 0.03612478601108709, "grad_norm": 0.734375, "learning_rate": 0.0018215711051159213, "loss": 0.2125, "step": 20374 }, { "epoch": 0.0361283321763969, "grad_norm": 0.6875, "learning_rate": 0.0018215355313039917, "loss": 0.2409, "step": 20376 }, { "epoch": 0.03613187834170672, "grad_norm": 0.4296875, "learning_rate": 0.001821499954336484, "loss": 0.2482, "step": 20378 }, { "epoch": 0.03613542450701653, "grad_norm": 0.68359375, "learning_rate": 0.001821464374213555, "loss": 0.2355, "step": 20380 }, { "epoch": 0.03613897067232635, "grad_norm": 0.375, "learning_rate": 0.00182142879093536, "loss": 0.2166, "step": 20382 }, { "epoch": 0.03614251683763616, "grad_norm": 0.3828125, "learning_rate": 0.0018213932045020543, "loss": 0.2441, "step": 20384 }, { "epoch": 0.036146063002945976, "grad_norm": 0.42578125, "learning_rate": 0.0018213576149137937, "loss": 0.1813, "step": 20386 }, { "epoch": 0.03614960916825579, "grad_norm": 0.4609375, "learning_rate": 0.0018213220221707342, "loss": 0.1864, "step": 20388 }, { "epoch": 0.036153155333565605, "grad_norm": 0.55078125, "learning_rate": 0.0018212864262730308, "loss": 0.1749, "step": 20390 }, { "epoch": 0.03615670149887542, "grad_norm": 0.24609375, "learning_rate": 0.00182125082722084, "loss": 0.3499, "step": 20392 }, { "epoch": 0.036160247664185234, "grad_norm": 0.7421875, "learning_rate": 0.0018212152250143166, "loss": 0.2385, "step": 20394 }, { "epoch": 0.036163793829495056, "grad_norm": 1.1328125, "learning_rate": 0.0018211796196536173, "loss": 0.2278, "step": 20396 }, { "epoch": 0.03616733999480487, "grad_norm": 0.359375, "learning_rate": 0.001821144011138897, "loss": 0.187, "step": 20398 }, { "epoch": 0.036170886160114685, "grad_norm": 0.412109375, "learning_rate": 0.0018211083994703113, "loss": 0.1811, "step": 20400 }, { "epoch": 0.0361744323254245, "grad_norm": 0.86328125, "learning_rate": 0.0018210727846480167, "loss": 0.1686, "step": 20402 }, { "epoch": 0.036177978490734314, "grad_norm": 0.91796875, "learning_rate": 0.0018210371666721684, "loss": 0.1866, "step": 20404 }, { "epoch": 0.03618152465604413, "grad_norm": 0.451171875, "learning_rate": 0.0018210015455429225, "loss": 0.1801, "step": 20406 }, { "epoch": 0.03618507082135394, "grad_norm": 0.251953125, "learning_rate": 0.0018209659212604346, "loss": 0.2136, "step": 20408 }, { "epoch": 0.03618861698666376, "grad_norm": 0.5078125, "learning_rate": 0.0018209302938248604, "loss": 0.1821, "step": 20410 }, { "epoch": 0.03619216315197357, "grad_norm": 1.65625, "learning_rate": 0.0018208946632363558, "loss": 0.3578, "step": 20412 }, { "epoch": 0.03619570931728339, "grad_norm": 0.39453125, "learning_rate": 0.0018208590294950771, "loss": 0.1913, "step": 20414 }, { "epoch": 0.0361992554825932, "grad_norm": 0.61328125, "learning_rate": 0.0018208233926011797, "loss": 0.2514, "step": 20416 }, { "epoch": 0.03620280164790302, "grad_norm": 0.2490234375, "learning_rate": 0.0018207877525548192, "loss": 0.2053, "step": 20418 }, { "epoch": 0.03620634781321284, "grad_norm": 0.388671875, "learning_rate": 0.0018207521093561519, "loss": 0.158, "step": 20420 }, { "epoch": 0.03620989397852265, "grad_norm": 1.125, "learning_rate": 0.0018207164630053335, "loss": 0.3697, "step": 20422 }, { "epoch": 0.036213440143832466, "grad_norm": 0.283203125, "learning_rate": 0.0018206808135025202, "loss": 0.1715, "step": 20424 }, { "epoch": 0.03621698630914228, "grad_norm": 0.6171875, "learning_rate": 0.0018206451608478672, "loss": 0.1934, "step": 20426 }, { "epoch": 0.036220532474452095, "grad_norm": 1.03125, "learning_rate": 0.0018206095050415307, "loss": 0.1837, "step": 20428 }, { "epoch": 0.03622407863976191, "grad_norm": 0.2109375, "learning_rate": 0.0018205738460836673, "loss": 0.1285, "step": 20430 }, { "epoch": 0.036227624805071725, "grad_norm": 0.265625, "learning_rate": 0.0018205381839744323, "loss": 0.2553, "step": 20432 }, { "epoch": 0.03623117097038154, "grad_norm": 2.421875, "learning_rate": 0.0018205025187139818, "loss": 0.5537, "step": 20434 }, { "epoch": 0.036234717135691354, "grad_norm": 0.20703125, "learning_rate": 0.0018204668503024718, "loss": 0.1745, "step": 20436 }, { "epoch": 0.03623826330100117, "grad_norm": 0.82421875, "learning_rate": 0.0018204311787400587, "loss": 0.1451, "step": 20438 }, { "epoch": 0.03624180946631099, "grad_norm": 0.39453125, "learning_rate": 0.0018203955040268975, "loss": 0.2201, "step": 20440 }, { "epoch": 0.036245355631620804, "grad_norm": 0.40625, "learning_rate": 0.001820359826163145, "loss": 0.1715, "step": 20442 }, { "epoch": 0.03624890179693062, "grad_norm": 0.369140625, "learning_rate": 0.0018203241451489572, "loss": 0.1692, "step": 20444 }, { "epoch": 0.03625244796224043, "grad_norm": 0.7890625, "learning_rate": 0.00182028846098449, "loss": 0.1986, "step": 20446 }, { "epoch": 0.03625599412755025, "grad_norm": 0.83203125, "learning_rate": 0.0018202527736698993, "loss": 0.2816, "step": 20448 }, { "epoch": 0.03625954029286006, "grad_norm": 0.5078125, "learning_rate": 0.0018202170832053413, "loss": 0.1991, "step": 20450 }, { "epoch": 0.03626308645816988, "grad_norm": 0.54296875, "learning_rate": 0.0018201813895909723, "loss": 0.2758, "step": 20452 }, { "epoch": 0.03626663262347969, "grad_norm": 0.2294921875, "learning_rate": 0.0018201456928269482, "loss": 0.1686, "step": 20454 }, { "epoch": 0.036270178788789506, "grad_norm": 1.25, "learning_rate": 0.001820109992913425, "loss": 0.3832, "step": 20456 }, { "epoch": 0.03627372495409932, "grad_norm": 0.349609375, "learning_rate": 0.0018200742898505592, "loss": 0.2363, "step": 20458 }, { "epoch": 0.036277271119409135, "grad_norm": 0.431640625, "learning_rate": 0.0018200385836385066, "loss": 0.172, "step": 20460 }, { "epoch": 0.03628081728471895, "grad_norm": 0.251953125, "learning_rate": 0.0018200028742774235, "loss": 0.1854, "step": 20462 }, { "epoch": 0.03628436345002877, "grad_norm": 0.294921875, "learning_rate": 0.001819967161767466, "loss": 0.4978, "step": 20464 }, { "epoch": 0.036287909615338586, "grad_norm": 0.326171875, "learning_rate": 0.0018199314461087906, "loss": 0.1836, "step": 20466 }, { "epoch": 0.0362914557806484, "grad_norm": 0.423828125, "learning_rate": 0.0018198957273015532, "loss": 0.2348, "step": 20468 }, { "epoch": 0.036295001945958215, "grad_norm": 0.326171875, "learning_rate": 0.0018198600053459097, "loss": 0.2287, "step": 20470 }, { "epoch": 0.03629854811126803, "grad_norm": 0.73828125, "learning_rate": 0.0018198242802420167, "loss": 0.2544, "step": 20472 }, { "epoch": 0.036302094276577844, "grad_norm": 0.46875, "learning_rate": 0.0018197885519900306, "loss": 0.2776, "step": 20474 }, { "epoch": 0.03630564044188766, "grad_norm": 0.349609375, "learning_rate": 0.0018197528205901078, "loss": 0.1897, "step": 20476 }, { "epoch": 0.03630918660719747, "grad_norm": 0.431640625, "learning_rate": 0.0018197170860424037, "loss": 0.2406, "step": 20478 }, { "epoch": 0.03631273277250729, "grad_norm": 0.2138671875, "learning_rate": 0.0018196813483470752, "loss": 0.1711, "step": 20480 }, { "epoch": 0.0363162789378171, "grad_norm": 0.3828125, "learning_rate": 0.0018196456075042788, "loss": 0.3583, "step": 20482 }, { "epoch": 0.03631982510312692, "grad_norm": 0.447265625, "learning_rate": 0.0018196098635141706, "loss": 0.2374, "step": 20484 }, { "epoch": 0.03632337126843674, "grad_norm": 0.37109375, "learning_rate": 0.0018195741163769064, "loss": 0.2568, "step": 20486 }, { "epoch": 0.03632691743374655, "grad_norm": 0.7734375, "learning_rate": 0.0018195383660926435, "loss": 0.2657, "step": 20488 }, { "epoch": 0.03633046359905637, "grad_norm": 0.4453125, "learning_rate": 0.0018195026126615374, "loss": 0.2017, "step": 20490 }, { "epoch": 0.03633400976436618, "grad_norm": 0.625, "learning_rate": 0.0018194668560837447, "loss": 0.1823, "step": 20492 }, { "epoch": 0.036337555929675996, "grad_norm": 0.419921875, "learning_rate": 0.0018194310963594222, "loss": 0.3033, "step": 20494 }, { "epoch": 0.03634110209498581, "grad_norm": 16.25, "learning_rate": 0.001819395333488726, "loss": 0.3811, "step": 20496 }, { "epoch": 0.036344648260295626, "grad_norm": 0.6640625, "learning_rate": 0.0018193595674718124, "loss": 0.2094, "step": 20498 }, { "epoch": 0.03634819442560544, "grad_norm": 0.458984375, "learning_rate": 0.0018193237983088377, "loss": 0.2555, "step": 20500 }, { "epoch": 0.036351740590915255, "grad_norm": 0.423828125, "learning_rate": 0.0018192880259999588, "loss": 0.2965, "step": 20502 }, { "epoch": 0.03635528675622507, "grad_norm": 2.390625, "learning_rate": 0.001819252250545332, "loss": 0.3489, "step": 20504 }, { "epoch": 0.036358832921534884, "grad_norm": 1.015625, "learning_rate": 0.0018192164719451134, "loss": 0.2912, "step": 20506 }, { "epoch": 0.036362379086844705, "grad_norm": 0.40625, "learning_rate": 0.0018191806901994595, "loss": 0.1569, "step": 20508 }, { "epoch": 0.03636592525215452, "grad_norm": 0.8203125, "learning_rate": 0.0018191449053085273, "loss": 0.2023, "step": 20510 }, { "epoch": 0.036369471417464334, "grad_norm": 0.251953125, "learning_rate": 0.001819109117272473, "loss": 0.1676, "step": 20512 }, { "epoch": 0.03637301758277415, "grad_norm": 0.5078125, "learning_rate": 0.0018190733260914531, "loss": 0.1503, "step": 20514 }, { "epoch": 0.036376563748083963, "grad_norm": 2.46875, "learning_rate": 0.0018190375317656243, "loss": 0.315, "step": 20516 }, { "epoch": 0.03638010991339378, "grad_norm": 0.33984375, "learning_rate": 0.001819001734295143, "loss": 0.1813, "step": 20518 }, { "epoch": 0.03638365607870359, "grad_norm": 0.283203125, "learning_rate": 0.0018189659336801654, "loss": 0.1341, "step": 20520 }, { "epoch": 0.03638720224401341, "grad_norm": 0.6796875, "learning_rate": 0.0018189301299208487, "loss": 0.2152, "step": 20522 }, { "epoch": 0.03639074840932322, "grad_norm": 0.5234375, "learning_rate": 0.0018188943230173493, "loss": 0.1736, "step": 20524 }, { "epoch": 0.036394294574633036, "grad_norm": 0.578125, "learning_rate": 0.0018188585129698233, "loss": 0.2245, "step": 20526 }, { "epoch": 0.03639784073994285, "grad_norm": 0.498046875, "learning_rate": 0.0018188226997784282, "loss": 0.2311, "step": 20528 }, { "epoch": 0.036401386905252665, "grad_norm": 0.361328125, "learning_rate": 0.0018187868834433202, "loss": 0.1871, "step": 20530 }, { "epoch": 0.03640493307056249, "grad_norm": 0.341796875, "learning_rate": 0.0018187510639646556, "loss": 0.2337, "step": 20532 }, { "epoch": 0.0364084792358723, "grad_norm": 0.41796875, "learning_rate": 0.0018187152413425914, "loss": 0.2826, "step": 20534 }, { "epoch": 0.036412025401182116, "grad_norm": 1.3203125, "learning_rate": 0.001818679415577284, "loss": 0.3556, "step": 20536 }, { "epoch": 0.03641557156649193, "grad_norm": 24.875, "learning_rate": 0.0018186435866688908, "loss": 0.2328, "step": 20538 }, { "epoch": 0.036419117731801745, "grad_norm": 0.6484375, "learning_rate": 0.0018186077546175677, "loss": 0.1957, "step": 20540 }, { "epoch": 0.03642266389711156, "grad_norm": 14.4375, "learning_rate": 0.0018185719194234715, "loss": 0.3737, "step": 20542 }, { "epoch": 0.036426210062421374, "grad_norm": 0.80078125, "learning_rate": 0.0018185360810867594, "loss": 0.2355, "step": 20544 }, { "epoch": 0.03642975622773119, "grad_norm": 1.0234375, "learning_rate": 0.001818500239607588, "loss": 0.1879, "step": 20546 }, { "epoch": 0.036433302393041, "grad_norm": 3.609375, "learning_rate": 0.0018184643949861138, "loss": 0.3967, "step": 20548 }, { "epoch": 0.03643684855835082, "grad_norm": 0.67578125, "learning_rate": 0.0018184285472224932, "loss": 0.2691, "step": 20550 }, { "epoch": 0.03644039472366063, "grad_norm": 0.73828125, "learning_rate": 0.001818392696316884, "loss": 0.1964, "step": 20552 }, { "epoch": 0.036443940888970454, "grad_norm": 0.470703125, "learning_rate": 0.0018183568422694423, "loss": 0.279, "step": 20554 }, { "epoch": 0.03644748705428027, "grad_norm": 0.408203125, "learning_rate": 0.001818320985080325, "loss": 0.1756, "step": 20556 }, { "epoch": 0.03645103321959008, "grad_norm": 0.51953125, "learning_rate": 0.0018182851247496889, "loss": 0.5166, "step": 20558 }, { "epoch": 0.0364545793848999, "grad_norm": 0.81640625, "learning_rate": 0.0018182492612776912, "loss": 0.3984, "step": 20560 }, { "epoch": 0.03645812555020971, "grad_norm": 0.55859375, "learning_rate": 0.001818213394664488, "loss": 0.1721, "step": 20562 }, { "epoch": 0.03646167171551953, "grad_norm": 5.46875, "learning_rate": 0.0018181775249102368, "loss": 0.3314, "step": 20564 }, { "epoch": 0.03646521788082934, "grad_norm": 0.2451171875, "learning_rate": 0.0018181416520150944, "loss": 0.2224, "step": 20566 }, { "epoch": 0.036468764046139156, "grad_norm": 0.6953125, "learning_rate": 0.0018181057759792173, "loss": 0.179, "step": 20568 }, { "epoch": 0.03647231021144897, "grad_norm": 0.78515625, "learning_rate": 0.001818069896802763, "loss": 0.3013, "step": 20570 }, { "epoch": 0.036475856376758785, "grad_norm": 0.36328125, "learning_rate": 0.0018180340144858876, "loss": 0.2424, "step": 20572 }, { "epoch": 0.0364794025420686, "grad_norm": 0.4140625, "learning_rate": 0.0018179981290287488, "loss": 0.1755, "step": 20574 }, { "epoch": 0.03648294870737842, "grad_norm": 1.4765625, "learning_rate": 0.0018179622404315033, "loss": 0.2172, "step": 20576 }, { "epoch": 0.036486494872688235, "grad_norm": 0.59765625, "learning_rate": 0.001817926348694308, "loss": 0.203, "step": 20578 }, { "epoch": 0.03649004103799805, "grad_norm": 4.3125, "learning_rate": 0.0018178904538173198, "loss": 0.4282, "step": 20580 }, { "epoch": 0.036493587203307865, "grad_norm": 0.5546875, "learning_rate": 0.0018178545558006957, "loss": 0.4245, "step": 20582 }, { "epoch": 0.03649713336861768, "grad_norm": 0.83984375, "learning_rate": 0.0018178186546445928, "loss": 0.2519, "step": 20584 }, { "epoch": 0.036500679533927494, "grad_norm": 0.7890625, "learning_rate": 0.0018177827503491682, "loss": 0.2118, "step": 20586 }, { "epoch": 0.03650422569923731, "grad_norm": 0.67578125, "learning_rate": 0.0018177468429145785, "loss": 0.36, "step": 20588 }, { "epoch": 0.03650777186454712, "grad_norm": 0.90234375, "learning_rate": 0.001817710932340981, "loss": 0.1937, "step": 20590 }, { "epoch": 0.03651131802985694, "grad_norm": 0.251953125, "learning_rate": 0.0018176750186285331, "loss": 0.2424, "step": 20592 }, { "epoch": 0.03651486419516675, "grad_norm": 0.345703125, "learning_rate": 0.0018176391017773912, "loss": 0.169, "step": 20594 }, { "epoch": 0.036518410360476566, "grad_norm": 0.81640625, "learning_rate": 0.001817603181787713, "loss": 0.2774, "step": 20596 }, { "epoch": 0.03652195652578638, "grad_norm": 0.26171875, "learning_rate": 0.0018175672586596553, "loss": 0.2406, "step": 20598 }, { "epoch": 0.0365255026910962, "grad_norm": 0.71875, "learning_rate": 0.0018175313323933752, "loss": 0.1676, "step": 20600 }, { "epoch": 0.03652904885640602, "grad_norm": 0.2236328125, "learning_rate": 0.0018174954029890299, "loss": 0.1873, "step": 20602 }, { "epoch": 0.03653259502171583, "grad_norm": 1.4453125, "learning_rate": 0.0018174594704467764, "loss": 0.1998, "step": 20604 }, { "epoch": 0.036536141187025646, "grad_norm": 1.3046875, "learning_rate": 0.0018174235347667717, "loss": 0.3718, "step": 20606 }, { "epoch": 0.03653968735233546, "grad_norm": 0.91015625, "learning_rate": 0.0018173875959491734, "loss": 0.2011, "step": 20608 }, { "epoch": 0.036543233517645275, "grad_norm": 0.421875, "learning_rate": 0.0018173516539941386, "loss": 0.2226, "step": 20610 }, { "epoch": 0.03654677968295509, "grad_norm": 0.349609375, "learning_rate": 0.0018173157089018243, "loss": 0.147, "step": 20612 }, { "epoch": 0.036550325848264904, "grad_norm": 0.271484375, "learning_rate": 0.0018172797606723875, "loss": 0.2609, "step": 20614 }, { "epoch": 0.03655387201357472, "grad_norm": 0.4140625, "learning_rate": 0.001817243809305986, "loss": 0.2488, "step": 20616 }, { "epoch": 0.03655741817888453, "grad_norm": 0.28125, "learning_rate": 0.0018172078548027767, "loss": 0.2065, "step": 20618 }, { "epoch": 0.03656096434419435, "grad_norm": 0.37109375, "learning_rate": 0.0018171718971629165, "loss": 0.2163, "step": 20620 }, { "epoch": 0.03656451050950417, "grad_norm": 0.58203125, "learning_rate": 0.0018171359363865632, "loss": 0.3091, "step": 20622 }, { "epoch": 0.036568056674813984, "grad_norm": 0.5078125, "learning_rate": 0.001817099972473874, "loss": 0.3216, "step": 20624 }, { "epoch": 0.0365716028401238, "grad_norm": 0.375, "learning_rate": 0.0018170640054250057, "loss": 0.2159, "step": 20626 }, { "epoch": 0.03657514900543361, "grad_norm": 0.359375, "learning_rate": 0.0018170280352401162, "loss": 0.1962, "step": 20628 }, { "epoch": 0.03657869517074343, "grad_norm": 0.69921875, "learning_rate": 0.0018169920619193626, "loss": 0.1887, "step": 20630 }, { "epoch": 0.03658224133605324, "grad_norm": 0.490234375, "learning_rate": 0.0018169560854629024, "loss": 0.2226, "step": 20632 }, { "epoch": 0.03658578750136306, "grad_norm": 0.494140625, "learning_rate": 0.0018169201058708922, "loss": 0.1607, "step": 20634 }, { "epoch": 0.03658933366667287, "grad_norm": 0.52734375, "learning_rate": 0.0018168841231434904, "loss": 0.2607, "step": 20636 }, { "epoch": 0.036592879831982686, "grad_norm": 0.2578125, "learning_rate": 0.0018168481372808534, "loss": 0.2244, "step": 20638 }, { "epoch": 0.0365964259972925, "grad_norm": 0.921875, "learning_rate": 0.0018168121482831394, "loss": 0.2954, "step": 20640 }, { "epoch": 0.036599972162602315, "grad_norm": 0.296875, "learning_rate": 0.0018167761561505055, "loss": 0.2097, "step": 20642 }, { "epoch": 0.036603518327912136, "grad_norm": 0.453125, "learning_rate": 0.001816740160883109, "loss": 0.175, "step": 20644 }, { "epoch": 0.03660706449322195, "grad_norm": 0.45703125, "learning_rate": 0.0018167041624811068, "loss": 0.2198, "step": 20646 }, { "epoch": 0.036610610658531766, "grad_norm": 1.0390625, "learning_rate": 0.0018166681609446576, "loss": 0.521, "step": 20648 }, { "epoch": 0.03661415682384158, "grad_norm": 0.265625, "learning_rate": 0.0018166321562739182, "loss": 0.1773, "step": 20650 }, { "epoch": 0.036617702989151395, "grad_norm": 0.54296875, "learning_rate": 0.0018165961484690454, "loss": 0.1919, "step": 20652 }, { "epoch": 0.03662124915446121, "grad_norm": 0.7109375, "learning_rate": 0.0018165601375301976, "loss": 0.1674, "step": 20654 }, { "epoch": 0.036624795319771024, "grad_norm": 0.39453125, "learning_rate": 0.001816524123457532, "loss": 0.2623, "step": 20656 }, { "epoch": 0.03662834148508084, "grad_norm": 2.484375, "learning_rate": 0.0018164881062512062, "loss": 0.2362, "step": 20658 }, { "epoch": 0.03663188765039065, "grad_norm": 1.4140625, "learning_rate": 0.0018164520859113775, "loss": 0.3246, "step": 20660 }, { "epoch": 0.03663543381570047, "grad_norm": 0.375, "learning_rate": 0.0018164160624382039, "loss": 0.1974, "step": 20662 }, { "epoch": 0.03663897998101028, "grad_norm": 0.6484375, "learning_rate": 0.001816380035831842, "loss": 0.19, "step": 20664 }, { "epoch": 0.0366425261463201, "grad_norm": 0.83984375, "learning_rate": 0.00181634400609245, "loss": 0.3763, "step": 20666 }, { "epoch": 0.03664607231162992, "grad_norm": 0.67578125, "learning_rate": 0.0018163079732201857, "loss": 0.1669, "step": 20668 }, { "epoch": 0.03664961847693973, "grad_norm": 0.2490234375, "learning_rate": 0.0018162719372152066, "loss": 0.1944, "step": 20670 }, { "epoch": 0.03665316464224955, "grad_norm": 0.34375, "learning_rate": 0.0018162358980776696, "loss": 0.1894, "step": 20672 }, { "epoch": 0.03665671080755936, "grad_norm": 1.8203125, "learning_rate": 0.001816199855807733, "loss": 0.1825, "step": 20674 }, { "epoch": 0.036660256972869176, "grad_norm": 0.345703125, "learning_rate": 0.0018161638104055545, "loss": 0.3974, "step": 20676 }, { "epoch": 0.03666380313817899, "grad_norm": 0.546875, "learning_rate": 0.0018161277618712912, "loss": 0.2757, "step": 20678 }, { "epoch": 0.036667349303488805, "grad_norm": 0.515625, "learning_rate": 0.0018160917102051008, "loss": 0.1951, "step": 20680 }, { "epoch": 0.03667089546879862, "grad_norm": 0.4140625, "learning_rate": 0.0018160556554071418, "loss": 0.2306, "step": 20682 }, { "epoch": 0.036674441634108434, "grad_norm": 1.15625, "learning_rate": 0.0018160195974775712, "loss": 0.283, "step": 20684 }, { "epoch": 0.03667798779941825, "grad_norm": 0.396484375, "learning_rate": 0.0018159835364165466, "loss": 0.1744, "step": 20686 }, { "epoch": 0.036681533964728064, "grad_norm": 0.92578125, "learning_rate": 0.0018159474722242257, "loss": 0.2245, "step": 20688 }, { "epoch": 0.036685080130037885, "grad_norm": 0.490234375, "learning_rate": 0.0018159114049007667, "loss": 0.2463, "step": 20690 }, { "epoch": 0.0366886262953477, "grad_norm": 0.69921875, "learning_rate": 0.001815875334446327, "loss": 0.2343, "step": 20692 }, { "epoch": 0.036692172460657514, "grad_norm": 0.55078125, "learning_rate": 0.0018158392608610646, "loss": 0.2155, "step": 20694 }, { "epoch": 0.03669571862596733, "grad_norm": 0.91796875, "learning_rate": 0.001815803184145137, "loss": 0.2953, "step": 20696 }, { "epoch": 0.03669926479127714, "grad_norm": 1.390625, "learning_rate": 0.001815767104298702, "loss": 0.3375, "step": 20698 }, { "epoch": 0.03670281095658696, "grad_norm": 2.109375, "learning_rate": 0.0018157310213219174, "loss": 0.2837, "step": 20700 }, { "epoch": 0.03670635712189677, "grad_norm": 0.302734375, "learning_rate": 0.0018156949352149412, "loss": 0.1909, "step": 20702 }, { "epoch": 0.03670990328720659, "grad_norm": 1.921875, "learning_rate": 0.0018156588459779309, "loss": 0.2606, "step": 20704 }, { "epoch": 0.0367134494525164, "grad_norm": 0.357421875, "learning_rate": 0.0018156227536110447, "loss": 0.2155, "step": 20706 }, { "epoch": 0.036716995617826216, "grad_norm": 0.349609375, "learning_rate": 0.00181558665811444, "loss": 0.1708, "step": 20708 }, { "epoch": 0.03672054178313603, "grad_norm": 0.271484375, "learning_rate": 0.0018155505594882753, "loss": 0.3175, "step": 20710 }, { "epoch": 0.03672408794844585, "grad_norm": 1.0078125, "learning_rate": 0.001815514457732708, "loss": 0.2446, "step": 20712 }, { "epoch": 0.03672763411375567, "grad_norm": 0.494140625, "learning_rate": 0.0018154783528478958, "loss": 0.1739, "step": 20714 }, { "epoch": 0.03673118027906548, "grad_norm": 0.484375, "learning_rate": 0.0018154422448339968, "loss": 0.1448, "step": 20716 }, { "epoch": 0.036734726444375296, "grad_norm": 0.4375, "learning_rate": 0.0018154061336911693, "loss": 0.4107, "step": 20718 }, { "epoch": 0.03673827260968511, "grad_norm": 0.328125, "learning_rate": 0.0018153700194195707, "loss": 0.1966, "step": 20720 }, { "epoch": 0.036741818774994925, "grad_norm": 0.3515625, "learning_rate": 0.0018153339020193597, "loss": 0.1899, "step": 20722 }, { "epoch": 0.03674536494030474, "grad_norm": 0.7109375, "learning_rate": 0.0018152977814906933, "loss": 0.2027, "step": 20724 }, { "epoch": 0.036748911105614554, "grad_norm": 1.3671875, "learning_rate": 0.00181526165783373, "loss": 0.3423, "step": 20726 }, { "epoch": 0.03675245727092437, "grad_norm": 0.64453125, "learning_rate": 0.0018152255310486276, "loss": 0.2286, "step": 20728 }, { "epoch": 0.03675600343623418, "grad_norm": 1.09375, "learning_rate": 0.001815189401135544, "loss": 0.2784, "step": 20730 }, { "epoch": 0.036759549601544, "grad_norm": 0.26953125, "learning_rate": 0.001815153268094638, "loss": 0.1614, "step": 20732 }, { "epoch": 0.03676309576685381, "grad_norm": 0.6484375, "learning_rate": 0.0018151171319260664, "loss": 0.2384, "step": 20734 }, { "epoch": 0.036766641932163634, "grad_norm": 0.490234375, "learning_rate": 0.0018150809926299884, "loss": 0.2256, "step": 20736 }, { "epoch": 0.03677018809747345, "grad_norm": 0.73828125, "learning_rate": 0.0018150448502065608, "loss": 0.2084, "step": 20738 }, { "epoch": 0.03677373426278326, "grad_norm": 0.546875, "learning_rate": 0.0018150087046559427, "loss": 0.2435, "step": 20740 }, { "epoch": 0.03677728042809308, "grad_norm": 1.0546875, "learning_rate": 0.001814972555978292, "loss": 0.2042, "step": 20742 }, { "epoch": 0.03678082659340289, "grad_norm": 0.30859375, "learning_rate": 0.0018149364041737668, "loss": 0.2081, "step": 20744 }, { "epoch": 0.036784372758712706, "grad_norm": 0.64453125, "learning_rate": 0.0018149002492425246, "loss": 0.2705, "step": 20746 }, { "epoch": 0.03678791892402252, "grad_norm": 0.7265625, "learning_rate": 0.001814864091184724, "loss": 0.2427, "step": 20748 }, { "epoch": 0.036791465089332336, "grad_norm": 0.6640625, "learning_rate": 0.0018148279300005234, "loss": 0.2168, "step": 20750 }, { "epoch": 0.03679501125464215, "grad_norm": 0.44921875, "learning_rate": 0.0018147917656900808, "loss": 0.2555, "step": 20752 }, { "epoch": 0.036798557419951965, "grad_norm": 1.234375, "learning_rate": 0.0018147555982535538, "loss": 0.3553, "step": 20754 }, { "epoch": 0.03680210358526178, "grad_norm": 0.5078125, "learning_rate": 0.0018147194276911013, "loss": 0.19, "step": 20756 }, { "epoch": 0.0368056497505716, "grad_norm": 0.5546875, "learning_rate": 0.001814683254002881, "loss": 0.2831, "step": 20758 }, { "epoch": 0.036809195915881415, "grad_norm": 3.609375, "learning_rate": 0.0018146470771890513, "loss": 0.4019, "step": 20760 }, { "epoch": 0.03681274208119123, "grad_norm": 0.30078125, "learning_rate": 0.0018146108972497703, "loss": 0.1951, "step": 20762 }, { "epoch": 0.036816288246501044, "grad_norm": 0.44921875, "learning_rate": 0.0018145747141851964, "loss": 0.2427, "step": 20764 }, { "epoch": 0.03681983441181086, "grad_norm": 0.71484375, "learning_rate": 0.001814538527995488, "loss": 0.1913, "step": 20766 }, { "epoch": 0.036823380577120673, "grad_norm": 0.91015625, "learning_rate": 0.0018145023386808028, "loss": 0.1737, "step": 20768 }, { "epoch": 0.03682692674243049, "grad_norm": 0.7109375, "learning_rate": 0.0018144661462413, "loss": 0.2179, "step": 20770 }, { "epoch": 0.0368304729077403, "grad_norm": 1.0859375, "learning_rate": 0.0018144299506771366, "loss": 0.2875, "step": 20772 }, { "epoch": 0.03683401907305012, "grad_norm": 0.578125, "learning_rate": 0.0018143937519884718, "loss": 0.2428, "step": 20774 }, { "epoch": 0.03683756523835993, "grad_norm": 0.375, "learning_rate": 0.0018143575501754637, "loss": 0.2427, "step": 20776 }, { "epoch": 0.036841111403669746, "grad_norm": 1.15625, "learning_rate": 0.0018143213452382705, "loss": 0.161, "step": 20778 }, { "epoch": 0.03684465756897957, "grad_norm": 0.48828125, "learning_rate": 0.001814285137177051, "loss": 0.2137, "step": 20780 }, { "epoch": 0.03684820373428938, "grad_norm": 3.71875, "learning_rate": 0.0018142489259919633, "loss": 0.3106, "step": 20782 }, { "epoch": 0.0368517498995992, "grad_norm": 0.53125, "learning_rate": 0.0018142127116831653, "loss": 0.1988, "step": 20784 }, { "epoch": 0.03685529606490901, "grad_norm": 0.43359375, "learning_rate": 0.0018141764942508159, "loss": 0.2045, "step": 20786 }, { "epoch": 0.036858842230218826, "grad_norm": 0.5078125, "learning_rate": 0.0018141402736950732, "loss": 0.1707, "step": 20788 }, { "epoch": 0.03686238839552864, "grad_norm": 0.65625, "learning_rate": 0.001814104050016096, "loss": 0.5549, "step": 20790 }, { "epoch": 0.036865934560838455, "grad_norm": 0.61328125, "learning_rate": 0.0018140678232140424, "loss": 0.3329, "step": 20792 }, { "epoch": 0.03686948072614827, "grad_norm": 0.73828125, "learning_rate": 0.0018140315932890709, "loss": 0.2136, "step": 20794 }, { "epoch": 0.036873026891458084, "grad_norm": 0.73046875, "learning_rate": 0.0018139953602413401, "loss": 0.1922, "step": 20796 }, { "epoch": 0.0368765730567679, "grad_norm": 0.66796875, "learning_rate": 0.001813959124071008, "loss": 0.1666, "step": 20798 }, { "epoch": 0.03688011922207771, "grad_norm": 0.51953125, "learning_rate": 0.001813922884778234, "loss": 0.2369, "step": 20800 }, { "epoch": 0.03688366538738753, "grad_norm": 0.47265625, "learning_rate": 0.0018138866423631753, "loss": 0.2323, "step": 20802 }, { "epoch": 0.03688721155269735, "grad_norm": 0.35546875, "learning_rate": 0.0018138503968259917, "loss": 0.2249, "step": 20804 }, { "epoch": 0.036890757718007164, "grad_norm": 0.8984375, "learning_rate": 0.0018138141481668407, "loss": 0.1648, "step": 20806 }, { "epoch": 0.03689430388331698, "grad_norm": 0.54296875, "learning_rate": 0.0018137778963858812, "loss": 0.1919, "step": 20808 }, { "epoch": 0.03689785004862679, "grad_norm": 1.078125, "learning_rate": 0.001813741641483272, "loss": 0.272, "step": 20810 }, { "epoch": 0.03690139621393661, "grad_norm": 0.333984375, "learning_rate": 0.0018137053834591716, "loss": 0.1518, "step": 20812 }, { "epoch": 0.03690494237924642, "grad_norm": 0.50390625, "learning_rate": 0.0018136691223137383, "loss": 0.2655, "step": 20814 }, { "epoch": 0.03690848854455624, "grad_norm": 0.65234375, "learning_rate": 0.0018136328580471307, "loss": 0.2803, "step": 20816 }, { "epoch": 0.03691203470986605, "grad_norm": 0.279296875, "learning_rate": 0.0018135965906595076, "loss": 0.1776, "step": 20818 }, { "epoch": 0.036915580875175866, "grad_norm": 0.1630859375, "learning_rate": 0.0018135603201510273, "loss": 0.2845, "step": 20820 }, { "epoch": 0.03691912704048568, "grad_norm": 1.03125, "learning_rate": 0.001813524046521849, "loss": 0.1963, "step": 20822 }, { "epoch": 0.036922673205795495, "grad_norm": 0.42578125, "learning_rate": 0.0018134877697721307, "loss": 0.2117, "step": 20824 }, { "epoch": 0.036926219371105316, "grad_norm": 0.4609375, "learning_rate": 0.0018134514899020315, "loss": 0.1698, "step": 20826 }, { "epoch": 0.03692976553641513, "grad_norm": 0.248046875, "learning_rate": 0.00181341520691171, "loss": 0.2005, "step": 20828 }, { "epoch": 0.036933311701724945, "grad_norm": 0.267578125, "learning_rate": 0.0018133789208013248, "loss": 0.2165, "step": 20830 }, { "epoch": 0.03693685786703476, "grad_norm": 0.8046875, "learning_rate": 0.001813342631571034, "loss": 0.2663, "step": 20832 }, { "epoch": 0.036940404032344575, "grad_norm": 0.1875, "learning_rate": 0.0018133063392209976, "loss": 0.1681, "step": 20834 }, { "epoch": 0.03694395019765439, "grad_norm": 1.6015625, "learning_rate": 0.0018132700437513735, "loss": 0.3001, "step": 20836 }, { "epoch": 0.036947496362964204, "grad_norm": 3.234375, "learning_rate": 0.0018132337451623201, "loss": 0.3373, "step": 20838 }, { "epoch": 0.03695104252827402, "grad_norm": 0.37109375, "learning_rate": 0.0018131974434539972, "loss": 0.1783, "step": 20840 }, { "epoch": 0.03695458869358383, "grad_norm": 1.2734375, "learning_rate": 0.0018131611386265625, "loss": 0.233, "step": 20842 }, { "epoch": 0.03695813485889365, "grad_norm": 0.51171875, "learning_rate": 0.0018131248306801754, "loss": 0.2175, "step": 20844 }, { "epoch": 0.03696168102420346, "grad_norm": 0.6875, "learning_rate": 0.0018130885196149945, "loss": 0.2348, "step": 20846 }, { "epoch": 0.03696522718951328, "grad_norm": 1.5703125, "learning_rate": 0.0018130522054311785, "loss": 0.2253, "step": 20848 }, { "epoch": 0.0369687733548231, "grad_norm": 0.5390625, "learning_rate": 0.0018130158881288865, "loss": 0.2017, "step": 20850 }, { "epoch": 0.03697231952013291, "grad_norm": 0.68359375, "learning_rate": 0.001812979567708277, "loss": 0.1608, "step": 20852 }, { "epoch": 0.03697586568544273, "grad_norm": 0.359375, "learning_rate": 0.0018129432441695093, "loss": 0.1483, "step": 20854 }, { "epoch": 0.03697941185075254, "grad_norm": 0.671875, "learning_rate": 0.0018129069175127418, "loss": 0.2507, "step": 20856 }, { "epoch": 0.036982958016062356, "grad_norm": 0.50390625, "learning_rate": 0.0018128705877381336, "loss": 0.1923, "step": 20858 }, { "epoch": 0.03698650418137217, "grad_norm": 1.1015625, "learning_rate": 0.0018128342548458434, "loss": 0.2784, "step": 20860 }, { "epoch": 0.036990050346681985, "grad_norm": 0.4453125, "learning_rate": 0.0018127979188360304, "loss": 0.1615, "step": 20862 }, { "epoch": 0.0369935965119918, "grad_norm": 0.484375, "learning_rate": 0.0018127615797088532, "loss": 0.1922, "step": 20864 }, { "epoch": 0.036997142677301614, "grad_norm": 0.271484375, "learning_rate": 0.001812725237464471, "loss": 0.2392, "step": 20866 }, { "epoch": 0.03700068884261143, "grad_norm": 1.2421875, "learning_rate": 0.0018126888921030427, "loss": 0.2242, "step": 20868 }, { "epoch": 0.03700423500792124, "grad_norm": 0.9609375, "learning_rate": 0.001812652543624727, "loss": 0.3085, "step": 20870 }, { "epoch": 0.037007781173231065, "grad_norm": 0.64453125, "learning_rate": 0.001812616192029683, "loss": 0.2082, "step": 20872 }, { "epoch": 0.03701132733854088, "grad_norm": 0.263671875, "learning_rate": 0.0018125798373180698, "loss": 0.1908, "step": 20874 }, { "epoch": 0.037014873503850694, "grad_norm": 1.15625, "learning_rate": 0.001812543479490046, "loss": 0.2896, "step": 20876 }, { "epoch": 0.03701841966916051, "grad_norm": 0.7890625, "learning_rate": 0.0018125071185457712, "loss": 0.1997, "step": 20878 }, { "epoch": 0.03702196583447032, "grad_norm": 0.5703125, "learning_rate": 0.0018124707544854041, "loss": 0.1764, "step": 20880 }, { "epoch": 0.03702551199978014, "grad_norm": 0.8125, "learning_rate": 0.0018124343873091038, "loss": 0.187, "step": 20882 }, { "epoch": 0.03702905816508995, "grad_norm": 0.64453125, "learning_rate": 0.0018123980170170293, "loss": 0.256, "step": 20884 }, { "epoch": 0.03703260433039977, "grad_norm": 0.890625, "learning_rate": 0.0018123616436093398, "loss": 0.4213, "step": 20886 }, { "epoch": 0.03703615049570958, "grad_norm": 4.9375, "learning_rate": 0.0018123252670861944, "loss": 0.3213, "step": 20888 }, { "epoch": 0.037039696661019396, "grad_norm": 0.396484375, "learning_rate": 0.0018122888874477518, "loss": 0.219, "step": 20890 }, { "epoch": 0.03704324282632921, "grad_norm": 0.41796875, "learning_rate": 0.001812252504694171, "loss": 0.2303, "step": 20892 }, { "epoch": 0.03704678899163903, "grad_norm": 0.53515625, "learning_rate": 0.001812216118825612, "loss": 0.1699, "step": 20894 }, { "epoch": 0.037050335156948846, "grad_norm": 0.75390625, "learning_rate": 0.0018121797298422334, "loss": 0.2213, "step": 20896 }, { "epoch": 0.03705388132225866, "grad_norm": 0.54296875, "learning_rate": 0.0018121433377441942, "loss": 0.2062, "step": 20898 }, { "epoch": 0.037057427487568476, "grad_norm": 0.3203125, "learning_rate": 0.0018121069425316536, "loss": 0.2148, "step": 20900 }, { "epoch": 0.03706097365287829, "grad_norm": 0.703125, "learning_rate": 0.001812070544204771, "loss": 0.1873, "step": 20902 }, { "epoch": 0.037064519818188105, "grad_norm": 4.15625, "learning_rate": 0.0018120341427637053, "loss": 0.2448, "step": 20904 }, { "epoch": 0.03706806598349792, "grad_norm": 0.2470703125, "learning_rate": 0.001811997738208616, "loss": 0.2328, "step": 20906 }, { "epoch": 0.037071612148807734, "grad_norm": 0.30859375, "learning_rate": 0.0018119613305396622, "loss": 0.1098, "step": 20908 }, { "epoch": 0.03707515831411755, "grad_norm": 0.63671875, "learning_rate": 0.0018119249197570031, "loss": 0.178, "step": 20910 }, { "epoch": 0.03707870447942736, "grad_norm": 0.384765625, "learning_rate": 0.001811888505860798, "loss": 0.2052, "step": 20912 }, { "epoch": 0.03708225064473718, "grad_norm": 0.79296875, "learning_rate": 0.0018118520888512058, "loss": 0.1887, "step": 20914 }, { "epoch": 0.037085796810047, "grad_norm": 1.0546875, "learning_rate": 0.0018118156687283863, "loss": 0.244, "step": 20916 }, { "epoch": 0.037089342975356814, "grad_norm": 0.4765625, "learning_rate": 0.0018117792454924984, "loss": 0.2284, "step": 20918 }, { "epoch": 0.03709288914066663, "grad_norm": 0.53125, "learning_rate": 0.0018117428191437017, "loss": 0.2419, "step": 20920 }, { "epoch": 0.03709643530597644, "grad_norm": 0.55859375, "learning_rate": 0.0018117063896821552, "loss": 0.2104, "step": 20922 }, { "epoch": 0.03709998147128626, "grad_norm": 0.5078125, "learning_rate": 0.0018116699571080184, "loss": 0.3777, "step": 20924 }, { "epoch": 0.03710352763659607, "grad_norm": 0.734375, "learning_rate": 0.0018116335214214505, "loss": 0.315, "step": 20926 }, { "epoch": 0.037107073801905886, "grad_norm": 0.7265625, "learning_rate": 0.001811597082622611, "loss": 0.2939, "step": 20928 }, { "epoch": 0.0371106199672157, "grad_norm": 1.65625, "learning_rate": 0.0018115606407116593, "loss": 0.4039, "step": 20930 }, { "epoch": 0.037114166132525515, "grad_norm": 0.578125, "learning_rate": 0.0018115241956887546, "loss": 0.171, "step": 20932 }, { "epoch": 0.03711771229783533, "grad_norm": 2.828125, "learning_rate": 0.0018114877475540563, "loss": 0.2688, "step": 20934 }, { "epoch": 0.037121258463145144, "grad_norm": 0.765625, "learning_rate": 0.0018114512963077242, "loss": 0.1959, "step": 20936 }, { "epoch": 0.03712480462845496, "grad_norm": 0.53515625, "learning_rate": 0.001811414841949917, "loss": 0.1852, "step": 20938 }, { "epoch": 0.03712835079376478, "grad_norm": 1.3984375, "learning_rate": 0.0018113783844807946, "loss": 0.292, "step": 20940 }, { "epoch": 0.037131896959074595, "grad_norm": 0.7890625, "learning_rate": 0.0018113419239005166, "loss": 0.4275, "step": 20942 }, { "epoch": 0.03713544312438441, "grad_norm": 2.09375, "learning_rate": 0.001811305460209242, "loss": 0.2571, "step": 20944 }, { "epoch": 0.037138989289694224, "grad_norm": 1.078125, "learning_rate": 0.0018112689934071304, "loss": 0.2541, "step": 20946 }, { "epoch": 0.03714253545500404, "grad_norm": 0.4140625, "learning_rate": 0.001811232523494342, "loss": 0.209, "step": 20948 }, { "epoch": 0.03714608162031385, "grad_norm": 0.455078125, "learning_rate": 0.0018111960504710353, "loss": 0.2007, "step": 20950 }, { "epoch": 0.03714962778562367, "grad_norm": 0.89453125, "learning_rate": 0.0018111595743373697, "loss": 0.2365, "step": 20952 }, { "epoch": 0.03715317395093348, "grad_norm": 1.9921875, "learning_rate": 0.0018111230950935055, "loss": 0.4104, "step": 20954 }, { "epoch": 0.0371567201162433, "grad_norm": 1.4140625, "learning_rate": 0.0018110866127396023, "loss": 0.2188, "step": 20956 }, { "epoch": 0.03716026628155311, "grad_norm": 0.392578125, "learning_rate": 0.001811050127275819, "loss": 0.1942, "step": 20958 }, { "epoch": 0.037163812446862926, "grad_norm": 0.48046875, "learning_rate": 0.0018110136387023154, "loss": 0.2925, "step": 20960 }, { "epoch": 0.03716735861217275, "grad_norm": 0.99609375, "learning_rate": 0.0018109771470192512, "loss": 0.259, "step": 20962 }, { "epoch": 0.03717090477748256, "grad_norm": 0.84765625, "learning_rate": 0.001810940652226786, "loss": 0.2276, "step": 20964 }, { "epoch": 0.03717445094279238, "grad_norm": 0.9296875, "learning_rate": 0.0018109041543250795, "loss": 0.2024, "step": 20966 }, { "epoch": 0.03717799710810219, "grad_norm": 0.7890625, "learning_rate": 0.0018108676533142912, "loss": 0.2229, "step": 20968 }, { "epoch": 0.037181543273412006, "grad_norm": 1.2890625, "learning_rate": 0.0018108311491945805, "loss": 0.3529, "step": 20970 }, { "epoch": 0.03718508943872182, "grad_norm": 0.29296875, "learning_rate": 0.0018107946419661071, "loss": 0.1708, "step": 20972 }, { "epoch": 0.037188635604031635, "grad_norm": 0.6171875, "learning_rate": 0.0018107581316290312, "loss": 0.1987, "step": 20974 }, { "epoch": 0.03719218176934145, "grad_norm": 0.87890625, "learning_rate": 0.0018107216181835117, "loss": 0.1912, "step": 20976 }, { "epoch": 0.037195727934651264, "grad_norm": 0.46484375, "learning_rate": 0.001810685101629709, "loss": 0.1652, "step": 20978 }, { "epoch": 0.03719927409996108, "grad_norm": 0.93359375, "learning_rate": 0.0018106485819677825, "loss": 0.2541, "step": 20980 }, { "epoch": 0.03720282026527089, "grad_norm": 0.703125, "learning_rate": 0.0018106120591978917, "loss": 0.2177, "step": 20982 }, { "epoch": 0.037206366430580715, "grad_norm": 0.59765625, "learning_rate": 0.0018105755333201966, "loss": 0.2082, "step": 20984 }, { "epoch": 0.03720991259589053, "grad_norm": 2.15625, "learning_rate": 0.0018105390043348566, "loss": 0.4442, "step": 20986 }, { "epoch": 0.037213458761200344, "grad_norm": 0.80859375, "learning_rate": 0.0018105024722420322, "loss": 0.2664, "step": 20988 }, { "epoch": 0.03721700492651016, "grad_norm": 0.375, "learning_rate": 0.0018104659370418825, "loss": 0.3408, "step": 20990 }, { "epoch": 0.03722055109181997, "grad_norm": 0.59765625, "learning_rate": 0.0018104293987345674, "loss": 0.2209, "step": 20992 }, { "epoch": 0.03722409725712979, "grad_norm": 0.8203125, "learning_rate": 0.0018103928573202468, "loss": 0.1995, "step": 20994 }, { "epoch": 0.0372276434224396, "grad_norm": 0.330078125, "learning_rate": 0.0018103563127990809, "loss": 0.2242, "step": 20996 }, { "epoch": 0.037231189587749416, "grad_norm": 0.7109375, "learning_rate": 0.0018103197651712284, "loss": 0.1957, "step": 20998 }, { "epoch": 0.03723473575305923, "grad_norm": 0.83203125, "learning_rate": 0.0018102832144368502, "loss": 0.2153, "step": 21000 }, { "epoch": 0.037238281918369046, "grad_norm": 0.4921875, "learning_rate": 0.0018102466605961058, "loss": 0.1745, "step": 21002 }, { "epoch": 0.03724182808367886, "grad_norm": 1.0859375, "learning_rate": 0.0018102101036491553, "loss": 0.2931, "step": 21004 }, { "epoch": 0.037245374248988675, "grad_norm": 0.5859375, "learning_rate": 0.0018101735435961579, "loss": 0.1673, "step": 21006 }, { "epoch": 0.037248920414298496, "grad_norm": 1.3046875, "learning_rate": 0.0018101369804372743, "loss": 0.3635, "step": 21008 }, { "epoch": 0.03725246657960831, "grad_norm": 0.37109375, "learning_rate": 0.0018101004141726639, "loss": 0.3353, "step": 21010 }, { "epoch": 0.037256012744918125, "grad_norm": 0.3828125, "learning_rate": 0.0018100638448024864, "loss": 0.2376, "step": 21012 }, { "epoch": 0.03725955891022794, "grad_norm": 0.6875, "learning_rate": 0.0018100272723269026, "loss": 0.2049, "step": 21014 }, { "epoch": 0.037263105075537754, "grad_norm": 0.61328125, "learning_rate": 0.001809990696746072, "loss": 0.2344, "step": 21016 }, { "epoch": 0.03726665124084757, "grad_norm": 0.58984375, "learning_rate": 0.001809954118060154, "loss": 0.2745, "step": 21018 }, { "epoch": 0.03727019740615738, "grad_norm": 1.21875, "learning_rate": 0.0018099175362693094, "loss": 0.2316, "step": 21020 }, { "epoch": 0.0372737435714672, "grad_norm": 0.3125, "learning_rate": 0.0018098809513736977, "loss": 0.3419, "step": 21022 }, { "epoch": 0.03727728973677701, "grad_norm": 0.458984375, "learning_rate": 0.0018098443633734794, "loss": 0.2413, "step": 21024 }, { "epoch": 0.03728083590208683, "grad_norm": 0.490234375, "learning_rate": 0.001809807772268814, "loss": 0.3037, "step": 21026 }, { "epoch": 0.03728438206739664, "grad_norm": 0.76953125, "learning_rate": 0.0018097711780598615, "loss": 0.2663, "step": 21028 }, { "epoch": 0.03728792823270646, "grad_norm": 0.47265625, "learning_rate": 0.0018097345807467824, "loss": 0.2586, "step": 21030 }, { "epoch": 0.03729147439801628, "grad_norm": 0.703125, "learning_rate": 0.0018096979803297365, "loss": 0.2057, "step": 21032 }, { "epoch": 0.03729502056332609, "grad_norm": 0.80859375, "learning_rate": 0.001809661376808884, "loss": 0.2135, "step": 21034 }, { "epoch": 0.03729856672863591, "grad_norm": 1.0078125, "learning_rate": 0.0018096247701843845, "loss": 0.1939, "step": 21036 }, { "epoch": 0.03730211289394572, "grad_norm": 0.875, "learning_rate": 0.0018095881604563986, "loss": 0.2012, "step": 21038 }, { "epoch": 0.037305659059255536, "grad_norm": 0.515625, "learning_rate": 0.0018095515476250863, "loss": 0.2241, "step": 21040 }, { "epoch": 0.03730920522456535, "grad_norm": 0.271484375, "learning_rate": 0.0018095149316906078, "loss": 0.1795, "step": 21042 }, { "epoch": 0.037312751389875165, "grad_norm": 0.3359375, "learning_rate": 0.0018094783126531232, "loss": 0.4878, "step": 21044 }, { "epoch": 0.03731629755518498, "grad_norm": 0.51171875, "learning_rate": 0.0018094416905127919, "loss": 0.2097, "step": 21046 }, { "epoch": 0.037319843720494794, "grad_norm": 0.69921875, "learning_rate": 0.0018094050652697753, "loss": 0.233, "step": 21048 }, { "epoch": 0.03732338988580461, "grad_norm": 1.0234375, "learning_rate": 0.0018093684369242331, "loss": 0.2799, "step": 21050 }, { "epoch": 0.03732693605111443, "grad_norm": 0.482421875, "learning_rate": 0.0018093318054763254, "loss": 0.231, "step": 21052 }, { "epoch": 0.037330482216424245, "grad_norm": 0.6328125, "learning_rate": 0.001809295170926212, "loss": 0.2407, "step": 21054 }, { "epoch": 0.03733402838173406, "grad_norm": 0.52734375, "learning_rate": 0.0018092585332740538, "loss": 0.2253, "step": 21056 }, { "epoch": 0.037337574547043874, "grad_norm": 0.625, "learning_rate": 0.0018092218925200108, "loss": 0.1789, "step": 21058 }, { "epoch": 0.03734112071235369, "grad_norm": 0.296875, "learning_rate": 0.0018091852486642433, "loss": 0.2441, "step": 21060 }, { "epoch": 0.0373446668776635, "grad_norm": 0.60546875, "learning_rate": 0.0018091486017069113, "loss": 0.2528, "step": 21062 }, { "epoch": 0.03734821304297332, "grad_norm": 0.390625, "learning_rate": 0.0018091119516481752, "loss": 0.1958, "step": 21064 }, { "epoch": 0.03735175920828313, "grad_norm": 0.81640625, "learning_rate": 0.0018090752984881954, "loss": 0.2339, "step": 21066 }, { "epoch": 0.03735530537359295, "grad_norm": 0.265625, "learning_rate": 0.0018090386422271324, "loss": 0.1701, "step": 21068 }, { "epoch": 0.03735885153890276, "grad_norm": 0.373046875, "learning_rate": 0.0018090019828651456, "loss": 0.1947, "step": 21070 }, { "epoch": 0.037362397704212576, "grad_norm": 0.58203125, "learning_rate": 0.0018089653204023963, "loss": 0.1968, "step": 21072 }, { "epoch": 0.03736594386952239, "grad_norm": 1.03125, "learning_rate": 0.0018089286548390448, "loss": 0.3609, "step": 21074 }, { "epoch": 0.03736949003483221, "grad_norm": 0.455078125, "learning_rate": 0.0018088919861752506, "loss": 0.2133, "step": 21076 }, { "epoch": 0.037373036200142026, "grad_norm": 1.671875, "learning_rate": 0.001808855314411175, "loss": 0.2689, "step": 21078 }, { "epoch": 0.03737658236545184, "grad_norm": 0.66796875, "learning_rate": 0.0018088186395469779, "loss": 0.1503, "step": 21080 }, { "epoch": 0.037380128530761655, "grad_norm": 0.75390625, "learning_rate": 0.0018087819615828196, "loss": 0.2604, "step": 21082 }, { "epoch": 0.03738367469607147, "grad_norm": 0.6875, "learning_rate": 0.001808745280518861, "loss": 0.289, "step": 21084 }, { "epoch": 0.037387220861381285, "grad_norm": 0.4765625, "learning_rate": 0.0018087085963552623, "loss": 0.3335, "step": 21086 }, { "epoch": 0.0373907670266911, "grad_norm": 0.396484375, "learning_rate": 0.0018086719090921837, "loss": 0.3295, "step": 21088 }, { "epoch": 0.037394313192000914, "grad_norm": 0.51171875, "learning_rate": 0.0018086352187297859, "loss": 0.2438, "step": 21090 }, { "epoch": 0.03739785935731073, "grad_norm": 0.95703125, "learning_rate": 0.0018085985252682293, "loss": 0.2031, "step": 21092 }, { "epoch": 0.03740140552262054, "grad_norm": 1.5390625, "learning_rate": 0.001808561828707674, "loss": 0.2183, "step": 21094 }, { "epoch": 0.03740495168793036, "grad_norm": 0.546875, "learning_rate": 0.0018085251290482812, "loss": 0.2279, "step": 21096 }, { "epoch": 0.03740849785324018, "grad_norm": 0.9453125, "learning_rate": 0.001808488426290211, "loss": 0.2037, "step": 21098 }, { "epoch": 0.03741204401854999, "grad_norm": 0.4609375, "learning_rate": 0.001808451720433624, "loss": 0.1834, "step": 21100 }, { "epoch": 0.03741559018385981, "grad_norm": 0.7890625, "learning_rate": 0.0018084150114786807, "loss": 0.2205, "step": 21102 }, { "epoch": 0.03741913634916962, "grad_norm": 1.7734375, "learning_rate": 0.0018083782994255412, "loss": 0.3892, "step": 21104 }, { "epoch": 0.03742268251447944, "grad_norm": 0.5546875, "learning_rate": 0.001808341584274367, "loss": 0.242, "step": 21106 }, { "epoch": 0.03742622867978925, "grad_norm": 0.44921875, "learning_rate": 0.001808304866025318, "loss": 0.2081, "step": 21108 }, { "epoch": 0.037429774845099066, "grad_norm": 0.470703125, "learning_rate": 0.001808268144678555, "loss": 0.1906, "step": 21110 }, { "epoch": 0.03743332101040888, "grad_norm": 0.71484375, "learning_rate": 0.0018082314202342385, "loss": 0.2031, "step": 21112 }, { "epoch": 0.037436867175718695, "grad_norm": 0.5703125, "learning_rate": 0.001808194692692529, "loss": 0.322, "step": 21114 }, { "epoch": 0.03744041334102851, "grad_norm": 2.0, "learning_rate": 0.0018081579620535875, "loss": 0.4059, "step": 21116 }, { "epoch": 0.037443959506338324, "grad_norm": 0.458984375, "learning_rate": 0.0018081212283175745, "loss": 0.1982, "step": 21118 }, { "epoch": 0.037447505671648146, "grad_norm": 0.5078125, "learning_rate": 0.0018080844914846505, "loss": 0.1356, "step": 21120 }, { "epoch": 0.03745105183695796, "grad_norm": 0.51171875, "learning_rate": 0.0018080477515549761, "loss": 0.2173, "step": 21122 }, { "epoch": 0.037454598002267775, "grad_norm": 0.28515625, "learning_rate": 0.0018080110085287122, "loss": 0.1596, "step": 21124 }, { "epoch": 0.03745814416757759, "grad_norm": 0.392578125, "learning_rate": 0.0018079742624060194, "loss": 0.1601, "step": 21126 }, { "epoch": 0.037461690332887404, "grad_norm": 1.6328125, "learning_rate": 0.001807937513187058, "loss": 0.2733, "step": 21128 }, { "epoch": 0.03746523649819722, "grad_norm": 0.609375, "learning_rate": 0.0018079007608719899, "loss": 0.2029, "step": 21130 }, { "epoch": 0.03746878266350703, "grad_norm": 0.453125, "learning_rate": 0.0018078640054609745, "loss": 0.2069, "step": 21132 }, { "epoch": 0.03747232882881685, "grad_norm": 1.0390625, "learning_rate": 0.0018078272469541736, "loss": 0.237, "step": 21134 }, { "epoch": 0.03747587499412666, "grad_norm": 1.53125, "learning_rate": 0.0018077904853517472, "loss": 0.1499, "step": 21136 }, { "epoch": 0.03747942115943648, "grad_norm": 0.6328125, "learning_rate": 0.001807753720653856, "loss": 0.1936, "step": 21138 }, { "epoch": 0.03748296732474629, "grad_norm": 19.125, "learning_rate": 0.0018077169528606617, "loss": 0.3343, "step": 21140 }, { "epoch": 0.037486513490056106, "grad_norm": 0.390625, "learning_rate": 0.0018076801819723242, "loss": 0.2345, "step": 21142 }, { "epoch": 0.03749005965536593, "grad_norm": 0.6796875, "learning_rate": 0.001807643407989005, "loss": 0.1732, "step": 21144 }, { "epoch": 0.03749360582067574, "grad_norm": 2.3125, "learning_rate": 0.0018076066309108644, "loss": 0.2229, "step": 21146 }, { "epoch": 0.037497151985985556, "grad_norm": 1.9921875, "learning_rate": 0.0018075698507380633, "loss": 0.4106, "step": 21148 }, { "epoch": 0.03750069815129537, "grad_norm": 0.99609375, "learning_rate": 0.0018075330674707628, "loss": 0.2897, "step": 21150 }, { "epoch": 0.037504244316605186, "grad_norm": 0.3203125, "learning_rate": 0.0018074962811091236, "loss": 0.1652, "step": 21152 }, { "epoch": 0.037507790481915, "grad_norm": 0.48046875, "learning_rate": 0.001807459491653307, "loss": 0.1881, "step": 21154 }, { "epoch": 0.037511336647224815, "grad_norm": 0.466796875, "learning_rate": 0.001807422699103473, "loss": 0.2221, "step": 21156 }, { "epoch": 0.03751488281253463, "grad_norm": 1.6640625, "learning_rate": 0.001807385903459783, "loss": 0.2691, "step": 21158 }, { "epoch": 0.037518428977844444, "grad_norm": 0.52734375, "learning_rate": 0.0018073491047223983, "loss": 0.2291, "step": 21160 }, { "epoch": 0.03752197514315426, "grad_norm": 0.314453125, "learning_rate": 0.0018073123028914792, "loss": 0.1859, "step": 21162 }, { "epoch": 0.03752552130846407, "grad_norm": 2.78125, "learning_rate": 0.0018072754979671873, "loss": 0.3122, "step": 21164 }, { "epoch": 0.037529067473773894, "grad_norm": 0.3125, "learning_rate": 0.001807238689949683, "loss": 0.2175, "step": 21166 }, { "epoch": 0.03753261363908371, "grad_norm": 0.85546875, "learning_rate": 0.0018072018788391274, "loss": 0.2292, "step": 21168 }, { "epoch": 0.037536159804393524, "grad_norm": 0.55859375, "learning_rate": 0.0018071650646356817, "loss": 0.2266, "step": 21170 }, { "epoch": 0.03753970596970334, "grad_norm": 0.59375, "learning_rate": 0.0018071282473395068, "loss": 0.1971, "step": 21172 }, { "epoch": 0.03754325213501315, "grad_norm": 0.2216796875, "learning_rate": 0.0018070914269507636, "loss": 0.1768, "step": 21174 }, { "epoch": 0.03754679830032297, "grad_norm": 0.466796875, "learning_rate": 0.0018070546034696135, "loss": 0.2223, "step": 21176 }, { "epoch": 0.03755034446563278, "grad_norm": 1.2890625, "learning_rate": 0.0018070177768962168, "loss": 0.219, "step": 21178 }, { "epoch": 0.037553890630942596, "grad_norm": 0.296875, "learning_rate": 0.0018069809472307352, "loss": 0.1485, "step": 21180 }, { "epoch": 0.03755743679625241, "grad_norm": 0.62109375, "learning_rate": 0.00180694411447333, "loss": 0.2118, "step": 21182 }, { "epoch": 0.037560982961562225, "grad_norm": 1.28125, "learning_rate": 0.0018069072786241615, "loss": 0.3292, "step": 21184 }, { "epoch": 0.03756452912687204, "grad_norm": 0.384765625, "learning_rate": 0.001806870439683391, "loss": 0.2352, "step": 21186 }, { "epoch": 0.03756807529218186, "grad_norm": 1.1015625, "learning_rate": 0.0018068335976511802, "loss": 0.2458, "step": 21188 }, { "epoch": 0.037571621457491676, "grad_norm": 0.41015625, "learning_rate": 0.0018067967525276893, "loss": 0.2156, "step": 21190 }, { "epoch": 0.03757516762280149, "grad_norm": 0.8203125, "learning_rate": 0.0018067599043130806, "loss": 0.2623, "step": 21192 }, { "epoch": 0.037578713788111305, "grad_norm": 0.7109375, "learning_rate": 0.0018067230530075145, "loss": 0.2313, "step": 21194 }, { "epoch": 0.03758225995342112, "grad_norm": 0.78515625, "learning_rate": 0.001806686198611152, "loss": 0.1753, "step": 21196 }, { "epoch": 0.037585806118730934, "grad_norm": 0.27734375, "learning_rate": 0.0018066493411241548, "loss": 0.1862, "step": 21198 }, { "epoch": 0.03758935228404075, "grad_norm": 0.63671875, "learning_rate": 0.0018066124805466836, "loss": 0.2945, "step": 21200 }, { "epoch": 0.03759289844935056, "grad_norm": 2.0, "learning_rate": 0.0018065756168789001, "loss": 0.2767, "step": 21202 }, { "epoch": 0.03759644461466038, "grad_norm": 1.078125, "learning_rate": 0.001806538750120965, "loss": 0.3304, "step": 21204 }, { "epoch": 0.03759999077997019, "grad_norm": 2.328125, "learning_rate": 0.0018065018802730399, "loss": 0.2129, "step": 21206 }, { "epoch": 0.03760353694528001, "grad_norm": 1.5625, "learning_rate": 0.0018064650073352862, "loss": 0.2207, "step": 21208 }, { "epoch": 0.03760708311058982, "grad_norm": 0.396484375, "learning_rate": 0.0018064281313078649, "loss": 0.1767, "step": 21210 }, { "epoch": 0.03761062927589964, "grad_norm": 0.34765625, "learning_rate": 0.0018063912521909366, "loss": 0.2346, "step": 21212 }, { "epoch": 0.03761417544120946, "grad_norm": 3.15625, "learning_rate": 0.001806354369984664, "loss": 0.2902, "step": 21214 }, { "epoch": 0.03761772160651927, "grad_norm": 1.0703125, "learning_rate": 0.0018063174846892076, "loss": 0.3171, "step": 21216 }, { "epoch": 0.03762126777182909, "grad_norm": 0.5078125, "learning_rate": 0.0018062805963047286, "loss": 0.2067, "step": 21218 }, { "epoch": 0.0376248139371389, "grad_norm": 0.5078125, "learning_rate": 0.001806243704831389, "loss": 0.2354, "step": 21220 }, { "epoch": 0.037628360102448716, "grad_norm": 0.84765625, "learning_rate": 0.0018062068102693489, "loss": 0.1851, "step": 21222 }, { "epoch": 0.03763190626775853, "grad_norm": 0.408203125, "learning_rate": 0.0018061699126187712, "loss": 0.2565, "step": 21224 }, { "epoch": 0.037635452433068345, "grad_norm": 3.6875, "learning_rate": 0.001806133011879816, "loss": 0.2314, "step": 21226 }, { "epoch": 0.03763899859837816, "grad_norm": 0.2578125, "learning_rate": 0.0018060961080526455, "loss": 0.1508, "step": 21228 }, { "epoch": 0.037642544763687974, "grad_norm": 0.63671875, "learning_rate": 0.0018060592011374204, "loss": 0.2648, "step": 21230 }, { "epoch": 0.03764609092899779, "grad_norm": 0.478515625, "learning_rate": 0.0018060222911343029, "loss": 0.1983, "step": 21232 }, { "epoch": 0.03764963709430761, "grad_norm": 1.0546875, "learning_rate": 0.0018059853780434537, "loss": 0.3237, "step": 21234 }, { "epoch": 0.037653183259617425, "grad_norm": 0.68359375, "learning_rate": 0.0018059484618650346, "loss": 0.2297, "step": 21236 }, { "epoch": 0.03765672942492724, "grad_norm": 0.55859375, "learning_rate": 0.001805911542599207, "loss": 0.2027, "step": 21238 }, { "epoch": 0.037660275590237054, "grad_norm": 0.57421875, "learning_rate": 0.0018058746202461323, "loss": 0.1726, "step": 21240 }, { "epoch": 0.03766382175554687, "grad_norm": 0.83984375, "learning_rate": 0.0018058376948059723, "loss": 0.2698, "step": 21242 }, { "epoch": 0.03766736792085668, "grad_norm": 0.3203125, "learning_rate": 0.001805800766278888, "loss": 0.1898, "step": 21244 }, { "epoch": 0.0376709140861665, "grad_norm": 0.400390625, "learning_rate": 0.0018057638346650408, "loss": 0.1719, "step": 21246 }, { "epoch": 0.03767446025147631, "grad_norm": 0.6328125, "learning_rate": 0.001805726899964593, "loss": 0.2179, "step": 21248 }, { "epoch": 0.037678006416786126, "grad_norm": 0.86328125, "learning_rate": 0.0018056899621777056, "loss": 0.2322, "step": 21250 }, { "epoch": 0.03768155258209594, "grad_norm": 0.6875, "learning_rate": 0.0018056530213045403, "loss": 0.176, "step": 21252 }, { "epoch": 0.037685098747405756, "grad_norm": 1.96875, "learning_rate": 0.0018056160773452584, "loss": 0.4929, "step": 21254 }, { "epoch": 0.03768864491271558, "grad_norm": 2.296875, "learning_rate": 0.0018055791303000217, "loss": 0.2637, "step": 21256 }, { "epoch": 0.03769219107802539, "grad_norm": 0.578125, "learning_rate": 0.0018055421801689917, "loss": 0.2497, "step": 21258 }, { "epoch": 0.037695737243335206, "grad_norm": 0.353515625, "learning_rate": 0.0018055052269523302, "loss": 0.4515, "step": 21260 }, { "epoch": 0.03769928340864502, "grad_norm": 0.94140625, "learning_rate": 0.0018054682706501984, "loss": 0.2287, "step": 21262 }, { "epoch": 0.037702829573954835, "grad_norm": 2.1875, "learning_rate": 0.0018054313112627583, "loss": 0.2604, "step": 21264 }, { "epoch": 0.03770637573926465, "grad_norm": 0.3828125, "learning_rate": 0.0018053943487901713, "loss": 0.2021, "step": 21266 }, { "epoch": 0.037709921904574464, "grad_norm": 0.314453125, "learning_rate": 0.0018053573832325996, "loss": 0.278, "step": 21268 }, { "epoch": 0.03771346806988428, "grad_norm": 0.466796875, "learning_rate": 0.0018053204145902038, "loss": 0.2024, "step": 21270 }, { "epoch": 0.03771701423519409, "grad_norm": 0.474609375, "learning_rate": 0.0018052834428631465, "loss": 0.1597, "step": 21272 }, { "epoch": 0.03772056040050391, "grad_norm": 0.79296875, "learning_rate": 0.001805246468051589, "loss": 0.2239, "step": 21274 }, { "epoch": 0.03772410656581372, "grad_norm": 0.6484375, "learning_rate": 0.0018052094901556933, "loss": 0.2408, "step": 21276 }, { "epoch": 0.03772765273112354, "grad_norm": 0.322265625, "learning_rate": 0.0018051725091756208, "loss": 0.2451, "step": 21278 }, { "epoch": 0.03773119889643336, "grad_norm": 0.2451171875, "learning_rate": 0.0018051355251115335, "loss": 0.1978, "step": 21280 }, { "epoch": 0.03773474506174317, "grad_norm": 0.35546875, "learning_rate": 0.0018050985379635927, "loss": 0.2186, "step": 21282 }, { "epoch": 0.03773829122705299, "grad_norm": 0.306640625, "learning_rate": 0.0018050615477319606, "loss": 0.2343, "step": 21284 }, { "epoch": 0.0377418373923628, "grad_norm": 0.55078125, "learning_rate": 0.0018050245544167988, "loss": 0.2135, "step": 21286 }, { "epoch": 0.03774538355767262, "grad_norm": 0.51171875, "learning_rate": 0.0018049875580182695, "loss": 0.1666, "step": 21288 }, { "epoch": 0.03774892972298243, "grad_norm": 0.69921875, "learning_rate": 0.0018049505585365337, "loss": 0.2834, "step": 21290 }, { "epoch": 0.037752475888292246, "grad_norm": 0.4609375, "learning_rate": 0.0018049135559717539, "loss": 0.1998, "step": 21292 }, { "epoch": 0.03775602205360206, "grad_norm": 0.3515625, "learning_rate": 0.0018048765503240914, "loss": 0.2019, "step": 21294 }, { "epoch": 0.037759568218911875, "grad_norm": 0.6328125, "learning_rate": 0.0018048395415937084, "loss": 0.2537, "step": 21296 }, { "epoch": 0.03776311438422169, "grad_norm": 1.40625, "learning_rate": 0.0018048025297807667, "loss": 0.2449, "step": 21298 }, { "epoch": 0.037766660549531504, "grad_norm": 1.484375, "learning_rate": 0.0018047655148854282, "loss": 0.2965, "step": 21300 }, { "epoch": 0.037770206714841326, "grad_norm": 0.7265625, "learning_rate": 0.0018047284969078545, "loss": 0.297, "step": 21302 }, { "epoch": 0.03777375288015114, "grad_norm": 0.57421875, "learning_rate": 0.001804691475848208, "loss": 0.2388, "step": 21304 }, { "epoch": 0.037777299045460955, "grad_norm": 1.3359375, "learning_rate": 0.00180465445170665, "loss": 0.4461, "step": 21306 }, { "epoch": 0.03778084521077077, "grad_norm": 0.498046875, "learning_rate": 0.0018046174244833431, "loss": 0.1706, "step": 21308 }, { "epoch": 0.037784391376080584, "grad_norm": 3.90625, "learning_rate": 0.0018045803941784486, "loss": 0.29, "step": 21310 }, { "epoch": 0.0377879375413904, "grad_norm": 0.4375, "learning_rate": 0.001804543360792129, "loss": 0.3236, "step": 21312 }, { "epoch": 0.03779148370670021, "grad_norm": 0.474609375, "learning_rate": 0.001804506324324546, "loss": 0.2502, "step": 21314 }, { "epoch": 0.03779502987201003, "grad_norm": 0.408203125, "learning_rate": 0.0018044692847758613, "loss": 0.1669, "step": 21316 }, { "epoch": 0.03779857603731984, "grad_norm": 0.9453125, "learning_rate": 0.0018044322421462374, "loss": 0.2279, "step": 21318 }, { "epoch": 0.03780212220262966, "grad_norm": 0.828125, "learning_rate": 0.0018043951964358356, "loss": 0.2066, "step": 21320 }, { "epoch": 0.03780566836793947, "grad_norm": 0.82421875, "learning_rate": 0.001804358147644819, "loss": 0.2373, "step": 21322 }, { "epoch": 0.03780921453324929, "grad_norm": 0.78125, "learning_rate": 0.0018043210957733482, "loss": 0.2588, "step": 21324 }, { "epoch": 0.03781276069855911, "grad_norm": 2.59375, "learning_rate": 0.0018042840408215867, "loss": 0.1868, "step": 21326 }, { "epoch": 0.03781630686386892, "grad_norm": 0.91796875, "learning_rate": 0.001804246982789696, "loss": 0.2225, "step": 21328 }, { "epoch": 0.037819853029178736, "grad_norm": 1.0703125, "learning_rate": 0.0018042099216778375, "loss": 0.1665, "step": 21330 }, { "epoch": 0.03782339919448855, "grad_norm": 0.52734375, "learning_rate": 0.0018041728574861742, "loss": 0.2137, "step": 21332 }, { "epoch": 0.037826945359798365, "grad_norm": 0.61328125, "learning_rate": 0.0018041357902148678, "loss": 0.239, "step": 21334 }, { "epoch": 0.03783049152510818, "grad_norm": 0.369140625, "learning_rate": 0.0018040987198640803, "loss": 0.1946, "step": 21336 }, { "epoch": 0.037834037690417995, "grad_norm": 2.609375, "learning_rate": 0.001804061646433974, "loss": 0.2831, "step": 21338 }, { "epoch": 0.03783758385572781, "grad_norm": 0.296875, "learning_rate": 0.0018040245699247114, "loss": 0.1894, "step": 21340 }, { "epoch": 0.037841130021037624, "grad_norm": 1.1484375, "learning_rate": 0.0018039874903364541, "loss": 0.2194, "step": 21342 }, { "epoch": 0.03784467618634744, "grad_norm": 0.197265625, "learning_rate": 0.0018039504076693645, "loss": 0.1626, "step": 21344 }, { "epoch": 0.03784822235165725, "grad_norm": 0.41015625, "learning_rate": 0.0018039133219236046, "loss": 0.2114, "step": 21346 }, { "epoch": 0.037851768516967074, "grad_norm": 0.365234375, "learning_rate": 0.0018038762330993365, "loss": 0.1793, "step": 21348 }, { "epoch": 0.03785531468227689, "grad_norm": 1.0546875, "learning_rate": 0.001803839141196723, "loss": 0.2527, "step": 21350 }, { "epoch": 0.0378588608475867, "grad_norm": 0.6953125, "learning_rate": 0.0018038020462159258, "loss": 0.185, "step": 21352 }, { "epoch": 0.03786240701289652, "grad_norm": 1.7890625, "learning_rate": 0.001803764948157107, "loss": 0.2937, "step": 21354 }, { "epoch": 0.03786595317820633, "grad_norm": 0.435546875, "learning_rate": 0.0018037278470204295, "loss": 0.2109, "step": 21356 }, { "epoch": 0.03786949934351615, "grad_norm": 0.56640625, "learning_rate": 0.0018036907428060549, "loss": 0.2817, "step": 21358 }, { "epoch": 0.03787304550882596, "grad_norm": 0.287109375, "learning_rate": 0.001803653635514146, "loss": 0.235, "step": 21360 }, { "epoch": 0.037876591674135776, "grad_norm": 0.5546875, "learning_rate": 0.001803616525144865, "loss": 0.3035, "step": 21362 }, { "epoch": 0.03788013783944559, "grad_norm": 8.25, "learning_rate": 0.0018035794116983736, "loss": 0.1584, "step": 21364 }, { "epoch": 0.037883684004755405, "grad_norm": 0.51953125, "learning_rate": 0.0018035422951748347, "loss": 0.1782, "step": 21366 }, { "epoch": 0.03788723017006522, "grad_norm": 0.85546875, "learning_rate": 0.0018035051755744102, "loss": 0.1833, "step": 21368 }, { "epoch": 0.03789077633537504, "grad_norm": 0.34375, "learning_rate": 0.001803468052897263, "loss": 0.1709, "step": 21370 }, { "epoch": 0.037894322500684856, "grad_norm": 0.54296875, "learning_rate": 0.0018034309271435553, "loss": 0.1852, "step": 21372 }, { "epoch": 0.03789786866599467, "grad_norm": 1.3828125, "learning_rate": 0.001803393798313449, "loss": 0.3274, "step": 21374 }, { "epoch": 0.037901414831304485, "grad_norm": 0.3515625, "learning_rate": 0.001803356666407107, "loss": 0.1854, "step": 21376 }, { "epoch": 0.0379049609966143, "grad_norm": 0.458984375, "learning_rate": 0.0018033195314246912, "loss": 0.1703, "step": 21378 }, { "epoch": 0.037908507161924114, "grad_norm": 1.3359375, "learning_rate": 0.0018032823933663645, "loss": 0.2617, "step": 21380 }, { "epoch": 0.03791205332723393, "grad_norm": 3.25, "learning_rate": 0.0018032452522322893, "loss": 0.2223, "step": 21382 }, { "epoch": 0.03791559949254374, "grad_norm": 0.46875, "learning_rate": 0.0018032081080226277, "loss": 0.1999, "step": 21384 }, { "epoch": 0.03791914565785356, "grad_norm": 0.78125, "learning_rate": 0.0018031709607375422, "loss": 0.2009, "step": 21386 }, { "epoch": 0.03792269182316337, "grad_norm": 2.859375, "learning_rate": 0.0018031338103771956, "loss": 0.2983, "step": 21388 }, { "epoch": 0.03792623798847319, "grad_norm": 0.67578125, "learning_rate": 0.00180309665694175, "loss": 0.2067, "step": 21390 }, { "epoch": 0.03792978415378301, "grad_norm": 0.34375, "learning_rate": 0.001803059500431368, "loss": 0.179, "step": 21392 }, { "epoch": 0.03793333031909282, "grad_norm": 2.0625, "learning_rate": 0.0018030223408462124, "loss": 0.1839, "step": 21394 }, { "epoch": 0.03793687648440264, "grad_norm": 0.42578125, "learning_rate": 0.001802985178186445, "loss": 0.1773, "step": 21396 }, { "epoch": 0.03794042264971245, "grad_norm": 5.09375, "learning_rate": 0.0018029480124522293, "loss": 0.2803, "step": 21398 }, { "epoch": 0.037943968815022266, "grad_norm": 1.8359375, "learning_rate": 0.001802910843643727, "loss": 0.2233, "step": 21400 }, { "epoch": 0.03794751498033208, "grad_norm": 7.40625, "learning_rate": 0.001802873671761101, "loss": 0.2092, "step": 21402 }, { "epoch": 0.037951061145641896, "grad_norm": 0.38671875, "learning_rate": 0.0018028364968045137, "loss": 0.1647, "step": 21404 }, { "epoch": 0.03795460731095171, "grad_norm": 3.375, "learning_rate": 0.0018027993187741283, "loss": 0.2416, "step": 21406 }, { "epoch": 0.037958153476261525, "grad_norm": 1.296875, "learning_rate": 0.0018027621376701063, "loss": 0.2574, "step": 21408 }, { "epoch": 0.03796169964157134, "grad_norm": 0.75390625, "learning_rate": 0.0018027249534926112, "loss": 0.2454, "step": 21410 }, { "epoch": 0.037965245806881154, "grad_norm": 1.03125, "learning_rate": 0.0018026877662418051, "loss": 0.2003, "step": 21412 }, { "epoch": 0.03796879197219097, "grad_norm": 0.63671875, "learning_rate": 0.0018026505759178512, "loss": 0.1924, "step": 21414 }, { "epoch": 0.03797233813750079, "grad_norm": 0.97265625, "learning_rate": 0.001802613382520912, "loss": 0.2377, "step": 21416 }, { "epoch": 0.037975884302810604, "grad_norm": 1.6015625, "learning_rate": 0.0018025761860511497, "loss": 0.1858, "step": 21418 }, { "epoch": 0.03797943046812042, "grad_norm": 0.70703125, "learning_rate": 0.0018025389865087274, "loss": 0.2268, "step": 21420 }, { "epoch": 0.037982976633430234, "grad_norm": 0.34375, "learning_rate": 0.0018025017838938073, "loss": 0.2002, "step": 21422 }, { "epoch": 0.03798652279874005, "grad_norm": 0.3515625, "learning_rate": 0.0018024645782065527, "loss": 0.225, "step": 21424 }, { "epoch": 0.03799006896404986, "grad_norm": 0.7421875, "learning_rate": 0.0018024273694471262, "loss": 0.2857, "step": 21426 }, { "epoch": 0.03799361512935968, "grad_norm": 1.75, "learning_rate": 0.0018023901576156903, "loss": 0.3385, "step": 21428 }, { "epoch": 0.03799716129466949, "grad_norm": 0.365234375, "learning_rate": 0.0018023529427124078, "loss": 0.2366, "step": 21430 }, { "epoch": 0.038000707459979306, "grad_norm": 1.0859375, "learning_rate": 0.0018023157247374416, "loss": 0.211, "step": 21432 }, { "epoch": 0.03800425362528912, "grad_norm": 0.76953125, "learning_rate": 0.0018022785036909544, "loss": 0.1803, "step": 21434 }, { "epoch": 0.038007799790598935, "grad_norm": 1.3515625, "learning_rate": 0.0018022412795731088, "loss": 0.3451, "step": 21436 }, { "epoch": 0.03801134595590876, "grad_norm": 0.73046875, "learning_rate": 0.001802204052384068, "loss": 0.2073, "step": 21438 }, { "epoch": 0.03801489212121857, "grad_norm": 0.6875, "learning_rate": 0.0018021668221239941, "loss": 0.1702, "step": 21440 }, { "epoch": 0.038018438286528386, "grad_norm": 0.8203125, "learning_rate": 0.001802129588793051, "loss": 0.2185, "step": 21442 }, { "epoch": 0.0380219844518382, "grad_norm": 0.56640625, "learning_rate": 0.0018020923523914005, "loss": 0.2276, "step": 21444 }, { "epoch": 0.038025530617148015, "grad_norm": 1.09375, "learning_rate": 0.001802055112919206, "loss": 0.343, "step": 21446 }, { "epoch": 0.03802907678245783, "grad_norm": 0.55078125, "learning_rate": 0.0018020178703766303, "loss": 0.2146, "step": 21448 }, { "epoch": 0.038032622947767644, "grad_norm": 0.6484375, "learning_rate": 0.001801980624763836, "loss": 0.2428, "step": 21450 }, { "epoch": 0.03803616911307746, "grad_norm": 0.66015625, "learning_rate": 0.0018019433760809863, "loss": 0.2192, "step": 21452 }, { "epoch": 0.03803971527838727, "grad_norm": 0.76171875, "learning_rate": 0.0018019061243282441, "loss": 0.2024, "step": 21454 }, { "epoch": 0.03804326144369709, "grad_norm": 0.63671875, "learning_rate": 0.0018018688695057724, "loss": 0.2785, "step": 21456 }, { "epoch": 0.0380468076090069, "grad_norm": 0.6796875, "learning_rate": 0.0018018316116137337, "loss": 0.2465, "step": 21458 }, { "epoch": 0.038050353774316724, "grad_norm": 0.89453125, "learning_rate": 0.001801794350652291, "loss": 0.207, "step": 21460 }, { "epoch": 0.03805389993962654, "grad_norm": 1.0234375, "learning_rate": 0.0018017570866216079, "loss": 0.3178, "step": 21462 }, { "epoch": 0.03805744610493635, "grad_norm": 0.361328125, "learning_rate": 0.0018017198195218465, "loss": 0.1847, "step": 21464 }, { "epoch": 0.03806099227024617, "grad_norm": 0.322265625, "learning_rate": 0.0018016825493531708, "loss": 0.1926, "step": 21466 }, { "epoch": 0.03806453843555598, "grad_norm": 0.65234375, "learning_rate": 0.0018016452761157426, "loss": 0.2002, "step": 21468 }, { "epoch": 0.0380680846008658, "grad_norm": 0.9453125, "learning_rate": 0.0018016079998097259, "loss": 0.2189, "step": 21470 }, { "epoch": 0.03807163076617561, "grad_norm": 2.09375, "learning_rate": 0.0018015707204352834, "loss": 0.4627, "step": 21472 }, { "epoch": 0.038075176931485426, "grad_norm": 0.33984375, "learning_rate": 0.0018015334379925779, "loss": 0.253, "step": 21474 }, { "epoch": 0.03807872309679524, "grad_norm": 1.203125, "learning_rate": 0.0018014961524817725, "loss": 0.2035, "step": 21476 }, { "epoch": 0.038082269262105055, "grad_norm": 0.45703125, "learning_rate": 0.0018014588639030306, "loss": 0.2383, "step": 21478 }, { "epoch": 0.03808581542741487, "grad_norm": 1.3671875, "learning_rate": 0.001801421572256515, "loss": 0.2245, "step": 21480 }, { "epoch": 0.038089361592724684, "grad_norm": 0.7890625, "learning_rate": 0.001801384277542389, "loss": 0.4605, "step": 21482 }, { "epoch": 0.038092907758034505, "grad_norm": 0.56640625, "learning_rate": 0.0018013469797608158, "loss": 0.2007, "step": 21484 }, { "epoch": 0.03809645392334432, "grad_norm": 0.55859375, "learning_rate": 0.0018013096789119578, "loss": 0.1807, "step": 21486 }, { "epoch": 0.038100000088654135, "grad_norm": 0.8359375, "learning_rate": 0.001801272374995979, "loss": 0.2476, "step": 21488 }, { "epoch": 0.03810354625396395, "grad_norm": 0.828125, "learning_rate": 0.001801235068013042, "loss": 0.2311, "step": 21490 }, { "epoch": 0.038107092419273764, "grad_norm": 0.326171875, "learning_rate": 0.00180119775796331, "loss": 0.1984, "step": 21492 }, { "epoch": 0.03811063858458358, "grad_norm": 0.44140625, "learning_rate": 0.0018011604448469466, "loss": 0.214, "step": 21494 }, { "epoch": 0.03811418474989339, "grad_norm": 0.55078125, "learning_rate": 0.0018011231286641147, "loss": 0.2387, "step": 21496 }, { "epoch": 0.03811773091520321, "grad_norm": 0.45703125, "learning_rate": 0.0018010858094149773, "loss": 0.1921, "step": 21498 }, { "epoch": 0.03812127708051302, "grad_norm": 0.50390625, "learning_rate": 0.0018010484870996978, "loss": 0.2413, "step": 21500 }, { "epoch": 0.038124823245822836, "grad_norm": 0.2099609375, "learning_rate": 0.0018010111617184398, "loss": 0.1738, "step": 21502 }, { "epoch": 0.03812836941113265, "grad_norm": 2.03125, "learning_rate": 0.0018009738332713657, "loss": 0.5018, "step": 21504 }, { "epoch": 0.03813191557644247, "grad_norm": 0.412109375, "learning_rate": 0.0018009365017586394, "loss": 0.1976, "step": 21506 }, { "epoch": 0.03813546174175229, "grad_norm": 5.71875, "learning_rate": 0.0018008991671804241, "loss": 0.3669, "step": 21508 }, { "epoch": 0.0381390079070621, "grad_norm": 0.369140625, "learning_rate": 0.0018008618295368827, "loss": 0.2173, "step": 21510 }, { "epoch": 0.038142554072371916, "grad_norm": 0.7578125, "learning_rate": 0.001800824488828179, "loss": 0.2089, "step": 21512 }, { "epoch": 0.03814610023768173, "grad_norm": 3.90625, "learning_rate": 0.0018007871450544761, "loss": 0.3266, "step": 21514 }, { "epoch": 0.038149646402991545, "grad_norm": 0.314453125, "learning_rate": 0.0018007497982159373, "loss": 0.1582, "step": 21516 }, { "epoch": 0.03815319256830136, "grad_norm": 0.28515625, "learning_rate": 0.001800712448312726, "loss": 0.1981, "step": 21518 }, { "epoch": 0.038156738733611174, "grad_norm": 0.37890625, "learning_rate": 0.0018006750953450052, "loss": 0.2257, "step": 21520 }, { "epoch": 0.03816028489892099, "grad_norm": 0.5, "learning_rate": 0.0018006377393129385, "loss": 0.2191, "step": 21522 }, { "epoch": 0.0381638310642308, "grad_norm": 0.35546875, "learning_rate": 0.0018006003802166898, "loss": 0.2321, "step": 21524 }, { "epoch": 0.03816737722954062, "grad_norm": 0.41015625, "learning_rate": 0.0018005630180564216, "loss": 0.191, "step": 21526 }, { "epoch": 0.03817092339485044, "grad_norm": 0.29296875, "learning_rate": 0.0018005256528322974, "loss": 0.1865, "step": 21528 }, { "epoch": 0.038174469560160254, "grad_norm": 1.09375, "learning_rate": 0.0018004882845444813, "loss": 0.201, "step": 21530 }, { "epoch": 0.03817801572547007, "grad_norm": 2.40625, "learning_rate": 0.0018004509131931364, "loss": 0.165, "step": 21532 }, { "epoch": 0.03818156189077988, "grad_norm": 0.318359375, "learning_rate": 0.0018004135387784262, "loss": 0.1965, "step": 21534 }, { "epoch": 0.0381851080560897, "grad_norm": 0.408203125, "learning_rate": 0.0018003761613005138, "loss": 0.1487, "step": 21536 }, { "epoch": 0.03818865422139951, "grad_norm": 0.62890625, "learning_rate": 0.0018003387807595628, "loss": 0.195, "step": 21538 }, { "epoch": 0.03819220038670933, "grad_norm": 1.53125, "learning_rate": 0.001800301397155737, "loss": 0.2531, "step": 21540 }, { "epoch": 0.03819574655201914, "grad_norm": 1.1953125, "learning_rate": 0.0018002640104891996, "loss": 0.2566, "step": 21542 }, { "epoch": 0.038199292717328956, "grad_norm": 0.287109375, "learning_rate": 0.001800226620760114, "loss": 0.1852, "step": 21544 }, { "epoch": 0.03820283888263877, "grad_norm": 2.953125, "learning_rate": 0.001800189227968644, "loss": 0.2772, "step": 21546 }, { "epoch": 0.038206385047948585, "grad_norm": 0.62109375, "learning_rate": 0.001800151832114953, "loss": 0.2051, "step": 21548 }, { "epoch": 0.0382099312132584, "grad_norm": 0.93359375, "learning_rate": 0.0018001144331992048, "loss": 0.1843, "step": 21550 }, { "epoch": 0.03821347737856822, "grad_norm": 0.369140625, "learning_rate": 0.0018000770312215626, "loss": 0.2179, "step": 21552 }, { "epoch": 0.038217023543878036, "grad_norm": 0.30859375, "learning_rate": 0.00180003962618219, "loss": 0.1822, "step": 21554 }, { "epoch": 0.03822056970918785, "grad_norm": 0.91015625, "learning_rate": 0.001800002218081251, "loss": 0.3259, "step": 21556 }, { "epoch": 0.038224115874497665, "grad_norm": 0.26953125, "learning_rate": 0.0017999648069189085, "loss": 0.1598, "step": 21558 }, { "epoch": 0.03822766203980748, "grad_norm": 0.92578125, "learning_rate": 0.001799927392695327, "loss": 0.2842, "step": 21560 }, { "epoch": 0.038231208205117294, "grad_norm": 0.89453125, "learning_rate": 0.0017998899754106692, "loss": 0.3447, "step": 21562 }, { "epoch": 0.03823475437042711, "grad_norm": 0.9921875, "learning_rate": 0.0017998525550650996, "loss": 0.2216, "step": 21564 }, { "epoch": 0.03823830053573692, "grad_norm": 1.4609375, "learning_rate": 0.001799815131658781, "loss": 0.2649, "step": 21566 }, { "epoch": 0.03824184670104674, "grad_norm": 0.68359375, "learning_rate": 0.0017997777051918778, "loss": 0.2075, "step": 21568 }, { "epoch": 0.03824539286635655, "grad_norm": 0.91015625, "learning_rate": 0.0017997402756645534, "loss": 0.2362, "step": 21570 }, { "epoch": 0.03824893903166637, "grad_norm": 0.5078125, "learning_rate": 0.0017997028430769715, "loss": 0.1895, "step": 21572 }, { "epoch": 0.03825248519697619, "grad_norm": 0.921875, "learning_rate": 0.0017996654074292958, "loss": 0.2503, "step": 21574 }, { "epoch": 0.038256031362286, "grad_norm": 0.275390625, "learning_rate": 0.00179962796872169, "loss": 0.2608, "step": 21576 }, { "epoch": 0.03825957752759582, "grad_norm": 0.98046875, "learning_rate": 0.0017995905269543179, "loss": 0.2141, "step": 21578 }, { "epoch": 0.03826312369290563, "grad_norm": 1.0703125, "learning_rate": 0.0017995530821273429, "loss": 0.2324, "step": 21580 }, { "epoch": 0.038266669858215446, "grad_norm": 0.400390625, "learning_rate": 0.0017995156342409293, "loss": 0.2247, "step": 21582 }, { "epoch": 0.03827021602352526, "grad_norm": 0.32421875, "learning_rate": 0.0017994781832952407, "loss": 0.1777, "step": 21584 }, { "epoch": 0.038273762188835075, "grad_norm": 1.8828125, "learning_rate": 0.0017994407292904408, "loss": 0.201, "step": 21586 }, { "epoch": 0.03827730835414489, "grad_norm": 0.89453125, "learning_rate": 0.0017994032722266932, "loss": 0.2083, "step": 21588 }, { "epoch": 0.038280854519454705, "grad_norm": 0.41796875, "learning_rate": 0.0017993658121041624, "loss": 0.2172, "step": 21590 }, { "epoch": 0.03828440068476452, "grad_norm": 0.5625, "learning_rate": 0.0017993283489230114, "loss": 0.2217, "step": 21592 }, { "epoch": 0.038287946850074334, "grad_norm": 0.498046875, "learning_rate": 0.0017992908826834047, "loss": 0.1841, "step": 21594 }, { "epoch": 0.038291493015384155, "grad_norm": 0.8359375, "learning_rate": 0.0017992534133855057, "loss": 0.466, "step": 21596 }, { "epoch": 0.03829503918069397, "grad_norm": 0.275390625, "learning_rate": 0.0017992159410294784, "loss": 0.2008, "step": 21598 }, { "epoch": 0.038298585346003784, "grad_norm": 0.37890625, "learning_rate": 0.0017991784656154867, "loss": 0.1767, "step": 21600 }, { "epoch": 0.0383021315113136, "grad_norm": 0.384765625, "learning_rate": 0.0017991409871436948, "loss": 0.1752, "step": 21602 }, { "epoch": 0.03830567767662341, "grad_norm": 0.318359375, "learning_rate": 0.0017991035056142662, "loss": 0.3381, "step": 21604 }, { "epoch": 0.03830922384193323, "grad_norm": 0.52734375, "learning_rate": 0.0017990660210273647, "loss": 0.195, "step": 21606 }, { "epoch": 0.03831277000724304, "grad_norm": 0.6015625, "learning_rate": 0.001799028533383155, "loss": 0.1741, "step": 21608 }, { "epoch": 0.03831631617255286, "grad_norm": 0.412109375, "learning_rate": 0.0017989910426818, "loss": 0.2444, "step": 21610 }, { "epoch": 0.03831986233786267, "grad_norm": 0.609375, "learning_rate": 0.0017989535489234645, "loss": 0.212, "step": 21612 }, { "epoch": 0.038323408503172486, "grad_norm": 2.90625, "learning_rate": 0.001798916052108312, "loss": 0.3668, "step": 21614 }, { "epoch": 0.0383269546684823, "grad_norm": 0.478515625, "learning_rate": 0.001798878552236507, "loss": 0.2052, "step": 21616 }, { "epoch": 0.038330500833792115, "grad_norm": 0.26171875, "learning_rate": 0.0017988410493082127, "loss": 0.1844, "step": 21618 }, { "epoch": 0.03833404699910194, "grad_norm": 1.453125, "learning_rate": 0.001798803543323594, "loss": 0.2678, "step": 21620 }, { "epoch": 0.03833759316441175, "grad_norm": 0.462890625, "learning_rate": 0.0017987660342828145, "loss": 0.1717, "step": 21622 }, { "epoch": 0.038341139329721566, "grad_norm": 1.125, "learning_rate": 0.0017987285221860382, "loss": 0.2296, "step": 21624 }, { "epoch": 0.03834468549503138, "grad_norm": 0.7109375, "learning_rate": 0.0017986910070334288, "loss": 0.4399, "step": 21626 }, { "epoch": 0.038348231660341195, "grad_norm": 0.40234375, "learning_rate": 0.001798653488825151, "loss": 0.1886, "step": 21628 }, { "epoch": 0.03835177782565101, "grad_norm": 0.80859375, "learning_rate": 0.001798615967561369, "loss": 0.1967, "step": 21630 }, { "epoch": 0.038355323990960824, "grad_norm": 0.7734375, "learning_rate": 0.0017985784432422462, "loss": 0.2455, "step": 21632 }, { "epoch": 0.03835887015627064, "grad_norm": 0.45703125, "learning_rate": 0.0017985409158679474, "loss": 0.1426, "step": 21634 }, { "epoch": 0.03836241632158045, "grad_norm": 1.078125, "learning_rate": 0.0017985033854386362, "loss": 0.2842, "step": 21636 }, { "epoch": 0.03836596248689027, "grad_norm": 0.40234375, "learning_rate": 0.0017984658519544768, "loss": 0.2208, "step": 21638 }, { "epoch": 0.03836950865220008, "grad_norm": 0.345703125, "learning_rate": 0.0017984283154156336, "loss": 0.2409, "step": 21640 }, { "epoch": 0.038373054817509904, "grad_norm": 0.90234375, "learning_rate": 0.0017983907758222705, "loss": 0.1885, "step": 21642 }, { "epoch": 0.03837660098281972, "grad_norm": 0.69140625, "learning_rate": 0.0017983532331745523, "loss": 0.2739, "step": 21644 }, { "epoch": 0.03838014714812953, "grad_norm": 0.486328125, "learning_rate": 0.0017983156874726424, "loss": 0.2155, "step": 21646 }, { "epoch": 0.03838369331343935, "grad_norm": 1.71875, "learning_rate": 0.0017982781387167052, "loss": 0.2438, "step": 21648 }, { "epoch": 0.03838723947874916, "grad_norm": 0.380859375, "learning_rate": 0.0017982405869069052, "loss": 0.1899, "step": 21650 }, { "epoch": 0.038390785644058976, "grad_norm": 0.384765625, "learning_rate": 0.0017982030320434065, "loss": 0.1642, "step": 21652 }, { "epoch": 0.03839433180936879, "grad_norm": 0.48828125, "learning_rate": 0.0017981654741263732, "loss": 0.3173, "step": 21654 }, { "epoch": 0.038397877974678606, "grad_norm": 0.65625, "learning_rate": 0.0017981279131559695, "loss": 0.1585, "step": 21656 }, { "epoch": 0.03840142413998842, "grad_norm": 0.50390625, "learning_rate": 0.0017980903491323602, "loss": 0.1421, "step": 21658 }, { "epoch": 0.038404970305298235, "grad_norm": 0.484375, "learning_rate": 0.0017980527820557087, "loss": 0.2471, "step": 21660 }, { "epoch": 0.03840851647060805, "grad_norm": 0.451171875, "learning_rate": 0.0017980152119261802, "loss": 0.2446, "step": 21662 }, { "epoch": 0.03841206263591787, "grad_norm": 0.34375, "learning_rate": 0.0017979776387439385, "loss": 0.2102, "step": 21664 }, { "epoch": 0.038415608801227685, "grad_norm": 1.0703125, "learning_rate": 0.001797940062509148, "loss": 0.2099, "step": 21666 }, { "epoch": 0.0384191549665375, "grad_norm": 1.15625, "learning_rate": 0.0017979024832219732, "loss": 0.1909, "step": 21668 }, { "epoch": 0.038422701131847314, "grad_norm": 0.3984375, "learning_rate": 0.0017978649008825783, "loss": 0.2471, "step": 21670 }, { "epoch": 0.03842624729715713, "grad_norm": 0.58203125, "learning_rate": 0.0017978273154911277, "loss": 0.2312, "step": 21672 }, { "epoch": 0.038429793462466943, "grad_norm": 0.451171875, "learning_rate": 0.0017977897270477856, "loss": 0.2769, "step": 21674 }, { "epoch": 0.03843333962777676, "grad_norm": 1.7578125, "learning_rate": 0.0017977521355527167, "loss": 0.2314, "step": 21676 }, { "epoch": 0.03843688579308657, "grad_norm": 1.3828125, "learning_rate": 0.0017977145410060854, "loss": 0.2219, "step": 21678 }, { "epoch": 0.03844043195839639, "grad_norm": 0.5078125, "learning_rate": 0.0017976769434080557, "loss": 0.1992, "step": 21680 }, { "epoch": 0.0384439781237062, "grad_norm": 1.1328125, "learning_rate": 0.0017976393427587927, "loss": 0.2051, "step": 21682 }, { "epoch": 0.038447524289016016, "grad_norm": 0.279296875, "learning_rate": 0.0017976017390584598, "loss": 0.2115, "step": 21684 }, { "epoch": 0.03845107045432583, "grad_norm": 0.314453125, "learning_rate": 0.0017975641323072227, "loss": 0.1678, "step": 21686 }, { "epoch": 0.03845461661963565, "grad_norm": 0.80078125, "learning_rate": 0.0017975265225052453, "loss": 0.2602, "step": 21688 }, { "epoch": 0.03845816278494547, "grad_norm": 1.1640625, "learning_rate": 0.0017974889096526916, "loss": 0.2192, "step": 21690 }, { "epoch": 0.03846170895025528, "grad_norm": 0.2734375, "learning_rate": 0.0017974512937497266, "loss": 0.1584, "step": 21692 }, { "epoch": 0.038465255115565096, "grad_norm": 0.34375, "learning_rate": 0.0017974136747965152, "loss": 0.1772, "step": 21694 }, { "epoch": 0.03846880128087491, "grad_norm": 0.294921875, "learning_rate": 0.0017973760527932212, "loss": 0.2411, "step": 21696 }, { "epoch": 0.038472347446184725, "grad_norm": 0.40234375, "learning_rate": 0.0017973384277400095, "loss": 0.1927, "step": 21698 }, { "epoch": 0.03847589361149454, "grad_norm": 0.37109375, "learning_rate": 0.0017973007996370445, "loss": 0.2077, "step": 21700 }, { "epoch": 0.038479439776804354, "grad_norm": 0.408203125, "learning_rate": 0.0017972631684844908, "loss": 0.1795, "step": 21702 }, { "epoch": 0.03848298594211417, "grad_norm": 0.2490234375, "learning_rate": 0.0017972255342825131, "loss": 0.2127, "step": 21704 }, { "epoch": 0.03848653210742398, "grad_norm": 0.482421875, "learning_rate": 0.001797187897031276, "loss": 0.223, "step": 21706 }, { "epoch": 0.0384900782727338, "grad_norm": 0.455078125, "learning_rate": 0.0017971502567309442, "loss": 0.1951, "step": 21708 }, { "epoch": 0.03849362443804362, "grad_norm": 0.8515625, "learning_rate": 0.0017971126133816818, "loss": 0.2039, "step": 21710 }, { "epoch": 0.038497170603353434, "grad_norm": 0.396484375, "learning_rate": 0.0017970749669836538, "loss": 0.5035, "step": 21712 }, { "epoch": 0.03850071676866325, "grad_norm": 0.5859375, "learning_rate": 0.0017970373175370247, "loss": 0.2439, "step": 21714 }, { "epoch": 0.03850426293397306, "grad_norm": 0.92578125, "learning_rate": 0.0017969996650419595, "loss": 0.2206, "step": 21716 }, { "epoch": 0.03850780909928288, "grad_norm": 0.2421875, "learning_rate": 0.0017969620094986228, "loss": 0.1699, "step": 21718 }, { "epoch": 0.03851135526459269, "grad_norm": 0.42578125, "learning_rate": 0.0017969243509071786, "loss": 0.183, "step": 21720 }, { "epoch": 0.03851490142990251, "grad_norm": 0.5234375, "learning_rate": 0.0017968866892677925, "loss": 0.2239, "step": 21722 }, { "epoch": 0.03851844759521232, "grad_norm": 0.52734375, "learning_rate": 0.0017968490245806287, "loss": 0.22, "step": 21724 }, { "epoch": 0.038521993760522136, "grad_norm": 0.8671875, "learning_rate": 0.0017968113568458519, "loss": 0.164, "step": 21726 }, { "epoch": 0.03852553992583195, "grad_norm": 2.4375, "learning_rate": 0.001796773686063627, "loss": 0.2165, "step": 21728 }, { "epoch": 0.038529086091141765, "grad_norm": 0.37109375, "learning_rate": 0.001796736012234119, "loss": 0.1875, "step": 21730 }, { "epoch": 0.03853263225645158, "grad_norm": 0.83203125, "learning_rate": 0.0017966983353574922, "loss": 0.2295, "step": 21732 }, { "epoch": 0.0385361784217614, "grad_norm": 0.52734375, "learning_rate": 0.0017966606554339116, "loss": 0.2098, "step": 21734 }, { "epoch": 0.038539724587071215, "grad_norm": 0.87890625, "learning_rate": 0.0017966229724635419, "loss": 0.2323, "step": 21736 }, { "epoch": 0.03854327075238103, "grad_norm": 0.298828125, "learning_rate": 0.001796585286446548, "loss": 0.1817, "step": 21738 }, { "epoch": 0.038546816917690845, "grad_norm": 2.8125, "learning_rate": 0.001796547597383095, "loss": 0.2743, "step": 21740 }, { "epoch": 0.03855036308300066, "grad_norm": 0.216796875, "learning_rate": 0.001796509905273347, "loss": 0.1524, "step": 21742 }, { "epoch": 0.038553909248310474, "grad_norm": 0.384765625, "learning_rate": 0.0017964722101174696, "loss": 0.2215, "step": 21744 }, { "epoch": 0.03855745541362029, "grad_norm": 0.82421875, "learning_rate": 0.0017964345119156268, "loss": 0.2316, "step": 21746 }, { "epoch": 0.0385610015789301, "grad_norm": 0.458984375, "learning_rate": 0.0017963968106679841, "loss": 0.1612, "step": 21748 }, { "epoch": 0.03856454774423992, "grad_norm": 0.703125, "learning_rate": 0.0017963591063747068, "loss": 0.2325, "step": 21750 }, { "epoch": 0.03856809390954973, "grad_norm": 0.412109375, "learning_rate": 0.0017963213990359588, "loss": 0.23, "step": 21752 }, { "epoch": 0.038571640074859546, "grad_norm": 0.267578125, "learning_rate": 0.0017962836886519055, "loss": 0.2387, "step": 21754 }, { "epoch": 0.03857518624016937, "grad_norm": 0.6875, "learning_rate": 0.0017962459752227117, "loss": 0.2164, "step": 21756 }, { "epoch": 0.03857873240547918, "grad_norm": 0.302734375, "learning_rate": 0.0017962082587485429, "loss": 0.2466, "step": 21758 }, { "epoch": 0.038582278570789, "grad_norm": 0.36328125, "learning_rate": 0.0017961705392295633, "loss": 0.2395, "step": 21760 }, { "epoch": 0.03858582473609881, "grad_norm": 0.51171875, "learning_rate": 0.0017961328166659384, "loss": 0.182, "step": 21762 }, { "epoch": 0.038589370901408626, "grad_norm": 0.55859375, "learning_rate": 0.0017960950910578324, "loss": 0.2665, "step": 21764 }, { "epoch": 0.03859291706671844, "grad_norm": 1.09375, "learning_rate": 0.001796057362405411, "loss": 0.2522, "step": 21766 }, { "epoch": 0.038596463232028255, "grad_norm": 1.0078125, "learning_rate": 0.001796019630708839, "loss": 0.2188, "step": 21768 }, { "epoch": 0.03860000939733807, "grad_norm": 0.37109375, "learning_rate": 0.0017959818959682817, "loss": 0.1675, "step": 21770 }, { "epoch": 0.038603555562647884, "grad_norm": 0.45703125, "learning_rate": 0.0017959441581839037, "loss": 0.1646, "step": 21772 }, { "epoch": 0.0386071017279577, "grad_norm": 1.1875, "learning_rate": 0.00179590641735587, "loss": 0.1603, "step": 21774 }, { "epoch": 0.03861064789326751, "grad_norm": 0.59765625, "learning_rate": 0.0017958686734843461, "loss": 0.1961, "step": 21776 }, { "epoch": 0.038614194058577335, "grad_norm": 0.64453125, "learning_rate": 0.001795830926569497, "loss": 0.2326, "step": 21778 }, { "epoch": 0.03861774022388715, "grad_norm": 0.453125, "learning_rate": 0.0017957931766114875, "loss": 0.1864, "step": 21780 }, { "epoch": 0.038621286389196964, "grad_norm": 1.3125, "learning_rate": 0.0017957554236104824, "loss": 0.3128, "step": 21782 }, { "epoch": 0.03862483255450678, "grad_norm": 0.62109375, "learning_rate": 0.0017957176675666474, "loss": 0.2496, "step": 21784 }, { "epoch": 0.03862837871981659, "grad_norm": 1.6328125, "learning_rate": 0.0017956799084801476, "loss": 0.2231, "step": 21786 }, { "epoch": 0.03863192488512641, "grad_norm": 0.9140625, "learning_rate": 0.001795642146351148, "loss": 0.2446, "step": 21788 }, { "epoch": 0.03863547105043622, "grad_norm": 0.546875, "learning_rate": 0.0017956043811798137, "loss": 0.1515, "step": 21790 }, { "epoch": 0.03863901721574604, "grad_norm": 0.64453125, "learning_rate": 0.0017955666129663098, "loss": 0.291, "step": 21792 }, { "epoch": 0.03864256338105585, "grad_norm": 1.1640625, "learning_rate": 0.0017955288417108018, "loss": 0.1853, "step": 21794 }, { "epoch": 0.038646109546365666, "grad_norm": 0.46875, "learning_rate": 0.0017954910674134543, "loss": 0.2279, "step": 21796 }, { "epoch": 0.03864965571167548, "grad_norm": 0.36328125, "learning_rate": 0.001795453290074433, "loss": 0.1541, "step": 21798 }, { "epoch": 0.038653201876985295, "grad_norm": 2.921875, "learning_rate": 0.001795415509693903, "loss": 0.2079, "step": 21800 }, { "epoch": 0.038656748042295117, "grad_norm": 1.5, "learning_rate": 0.0017953777262720296, "loss": 0.2954, "step": 21802 }, { "epoch": 0.03866029420760493, "grad_norm": 2.640625, "learning_rate": 0.001795339939808978, "loss": 0.4074, "step": 21804 }, { "epoch": 0.038663840372914746, "grad_norm": 0.38671875, "learning_rate": 0.0017953021503049132, "loss": 0.1383, "step": 21806 }, { "epoch": 0.03866738653822456, "grad_norm": 7.09375, "learning_rate": 0.001795264357760001, "loss": 0.2181, "step": 21808 }, { "epoch": 0.038670932703534375, "grad_norm": 0.306640625, "learning_rate": 0.001795226562174406, "loss": 0.1974, "step": 21810 }, { "epoch": 0.03867447886884419, "grad_norm": 0.296875, "learning_rate": 0.0017951887635482937, "loss": 0.2331, "step": 21812 }, { "epoch": 0.038678025034154004, "grad_norm": 1.8359375, "learning_rate": 0.00179515096188183, "loss": 0.3978, "step": 21814 }, { "epoch": 0.03868157119946382, "grad_norm": 0.87890625, "learning_rate": 0.0017951131571751794, "loss": 0.2498, "step": 21816 }, { "epoch": 0.03868511736477363, "grad_norm": 1.6875, "learning_rate": 0.0017950753494285082, "loss": 0.3201, "step": 21818 }, { "epoch": 0.03868866353008345, "grad_norm": 0.62890625, "learning_rate": 0.0017950375386419806, "loss": 0.1894, "step": 21820 }, { "epoch": 0.03869220969539326, "grad_norm": 0.90234375, "learning_rate": 0.0017949997248157628, "loss": 0.1885, "step": 21822 }, { "epoch": 0.038695755860703084, "grad_norm": 0.37109375, "learning_rate": 0.0017949619079500197, "loss": 0.1972, "step": 21824 }, { "epoch": 0.0386993020260129, "grad_norm": 0.53125, "learning_rate": 0.001794924088044917, "loss": 0.163, "step": 21826 }, { "epoch": 0.03870284819132271, "grad_norm": 0.6328125, "learning_rate": 0.0017948862651006204, "loss": 0.1982, "step": 21828 }, { "epoch": 0.03870639435663253, "grad_norm": 0.474609375, "learning_rate": 0.0017948484391172943, "loss": 0.2272, "step": 21830 }, { "epoch": 0.03870994052194234, "grad_norm": 0.333984375, "learning_rate": 0.001794810610095105, "loss": 0.3554, "step": 21832 }, { "epoch": 0.038713486687252156, "grad_norm": 0.6796875, "learning_rate": 0.0017947727780342178, "loss": 0.2373, "step": 21834 }, { "epoch": 0.03871703285256197, "grad_norm": 0.478515625, "learning_rate": 0.0017947349429347978, "loss": 0.1816, "step": 21836 }, { "epoch": 0.038720579017871785, "grad_norm": 0.1875, "learning_rate": 0.0017946971047970112, "loss": 0.1593, "step": 21838 }, { "epoch": 0.0387241251831816, "grad_norm": 0.69921875, "learning_rate": 0.0017946592636210225, "loss": 0.212, "step": 21840 }, { "epoch": 0.038727671348491415, "grad_norm": 0.94921875, "learning_rate": 0.0017946214194069976, "loss": 0.3428, "step": 21842 }, { "epoch": 0.03873121751380123, "grad_norm": 0.40625, "learning_rate": 0.0017945835721551024, "loss": 0.1411, "step": 21844 }, { "epoch": 0.03873476367911105, "grad_norm": 0.2470703125, "learning_rate": 0.0017945457218655019, "loss": 0.2246, "step": 21846 }, { "epoch": 0.038738309844420865, "grad_norm": 1.796875, "learning_rate": 0.001794507868538362, "loss": 0.5908, "step": 21848 }, { "epoch": 0.03874185600973068, "grad_norm": 0.40625, "learning_rate": 0.001794470012173848, "loss": 0.2306, "step": 21850 }, { "epoch": 0.038745402175040494, "grad_norm": 0.62109375, "learning_rate": 0.0017944321527721258, "loss": 0.2245, "step": 21852 }, { "epoch": 0.03874894834035031, "grad_norm": 0.46875, "learning_rate": 0.0017943942903333606, "loss": 0.1925, "step": 21854 }, { "epoch": 0.03875249450566012, "grad_norm": 0.228515625, "learning_rate": 0.001794356424857718, "loss": 0.2012, "step": 21856 }, { "epoch": 0.03875604067096994, "grad_norm": 0.52734375, "learning_rate": 0.001794318556345364, "loss": 0.1919, "step": 21858 }, { "epoch": 0.03875958683627975, "grad_norm": 0.58203125, "learning_rate": 0.0017942806847964638, "loss": 0.2967, "step": 21860 }, { "epoch": 0.03876313300158957, "grad_norm": 0.4609375, "learning_rate": 0.001794242810211183, "loss": 0.2165, "step": 21862 }, { "epoch": 0.03876667916689938, "grad_norm": 0.30078125, "learning_rate": 0.0017942049325896877, "loss": 0.2089, "step": 21864 }, { "epoch": 0.038770225332209196, "grad_norm": 0.255859375, "learning_rate": 0.0017941670519321432, "loss": 0.3428, "step": 21866 }, { "epoch": 0.03877377149751901, "grad_norm": 0.69921875, "learning_rate": 0.0017941291682387148, "loss": 0.2266, "step": 21868 }, { "epoch": 0.03877731766282883, "grad_norm": 0.53515625, "learning_rate": 0.0017940912815095694, "loss": 0.2177, "step": 21870 }, { "epoch": 0.03878086382813865, "grad_norm": 0.5703125, "learning_rate": 0.0017940533917448712, "loss": 0.2007, "step": 21872 }, { "epoch": 0.03878440999344846, "grad_norm": 0.67578125, "learning_rate": 0.001794015498944787, "loss": 0.1605, "step": 21874 }, { "epoch": 0.038787956158758276, "grad_norm": 1.0, "learning_rate": 0.001793977603109482, "loss": 0.2885, "step": 21876 }, { "epoch": 0.03879150232406809, "grad_norm": 1.390625, "learning_rate": 0.0017939397042391221, "loss": 0.278, "step": 21878 }, { "epoch": 0.038795048489377905, "grad_norm": 1.796875, "learning_rate": 0.0017939018023338728, "loss": 0.3323, "step": 21880 }, { "epoch": 0.03879859465468772, "grad_norm": 3.71875, "learning_rate": 0.0017938638973939007, "loss": 0.2582, "step": 21882 }, { "epoch": 0.038802140819997534, "grad_norm": 0.6171875, "learning_rate": 0.0017938259894193702, "loss": 0.3284, "step": 21884 }, { "epoch": 0.03880568698530735, "grad_norm": 0.61328125, "learning_rate": 0.0017937880784104486, "loss": 0.1652, "step": 21886 }, { "epoch": 0.03880923315061716, "grad_norm": 1.03125, "learning_rate": 0.0017937501643673004, "loss": 0.2368, "step": 21888 }, { "epoch": 0.03881277931592698, "grad_norm": 0.51171875, "learning_rate": 0.001793712247290092, "loss": 0.2055, "step": 21890 }, { "epoch": 0.0388163254812368, "grad_norm": 0.4375, "learning_rate": 0.0017936743271789895, "loss": 0.2097, "step": 21892 }, { "epoch": 0.038819871646546614, "grad_norm": 0.27734375, "learning_rate": 0.0017936364040341583, "loss": 0.2082, "step": 21894 }, { "epoch": 0.03882341781185643, "grad_norm": 0.53515625, "learning_rate": 0.001793598477855764, "loss": 0.3441, "step": 21896 }, { "epoch": 0.03882696397716624, "grad_norm": 2.625, "learning_rate": 0.0017935605486439734, "loss": 0.3034, "step": 21898 }, { "epoch": 0.03883051014247606, "grad_norm": 2.140625, "learning_rate": 0.0017935226163989515, "loss": 0.2335, "step": 21900 }, { "epoch": 0.03883405630778587, "grad_norm": 0.62109375, "learning_rate": 0.0017934846811208647, "loss": 0.1856, "step": 21902 }, { "epoch": 0.038837602473095686, "grad_norm": 0.6328125, "learning_rate": 0.0017934467428098787, "loss": 0.2339, "step": 21904 }, { "epoch": 0.0388411486384055, "grad_norm": 0.3515625, "learning_rate": 0.0017934088014661593, "loss": 0.2676, "step": 21906 }, { "epoch": 0.038844694803715316, "grad_norm": 0.89453125, "learning_rate": 0.001793370857089873, "loss": 0.2663, "step": 21908 }, { "epoch": 0.03884824096902513, "grad_norm": 0.259765625, "learning_rate": 0.001793332909681185, "loss": 0.1632, "step": 21910 }, { "epoch": 0.038851787134334945, "grad_norm": 4.15625, "learning_rate": 0.0017932949592402614, "loss": 0.3467, "step": 21912 }, { "epoch": 0.038855333299644766, "grad_norm": 0.74609375, "learning_rate": 0.0017932570057672683, "loss": 0.2038, "step": 21914 }, { "epoch": 0.03885887946495458, "grad_norm": 1.03125, "learning_rate": 0.0017932190492623722, "loss": 0.2834, "step": 21916 }, { "epoch": 0.038862425630264395, "grad_norm": 0.91796875, "learning_rate": 0.0017931810897257386, "loss": 0.287, "step": 21918 }, { "epoch": 0.03886597179557421, "grad_norm": 0.2236328125, "learning_rate": 0.0017931431271575333, "loss": 0.2205, "step": 21920 }, { "epoch": 0.038869517960884024, "grad_norm": 0.78125, "learning_rate": 0.0017931051615579226, "loss": 0.2059, "step": 21922 }, { "epoch": 0.03887306412619384, "grad_norm": 0.421875, "learning_rate": 0.0017930671929270727, "loss": 0.1422, "step": 21924 }, { "epoch": 0.038876610291503653, "grad_norm": 0.62890625, "learning_rate": 0.0017930292212651492, "loss": 0.1882, "step": 21926 }, { "epoch": 0.03888015645681347, "grad_norm": 0.259765625, "learning_rate": 0.001792991246572319, "loss": 0.1638, "step": 21928 }, { "epoch": 0.03888370262212328, "grad_norm": 0.66796875, "learning_rate": 0.001792953268848747, "loss": 0.2229, "step": 21930 }, { "epoch": 0.0388872487874331, "grad_norm": 0.447265625, "learning_rate": 0.0017929152880946004, "loss": 0.205, "step": 21932 }, { "epoch": 0.03889079495274291, "grad_norm": 0.29296875, "learning_rate": 0.0017928773043100446, "loss": 0.2087, "step": 21934 }, { "epoch": 0.038894341118052726, "grad_norm": 1.796875, "learning_rate": 0.001792839317495246, "loss": 0.2559, "step": 21936 }, { "epoch": 0.03889788728336255, "grad_norm": 0.4296875, "learning_rate": 0.0017928013276503705, "loss": 0.1928, "step": 21938 }, { "epoch": 0.03890143344867236, "grad_norm": 0.33203125, "learning_rate": 0.0017927633347755846, "loss": 0.2183, "step": 21940 }, { "epoch": 0.03890497961398218, "grad_norm": 1.1484375, "learning_rate": 0.0017927253388710543, "loss": 0.2914, "step": 21942 }, { "epoch": 0.03890852577929199, "grad_norm": 6.03125, "learning_rate": 0.0017926873399369458, "loss": 0.4897, "step": 21944 }, { "epoch": 0.038912071944601806, "grad_norm": 0.5625, "learning_rate": 0.001792649337973425, "loss": 0.2279, "step": 21946 }, { "epoch": 0.03891561810991162, "grad_norm": 0.6953125, "learning_rate": 0.0017926113329806586, "loss": 0.1931, "step": 21948 }, { "epoch": 0.038919164275221435, "grad_norm": 0.291015625, "learning_rate": 0.0017925733249588122, "loss": 0.2048, "step": 21950 }, { "epoch": 0.03892271044053125, "grad_norm": 0.23828125, "learning_rate": 0.0017925353139080524, "loss": 0.2217, "step": 21952 }, { "epoch": 0.038926256605841064, "grad_norm": 0.53125, "learning_rate": 0.001792497299828546, "loss": 0.2628, "step": 21954 }, { "epoch": 0.03892980277115088, "grad_norm": 3.640625, "learning_rate": 0.001792459282720458, "loss": 0.4594, "step": 21956 }, { "epoch": 0.03893334893646069, "grad_norm": 1.171875, "learning_rate": 0.0017924212625839557, "loss": 0.207, "step": 21958 }, { "epoch": 0.038936895101770515, "grad_norm": 1.2578125, "learning_rate": 0.0017923832394192048, "loss": 0.4309, "step": 21960 }, { "epoch": 0.03894044126708033, "grad_norm": 4.5625, "learning_rate": 0.001792345213226372, "loss": 0.3272, "step": 21962 }, { "epoch": 0.038943987432390144, "grad_norm": 0.322265625, "learning_rate": 0.0017923071840056232, "loss": 0.1853, "step": 21964 }, { "epoch": 0.03894753359769996, "grad_norm": 0.5390625, "learning_rate": 0.001792269151757125, "loss": 0.2015, "step": 21966 }, { "epoch": 0.03895107976300977, "grad_norm": 0.408203125, "learning_rate": 0.0017922311164810434, "loss": 0.1751, "step": 21968 }, { "epoch": 0.03895462592831959, "grad_norm": 0.7734375, "learning_rate": 0.0017921930781775453, "loss": 0.1886, "step": 21970 }, { "epoch": 0.0389581720936294, "grad_norm": 0.8125, "learning_rate": 0.0017921550368467968, "loss": 0.2208, "step": 21972 }, { "epoch": 0.03896171825893922, "grad_norm": 0.546875, "learning_rate": 0.001792116992488964, "loss": 0.1982, "step": 21974 }, { "epoch": 0.03896526442424903, "grad_norm": 0.890625, "learning_rate": 0.0017920789451042138, "loss": 0.1581, "step": 21976 }, { "epoch": 0.038968810589558846, "grad_norm": 0.62109375, "learning_rate": 0.001792040894692712, "loss": 0.2182, "step": 21978 }, { "epoch": 0.03897235675486866, "grad_norm": 0.6796875, "learning_rate": 0.0017920028412546254, "loss": 0.1943, "step": 21980 }, { "epoch": 0.03897590292017848, "grad_norm": 0.40234375, "learning_rate": 0.00179196478479012, "loss": 0.2012, "step": 21982 }, { "epoch": 0.038979449085488296, "grad_norm": 0.77734375, "learning_rate": 0.0017919267252993632, "loss": 0.1723, "step": 21984 }, { "epoch": 0.03898299525079811, "grad_norm": 0.2294921875, "learning_rate": 0.0017918886627825204, "loss": 0.3973, "step": 21986 }, { "epoch": 0.038986541416107925, "grad_norm": 0.59765625, "learning_rate": 0.0017918505972397587, "loss": 0.1851, "step": 21988 }, { "epoch": 0.03899008758141774, "grad_norm": 0.60546875, "learning_rate": 0.0017918125286712444, "loss": 0.1303, "step": 21990 }, { "epoch": 0.038993633746727555, "grad_norm": 0.484375, "learning_rate": 0.0017917744570771438, "loss": 0.1856, "step": 21992 }, { "epoch": 0.03899717991203737, "grad_norm": 3.3125, "learning_rate": 0.0017917363824576239, "loss": 0.5767, "step": 21994 }, { "epoch": 0.039000726077347184, "grad_norm": 0.99609375, "learning_rate": 0.0017916983048128503, "loss": 0.2643, "step": 21996 }, { "epoch": 0.039004272242657, "grad_norm": 0.365234375, "learning_rate": 0.0017916602241429902, "loss": 0.1855, "step": 21998 }, { "epoch": 0.03900781840796681, "grad_norm": 0.50390625, "learning_rate": 0.0017916221404482104, "loss": 0.2681, "step": 22000 }, { "epoch": 0.03901136457327663, "grad_norm": 0.35546875, "learning_rate": 0.001791584053728677, "loss": 0.2332, "step": 22002 }, { "epoch": 0.03901491073858644, "grad_norm": 1.34375, "learning_rate": 0.0017915459639845563, "loss": 0.1947, "step": 22004 }, { "epoch": 0.03901845690389626, "grad_norm": 0.427734375, "learning_rate": 0.0017915078712160155, "loss": 0.2327, "step": 22006 }, { "epoch": 0.03902200306920608, "grad_norm": 1.2109375, "learning_rate": 0.0017914697754232213, "loss": 0.2249, "step": 22008 }, { "epoch": 0.03902554923451589, "grad_norm": 0.37109375, "learning_rate": 0.0017914316766063393, "loss": 0.1922, "step": 22010 }, { "epoch": 0.03902909539982571, "grad_norm": 0.53515625, "learning_rate": 0.0017913935747655373, "loss": 0.198, "step": 22012 }, { "epoch": 0.03903264156513552, "grad_norm": 0.81640625, "learning_rate": 0.0017913554699009813, "loss": 0.202, "step": 22014 }, { "epoch": 0.039036187730445336, "grad_norm": 0.90234375, "learning_rate": 0.001791317362012838, "loss": 0.1873, "step": 22016 }, { "epoch": 0.03903973389575515, "grad_norm": 0.3125, "learning_rate": 0.0017912792511012741, "loss": 0.1882, "step": 22018 }, { "epoch": 0.039043280061064965, "grad_norm": 0.3046875, "learning_rate": 0.0017912411371664564, "loss": 0.1989, "step": 22020 }, { "epoch": 0.03904682622637478, "grad_norm": 1.15625, "learning_rate": 0.001791203020208551, "loss": 0.4068, "step": 22022 }, { "epoch": 0.039050372391684594, "grad_norm": 2.203125, "learning_rate": 0.0017911649002277257, "loss": 0.2333, "step": 22024 }, { "epoch": 0.03905391855699441, "grad_norm": 1.0859375, "learning_rate": 0.0017911267772241465, "loss": 0.2052, "step": 22026 }, { "epoch": 0.03905746472230423, "grad_norm": 0.361328125, "learning_rate": 0.00179108865119798, "loss": 0.2758, "step": 22028 }, { "epoch": 0.039061010887614045, "grad_norm": 0.466796875, "learning_rate": 0.0017910505221493934, "loss": 0.3595, "step": 22030 }, { "epoch": 0.03906455705292386, "grad_norm": 1.4765625, "learning_rate": 0.001791012390078553, "loss": 0.1364, "step": 22032 }, { "epoch": 0.039068103218233674, "grad_norm": 0.54296875, "learning_rate": 0.001790974254985626, "loss": 0.2297, "step": 22034 }, { "epoch": 0.03907164938354349, "grad_norm": 0.35546875, "learning_rate": 0.0017909361168707788, "loss": 0.2233, "step": 22036 }, { "epoch": 0.0390751955488533, "grad_norm": 1.03125, "learning_rate": 0.0017908979757341786, "loss": 0.2059, "step": 22038 }, { "epoch": 0.03907874171416312, "grad_norm": 7.03125, "learning_rate": 0.0017908598315759916, "loss": 0.3606, "step": 22040 }, { "epoch": 0.03908228787947293, "grad_norm": 0.3828125, "learning_rate": 0.0017908216843963854, "loss": 0.2313, "step": 22042 }, { "epoch": 0.03908583404478275, "grad_norm": 0.296875, "learning_rate": 0.0017907835341955261, "loss": 0.2071, "step": 22044 }, { "epoch": 0.03908938021009256, "grad_norm": 0.51171875, "learning_rate": 0.0017907453809735813, "loss": 0.2179, "step": 22046 }, { "epoch": 0.039092926375402376, "grad_norm": 0.263671875, "learning_rate": 0.001790707224730717, "loss": 0.2232, "step": 22048 }, { "epoch": 0.0390964725407122, "grad_norm": 1.8046875, "learning_rate": 0.0017906690654671006, "loss": 0.3491, "step": 22050 }, { "epoch": 0.03910001870602201, "grad_norm": 0.271484375, "learning_rate": 0.001790630903182899, "loss": 0.1273, "step": 22052 }, { "epoch": 0.039103564871331827, "grad_norm": 1.15625, "learning_rate": 0.001790592737878279, "loss": 0.2397, "step": 22054 }, { "epoch": 0.03910711103664164, "grad_norm": 0.388671875, "learning_rate": 0.0017905545695534074, "loss": 0.1902, "step": 22056 }, { "epoch": 0.039110657201951456, "grad_norm": 2.046875, "learning_rate": 0.0017905163982084513, "loss": 0.3476, "step": 22058 }, { "epoch": 0.03911420336726127, "grad_norm": 0.64453125, "learning_rate": 0.0017904782238435774, "loss": 0.2215, "step": 22060 }, { "epoch": 0.039117749532571085, "grad_norm": 0.3203125, "learning_rate": 0.0017904400464589531, "loss": 0.2047, "step": 22062 }, { "epoch": 0.0391212956978809, "grad_norm": 0.64453125, "learning_rate": 0.0017904018660547451, "loss": 0.1973, "step": 22064 }, { "epoch": 0.039124841863190714, "grad_norm": 1.2578125, "learning_rate": 0.0017903636826311206, "loss": 0.2044, "step": 22066 }, { "epoch": 0.03912838802850053, "grad_norm": 0.55078125, "learning_rate": 0.0017903254961882458, "loss": 0.1693, "step": 22068 }, { "epoch": 0.03913193419381034, "grad_norm": 0.287109375, "learning_rate": 0.0017902873067262887, "loss": 0.2062, "step": 22070 }, { "epoch": 0.03913548035912016, "grad_norm": 0.345703125, "learning_rate": 0.0017902491142454157, "loss": 0.1765, "step": 22072 }, { "epoch": 0.03913902652442998, "grad_norm": 0.25390625, "learning_rate": 0.0017902109187457938, "loss": 0.2058, "step": 22074 }, { "epoch": 0.039142572689739794, "grad_norm": 0.39453125, "learning_rate": 0.0017901727202275903, "loss": 0.1492, "step": 22076 }, { "epoch": 0.03914611885504961, "grad_norm": 0.66796875, "learning_rate": 0.0017901345186909726, "loss": 0.2426, "step": 22078 }, { "epoch": 0.03914966502035942, "grad_norm": 0.2119140625, "learning_rate": 0.001790096314136107, "loss": 0.1855, "step": 22080 }, { "epoch": 0.03915321118566924, "grad_norm": 2.578125, "learning_rate": 0.0017900581065631615, "loss": 0.3733, "step": 22082 }, { "epoch": 0.03915675735097905, "grad_norm": 3.484375, "learning_rate": 0.0017900198959723019, "loss": 0.3413, "step": 22084 }, { "epoch": 0.039160303516288866, "grad_norm": 0.470703125, "learning_rate": 0.0017899816823636967, "loss": 0.1786, "step": 22086 }, { "epoch": 0.03916384968159868, "grad_norm": 0.28515625, "learning_rate": 0.0017899434657375124, "loss": 0.1873, "step": 22088 }, { "epoch": 0.039167395846908495, "grad_norm": 1.7890625, "learning_rate": 0.0017899052460939157, "loss": 0.203, "step": 22090 }, { "epoch": 0.03917094201221831, "grad_norm": 0.44140625, "learning_rate": 0.0017898670234330746, "loss": 0.2406, "step": 22092 }, { "epoch": 0.039174488177528125, "grad_norm": 0.88671875, "learning_rate": 0.0017898287977551558, "loss": 0.2262, "step": 22094 }, { "epoch": 0.039178034342837946, "grad_norm": 0.79296875, "learning_rate": 0.0017897905690603265, "loss": 0.1909, "step": 22096 }, { "epoch": 0.03918158050814776, "grad_norm": 0.58203125, "learning_rate": 0.001789752337348754, "loss": 0.1912, "step": 22098 }, { "epoch": 0.039185126673457575, "grad_norm": 0.478515625, "learning_rate": 0.0017897141026206053, "loss": 0.1712, "step": 22100 }, { "epoch": 0.03918867283876739, "grad_norm": 0.349609375, "learning_rate": 0.0017896758648760479, "loss": 0.2418, "step": 22102 }, { "epoch": 0.039192219004077204, "grad_norm": 0.76171875, "learning_rate": 0.0017896376241152488, "loss": 0.1972, "step": 22104 }, { "epoch": 0.03919576516938702, "grad_norm": 0.87890625, "learning_rate": 0.0017895993803383752, "loss": 0.2691, "step": 22106 }, { "epoch": 0.03919931133469683, "grad_norm": 1.34375, "learning_rate": 0.0017895611335455946, "loss": 0.1351, "step": 22108 }, { "epoch": 0.03920285750000665, "grad_norm": 0.443359375, "learning_rate": 0.001789522883737074, "loss": 0.1612, "step": 22110 }, { "epoch": 0.03920640366531646, "grad_norm": 0.458984375, "learning_rate": 0.0017894846309129808, "loss": 0.1786, "step": 22112 }, { "epoch": 0.03920994983062628, "grad_norm": 0.38671875, "learning_rate": 0.0017894463750734826, "loss": 0.1613, "step": 22114 }, { "epoch": 0.03921349599593609, "grad_norm": 0.69921875, "learning_rate": 0.0017894081162187464, "loss": 0.189, "step": 22116 }, { "epoch": 0.03921704216124591, "grad_norm": 0.412109375, "learning_rate": 0.0017893698543489393, "loss": 0.2212, "step": 22118 }, { "epoch": 0.03922058832655573, "grad_norm": 1.9453125, "learning_rate": 0.0017893315894642293, "loss": 0.2416, "step": 22120 }, { "epoch": 0.03922413449186554, "grad_norm": 0.470703125, "learning_rate": 0.0017892933215647828, "loss": 0.2088, "step": 22122 }, { "epoch": 0.03922768065717536, "grad_norm": 0.416015625, "learning_rate": 0.0017892550506507683, "loss": 0.1838, "step": 22124 }, { "epoch": 0.03923122682248517, "grad_norm": 0.255859375, "learning_rate": 0.0017892167767223522, "loss": 0.1605, "step": 22126 }, { "epoch": 0.039234772987794986, "grad_norm": 0.416015625, "learning_rate": 0.0017891784997797022, "loss": 0.3285, "step": 22128 }, { "epoch": 0.0392383191531048, "grad_norm": 1.0859375, "learning_rate": 0.001789140219822986, "loss": 0.2135, "step": 22130 }, { "epoch": 0.039241865318414615, "grad_norm": 0.44140625, "learning_rate": 0.0017891019368523706, "loss": 0.2212, "step": 22132 }, { "epoch": 0.03924541148372443, "grad_norm": 0.59375, "learning_rate": 0.0017890636508680238, "loss": 0.2315, "step": 22134 }, { "epoch": 0.039248957649034244, "grad_norm": 0.38671875, "learning_rate": 0.0017890253618701127, "loss": 0.1954, "step": 22136 }, { "epoch": 0.03925250381434406, "grad_norm": 1.6875, "learning_rate": 0.0017889870698588049, "loss": 0.2498, "step": 22138 }, { "epoch": 0.03925604997965387, "grad_norm": 0.89453125, "learning_rate": 0.001788948774834268, "loss": 0.1916, "step": 22140 }, { "epoch": 0.039259596144963695, "grad_norm": 0.29296875, "learning_rate": 0.001788910476796669, "loss": 0.3171, "step": 22142 }, { "epoch": 0.03926314231027351, "grad_norm": 4.5, "learning_rate": 0.001788872175746176, "loss": 0.2606, "step": 22144 }, { "epoch": 0.039266688475583324, "grad_norm": 0.81640625, "learning_rate": 0.0017888338716829564, "loss": 0.3094, "step": 22146 }, { "epoch": 0.03927023464089314, "grad_norm": 0.408203125, "learning_rate": 0.0017887955646071771, "loss": 0.2256, "step": 22148 }, { "epoch": 0.03927378080620295, "grad_norm": 0.27734375, "learning_rate": 0.0017887572545190065, "loss": 0.2259, "step": 22150 }, { "epoch": 0.03927732697151277, "grad_norm": 1.5703125, "learning_rate": 0.0017887189414186118, "loss": 0.1866, "step": 22152 }, { "epoch": 0.03928087313682258, "grad_norm": 1.265625, "learning_rate": 0.0017886806253061603, "loss": 0.2834, "step": 22154 }, { "epoch": 0.039284419302132396, "grad_norm": 0.353515625, "learning_rate": 0.0017886423061818197, "loss": 0.1738, "step": 22156 }, { "epoch": 0.03928796546744221, "grad_norm": 0.41015625, "learning_rate": 0.001788603984045758, "loss": 0.1712, "step": 22158 }, { "epoch": 0.039291511632752026, "grad_norm": 1.0859375, "learning_rate": 0.0017885656588981422, "loss": 0.1711, "step": 22160 }, { "epoch": 0.03929505779806184, "grad_norm": 1.0859375, "learning_rate": 0.0017885273307391401, "loss": 0.1854, "step": 22162 }, { "epoch": 0.03929860396337166, "grad_norm": 1.3828125, "learning_rate": 0.0017884889995689198, "loss": 0.2072, "step": 22164 }, { "epoch": 0.039302150128681476, "grad_norm": 0.6953125, "learning_rate": 0.001788450665387648, "loss": 0.1947, "step": 22166 }, { "epoch": 0.03930569629399129, "grad_norm": 0.75390625, "learning_rate": 0.0017884123281954932, "loss": 0.2326, "step": 22168 }, { "epoch": 0.039309242459301105, "grad_norm": 0.69140625, "learning_rate": 0.0017883739879926228, "loss": 0.151, "step": 22170 }, { "epoch": 0.03931278862461092, "grad_norm": 1.390625, "learning_rate": 0.0017883356447792043, "loss": 0.2206, "step": 22172 }, { "epoch": 0.039316334789920734, "grad_norm": 0.39453125, "learning_rate": 0.0017882972985554055, "loss": 0.1695, "step": 22174 }, { "epoch": 0.03931988095523055, "grad_norm": 0.337890625, "learning_rate": 0.0017882589493213945, "loss": 0.2672, "step": 22176 }, { "epoch": 0.039323427120540363, "grad_norm": 0.74609375, "learning_rate": 0.0017882205970773382, "loss": 0.3668, "step": 22178 }, { "epoch": 0.03932697328585018, "grad_norm": 0.318359375, "learning_rate": 0.001788182241823405, "loss": 0.2381, "step": 22180 }, { "epoch": 0.03933051945115999, "grad_norm": 0.400390625, "learning_rate": 0.0017881438835597623, "loss": 0.3068, "step": 22182 }, { "epoch": 0.03933406561646981, "grad_norm": 1.03125, "learning_rate": 0.0017881055222865778, "loss": 0.238, "step": 22184 }, { "epoch": 0.03933761178177963, "grad_norm": 0.7265625, "learning_rate": 0.0017880671580040198, "loss": 0.1642, "step": 22186 }, { "epoch": 0.03934115794708944, "grad_norm": 1.203125, "learning_rate": 0.0017880287907122555, "loss": 0.4286, "step": 22188 }, { "epoch": 0.03934470411239926, "grad_norm": 1.15625, "learning_rate": 0.001787990420411453, "loss": 0.2153, "step": 22190 }, { "epoch": 0.03934825027770907, "grad_norm": 1.4921875, "learning_rate": 0.0017879520471017804, "loss": 0.4067, "step": 22192 }, { "epoch": 0.03935179644301889, "grad_norm": 0.28125, "learning_rate": 0.0017879136707834044, "loss": 0.1787, "step": 22194 }, { "epoch": 0.0393553426083287, "grad_norm": 0.75, "learning_rate": 0.001787875291456494, "loss": 0.2395, "step": 22196 }, { "epoch": 0.039358888773638516, "grad_norm": 0.494140625, "learning_rate": 0.0017878369091212168, "loss": 0.1984, "step": 22198 }, { "epoch": 0.03936243493894833, "grad_norm": 0.39453125, "learning_rate": 0.00178779852377774, "loss": 0.1949, "step": 22200 }, { "epoch": 0.039365981104258145, "grad_norm": 0.30078125, "learning_rate": 0.0017877601354262325, "loss": 0.1921, "step": 22202 }, { "epoch": 0.03936952726956796, "grad_norm": 0.66796875, "learning_rate": 0.0017877217440668617, "loss": 0.2013, "step": 22204 }, { "epoch": 0.039373073434877774, "grad_norm": 0.46875, "learning_rate": 0.0017876833496997951, "loss": 0.2293, "step": 22206 }, { "epoch": 0.03937661960018759, "grad_norm": 0.5390625, "learning_rate": 0.001787644952325201, "loss": 0.2591, "step": 22208 }, { "epoch": 0.03938016576549741, "grad_norm": 0.326171875, "learning_rate": 0.0017876065519432474, "loss": 0.1907, "step": 22210 }, { "epoch": 0.039383711930807225, "grad_norm": 0.8515625, "learning_rate": 0.0017875681485541022, "loss": 0.2286, "step": 22212 }, { "epoch": 0.03938725809611704, "grad_norm": 0.31640625, "learning_rate": 0.0017875297421579334, "loss": 0.236, "step": 22214 }, { "epoch": 0.039390804261426854, "grad_norm": 0.498046875, "learning_rate": 0.0017874913327549087, "loss": 0.18, "step": 22216 }, { "epoch": 0.03939435042673667, "grad_norm": 0.22265625, "learning_rate": 0.0017874529203451965, "loss": 0.3153, "step": 22218 }, { "epoch": 0.03939789659204648, "grad_norm": 0.80078125, "learning_rate": 0.001787414504928964, "loss": 0.2818, "step": 22220 }, { "epoch": 0.0394014427573563, "grad_norm": 0.2041015625, "learning_rate": 0.0017873760865063802, "loss": 0.1262, "step": 22222 }, { "epoch": 0.03940498892266611, "grad_norm": 0.890625, "learning_rate": 0.0017873376650776127, "loss": 0.2494, "step": 22224 }, { "epoch": 0.03940853508797593, "grad_norm": 0.484375, "learning_rate": 0.0017872992406428292, "loss": 0.2258, "step": 22226 }, { "epoch": 0.03941208125328574, "grad_norm": 0.27734375, "learning_rate": 0.0017872608132021984, "loss": 0.1758, "step": 22228 }, { "epoch": 0.039415627418595556, "grad_norm": 0.55859375, "learning_rate": 0.001787222382755888, "loss": 0.2236, "step": 22230 }, { "epoch": 0.03941917358390538, "grad_norm": 0.388671875, "learning_rate": 0.0017871839493040656, "loss": 0.2376, "step": 22232 }, { "epoch": 0.03942271974921519, "grad_norm": 0.5625, "learning_rate": 0.0017871455128469003, "loss": 0.2132, "step": 22234 }, { "epoch": 0.039426265914525006, "grad_norm": 0.625, "learning_rate": 0.0017871070733845593, "loss": 0.2119, "step": 22236 }, { "epoch": 0.03942981207983482, "grad_norm": 1.7578125, "learning_rate": 0.0017870686309172114, "loss": 0.2734, "step": 22238 }, { "epoch": 0.039433358245144635, "grad_norm": 0.478515625, "learning_rate": 0.0017870301854450241, "loss": 0.1884, "step": 22240 }, { "epoch": 0.03943690441045445, "grad_norm": 0.265625, "learning_rate": 0.0017869917369681663, "loss": 0.1574, "step": 22242 }, { "epoch": 0.039440450575764265, "grad_norm": 0.240234375, "learning_rate": 0.0017869532854868054, "loss": 0.3469, "step": 22244 }, { "epoch": 0.03944399674107408, "grad_norm": 0.2412109375, "learning_rate": 0.0017869148310011096, "loss": 0.2359, "step": 22246 }, { "epoch": 0.039447542906383894, "grad_norm": 0.447265625, "learning_rate": 0.001786876373511248, "loss": 0.1972, "step": 22248 }, { "epoch": 0.03945108907169371, "grad_norm": 0.95703125, "learning_rate": 0.0017868379130173875, "loss": 0.306, "step": 22250 }, { "epoch": 0.03945463523700352, "grad_norm": 0.466796875, "learning_rate": 0.0017867994495196972, "loss": 0.2169, "step": 22252 }, { "epoch": 0.039458181402313344, "grad_norm": 0.69140625, "learning_rate": 0.0017867609830183452, "loss": 0.272, "step": 22254 }, { "epoch": 0.03946172756762316, "grad_norm": 0.44921875, "learning_rate": 0.001786722513513499, "loss": 0.2198, "step": 22256 }, { "epoch": 0.03946527373293297, "grad_norm": 0.90234375, "learning_rate": 0.001786684041005328, "loss": 0.2116, "step": 22258 }, { "epoch": 0.03946881989824279, "grad_norm": 0.27734375, "learning_rate": 0.0017866455654939996, "loss": 0.1665, "step": 22260 }, { "epoch": 0.0394723660635526, "grad_norm": 0.439453125, "learning_rate": 0.0017866070869796825, "loss": 0.2036, "step": 22262 }, { "epoch": 0.03947591222886242, "grad_norm": 0.412109375, "learning_rate": 0.0017865686054625448, "loss": 0.1828, "step": 22264 }, { "epoch": 0.03947945839417223, "grad_norm": 0.49609375, "learning_rate": 0.0017865301209427547, "loss": 0.1723, "step": 22266 }, { "epoch": 0.039483004559482046, "grad_norm": 0.29296875, "learning_rate": 0.0017864916334204806, "loss": 0.1818, "step": 22268 }, { "epoch": 0.03948655072479186, "grad_norm": 0.474609375, "learning_rate": 0.001786453142895891, "loss": 0.2001, "step": 22270 }, { "epoch": 0.039490096890101675, "grad_norm": 1.015625, "learning_rate": 0.0017864146493691542, "loss": 0.2373, "step": 22272 }, { "epoch": 0.03949364305541149, "grad_norm": 2.625, "learning_rate": 0.0017863761528404383, "loss": 0.223, "step": 22274 }, { "epoch": 0.039497189220721304, "grad_norm": 0.34765625, "learning_rate": 0.0017863376533099118, "loss": 0.1885, "step": 22276 }, { "epoch": 0.039500735386031126, "grad_norm": 0.251953125, "learning_rate": 0.0017862991507777432, "loss": 0.1824, "step": 22278 }, { "epoch": 0.03950428155134094, "grad_norm": 0.416015625, "learning_rate": 0.0017862606452441006, "loss": 0.229, "step": 22280 }, { "epoch": 0.039507827716650755, "grad_norm": 0.376953125, "learning_rate": 0.0017862221367091527, "loss": 0.2069, "step": 22282 }, { "epoch": 0.03951137388196057, "grad_norm": 0.27734375, "learning_rate": 0.0017861836251730674, "loss": 0.2617, "step": 22284 }, { "epoch": 0.039514920047270384, "grad_norm": 0.330078125, "learning_rate": 0.001786145110636014, "loss": 0.1854, "step": 22286 }, { "epoch": 0.0395184662125802, "grad_norm": 0.765625, "learning_rate": 0.0017861065930981602, "loss": 0.2667, "step": 22288 }, { "epoch": 0.03952201237789001, "grad_norm": 0.353515625, "learning_rate": 0.0017860680725596749, "loss": 0.2359, "step": 22290 }, { "epoch": 0.03952555854319983, "grad_norm": 0.51953125, "learning_rate": 0.0017860295490207262, "loss": 0.2201, "step": 22292 }, { "epoch": 0.03952910470850964, "grad_norm": 0.53515625, "learning_rate": 0.0017859910224814828, "loss": 0.1633, "step": 22294 }, { "epoch": 0.03953265087381946, "grad_norm": 0.21875, "learning_rate": 0.0017859524929421128, "loss": 0.1826, "step": 22296 }, { "epoch": 0.03953619703912927, "grad_norm": 0.337890625, "learning_rate": 0.0017859139604027856, "loss": 0.2732, "step": 22298 }, { "epoch": 0.03953974320443909, "grad_norm": 0.28125, "learning_rate": 0.001785875424863669, "loss": 0.2554, "step": 22300 }, { "epoch": 0.03954328936974891, "grad_norm": 0.5703125, "learning_rate": 0.0017858368863249315, "loss": 0.1859, "step": 22302 }, { "epoch": 0.03954683553505872, "grad_norm": 0.66796875, "learning_rate": 0.001785798344786742, "loss": 0.2124, "step": 22304 }, { "epoch": 0.039550381700368537, "grad_norm": 0.515625, "learning_rate": 0.0017857598002492686, "loss": 0.235, "step": 22306 }, { "epoch": 0.03955392786567835, "grad_norm": 1.9375, "learning_rate": 0.0017857212527126807, "loss": 0.3964, "step": 22308 }, { "epoch": 0.039557474030988166, "grad_norm": 1.0546875, "learning_rate": 0.0017856827021771459, "loss": 0.1741, "step": 22310 }, { "epoch": 0.03956102019629798, "grad_norm": 1.6015625, "learning_rate": 0.0017856441486428334, "loss": 0.4074, "step": 22312 }, { "epoch": 0.039564566361607795, "grad_norm": 0.515625, "learning_rate": 0.0017856055921099119, "loss": 0.2311, "step": 22314 }, { "epoch": 0.03956811252691761, "grad_norm": 0.427734375, "learning_rate": 0.0017855670325785495, "loss": 0.1507, "step": 22316 }, { "epoch": 0.039571658692227424, "grad_norm": 0.703125, "learning_rate": 0.0017855284700489151, "loss": 0.2447, "step": 22318 }, { "epoch": 0.03957520485753724, "grad_norm": 0.2890625, "learning_rate": 0.0017854899045211777, "loss": 0.1981, "step": 22320 }, { "epoch": 0.03957875102284706, "grad_norm": 0.6484375, "learning_rate": 0.001785451335995505, "loss": 0.2905, "step": 22322 }, { "epoch": 0.039582297188156874, "grad_norm": 0.2333984375, "learning_rate": 0.0017854127644720671, "loss": 0.1745, "step": 22324 }, { "epoch": 0.03958584335346669, "grad_norm": 0.62109375, "learning_rate": 0.0017853741899510314, "loss": 0.2087, "step": 22326 }, { "epoch": 0.039589389518776504, "grad_norm": 0.671875, "learning_rate": 0.0017853356124325672, "loss": 0.1757, "step": 22328 }, { "epoch": 0.03959293568408632, "grad_norm": 1.0546875, "learning_rate": 0.001785297031916843, "loss": 0.226, "step": 22330 }, { "epoch": 0.03959648184939613, "grad_norm": 0.353515625, "learning_rate": 0.001785258448404028, "loss": 0.2233, "step": 22332 }, { "epoch": 0.03960002801470595, "grad_norm": 0.55078125, "learning_rate": 0.0017852198618942905, "loss": 0.2188, "step": 22334 }, { "epoch": 0.03960357418001576, "grad_norm": 1.8828125, "learning_rate": 0.0017851812723877992, "loss": 0.3799, "step": 22336 }, { "epoch": 0.039607120345325576, "grad_norm": 0.44921875, "learning_rate": 0.001785142679884723, "loss": 0.1979, "step": 22338 }, { "epoch": 0.03961066651063539, "grad_norm": 2.359375, "learning_rate": 0.0017851040843852306, "loss": 0.2276, "step": 22340 }, { "epoch": 0.039614212675945205, "grad_norm": 0.302734375, "learning_rate": 0.0017850654858894911, "loss": 0.1964, "step": 22342 }, { "epoch": 0.03961775884125502, "grad_norm": 0.6953125, "learning_rate": 0.0017850268843976733, "loss": 0.2449, "step": 22344 }, { "epoch": 0.03962130500656484, "grad_norm": 0.80078125, "learning_rate": 0.0017849882799099454, "loss": 0.2109, "step": 22346 }, { "epoch": 0.039624851171874656, "grad_norm": 0.4921875, "learning_rate": 0.0017849496724264768, "loss": 0.2298, "step": 22348 }, { "epoch": 0.03962839733718447, "grad_norm": 0.263671875, "learning_rate": 0.0017849110619474362, "loss": 0.2042, "step": 22350 }, { "epoch": 0.039631943502494285, "grad_norm": 0.337890625, "learning_rate": 0.0017848724484729927, "loss": 0.1722, "step": 22352 }, { "epoch": 0.0396354896678041, "grad_norm": 0.6171875, "learning_rate": 0.0017848338320033148, "loss": 0.3229, "step": 22354 }, { "epoch": 0.039639035833113914, "grad_norm": 0.7109375, "learning_rate": 0.0017847952125385712, "loss": 0.4101, "step": 22356 }, { "epoch": 0.03964258199842373, "grad_norm": 0.50390625, "learning_rate": 0.0017847565900789312, "loss": 0.1905, "step": 22358 }, { "epoch": 0.03964612816373354, "grad_norm": 0.25390625, "learning_rate": 0.001784717964624564, "loss": 0.2077, "step": 22360 }, { "epoch": 0.03964967432904336, "grad_norm": 1.953125, "learning_rate": 0.0017846793361756378, "loss": 0.3593, "step": 22362 }, { "epoch": 0.03965322049435317, "grad_norm": 3.25, "learning_rate": 0.001784640704732322, "loss": 0.3158, "step": 22364 }, { "epoch": 0.03965676665966299, "grad_norm": 0.9609375, "learning_rate": 0.0017846020702947856, "loss": 0.2534, "step": 22366 }, { "epoch": 0.03966031282497281, "grad_norm": 0.4765625, "learning_rate": 0.0017845634328631972, "loss": 0.1518, "step": 22368 }, { "epoch": 0.03966385899028262, "grad_norm": 1.71875, "learning_rate": 0.0017845247924377261, "loss": 0.2004, "step": 22370 }, { "epoch": 0.03966740515559244, "grad_norm": 0.84765625, "learning_rate": 0.001784486149018541, "loss": 0.2589, "step": 22372 }, { "epoch": 0.03967095132090225, "grad_norm": 0.67578125, "learning_rate": 0.0017844475026058113, "loss": 0.1847, "step": 22374 }, { "epoch": 0.03967449748621207, "grad_norm": 0.3046875, "learning_rate": 0.001784408853199706, "loss": 0.1835, "step": 22376 }, { "epoch": 0.03967804365152188, "grad_norm": 0.33984375, "learning_rate": 0.0017843702008003934, "loss": 0.1979, "step": 22378 }, { "epoch": 0.039681589816831696, "grad_norm": 0.84765625, "learning_rate": 0.0017843315454080433, "loss": 0.4589, "step": 22380 }, { "epoch": 0.03968513598214151, "grad_norm": 0.333984375, "learning_rate": 0.0017842928870228246, "loss": 0.1963, "step": 22382 }, { "epoch": 0.039688682147451325, "grad_norm": 0.2890625, "learning_rate": 0.0017842542256449063, "loss": 0.3256, "step": 22384 }, { "epoch": 0.03969222831276114, "grad_norm": 0.75390625, "learning_rate": 0.0017842155612744573, "loss": 0.2031, "step": 22386 }, { "epoch": 0.039695774478070954, "grad_norm": 0.62109375, "learning_rate": 0.001784176893911647, "loss": 0.1842, "step": 22388 }, { "epoch": 0.039699320643380775, "grad_norm": 0.8515625, "learning_rate": 0.0017841382235566443, "loss": 0.1823, "step": 22390 }, { "epoch": 0.03970286680869059, "grad_norm": 0.419921875, "learning_rate": 0.0017840995502096185, "loss": 0.19, "step": 22392 }, { "epoch": 0.039706412974000405, "grad_norm": 0.80078125, "learning_rate": 0.0017840608738707386, "loss": 0.2092, "step": 22394 }, { "epoch": 0.03970995913931022, "grad_norm": 0.55078125, "learning_rate": 0.001784022194540174, "loss": 0.2094, "step": 22396 }, { "epoch": 0.039713505304620034, "grad_norm": 1.0859375, "learning_rate": 0.001783983512218093, "loss": 0.3682, "step": 22398 }, { "epoch": 0.03971705146992985, "grad_norm": 1.1640625, "learning_rate": 0.0017839448269046662, "loss": 0.2658, "step": 22400 }, { "epoch": 0.03972059763523966, "grad_norm": 1.25, "learning_rate": 0.0017839061386000616, "loss": 0.2217, "step": 22402 }, { "epoch": 0.03972414380054948, "grad_norm": 1.296875, "learning_rate": 0.0017838674473044489, "loss": 0.185, "step": 22404 }, { "epoch": 0.03972768996585929, "grad_norm": 0.546875, "learning_rate": 0.0017838287530179972, "loss": 0.1948, "step": 22406 }, { "epoch": 0.039731236131169106, "grad_norm": 1.671875, "learning_rate": 0.0017837900557408758, "loss": 0.2185, "step": 22408 }, { "epoch": 0.03973478229647892, "grad_norm": 0.400390625, "learning_rate": 0.0017837513554732538, "loss": 0.1688, "step": 22410 }, { "epoch": 0.039738328461788736, "grad_norm": 0.640625, "learning_rate": 0.0017837126522153007, "loss": 0.2303, "step": 22412 }, { "epoch": 0.03974187462709856, "grad_norm": 0.4140625, "learning_rate": 0.0017836739459671856, "loss": 0.209, "step": 22414 }, { "epoch": 0.03974542079240837, "grad_norm": 1.6015625, "learning_rate": 0.0017836352367290775, "loss": 0.3187, "step": 22416 }, { "epoch": 0.039748966957718186, "grad_norm": 0.466796875, "learning_rate": 0.0017835965245011462, "loss": 0.2451, "step": 22418 }, { "epoch": 0.039752513123028, "grad_norm": 0.78515625, "learning_rate": 0.0017835578092835608, "loss": 0.2171, "step": 22420 }, { "epoch": 0.039756059288337815, "grad_norm": 0.400390625, "learning_rate": 0.0017835190910764907, "loss": 0.1928, "step": 22422 }, { "epoch": 0.03975960545364763, "grad_norm": 0.7578125, "learning_rate": 0.0017834803698801049, "loss": 0.2319, "step": 22424 }, { "epoch": 0.039763151618957444, "grad_norm": 0.57421875, "learning_rate": 0.001783441645694573, "loss": 0.1809, "step": 22426 }, { "epoch": 0.03976669778426726, "grad_norm": 0.369140625, "learning_rate": 0.0017834029185200644, "loss": 0.1798, "step": 22428 }, { "epoch": 0.039770243949577073, "grad_norm": 0.224609375, "learning_rate": 0.0017833641883567481, "loss": 0.2321, "step": 22430 }, { "epoch": 0.03977379011488689, "grad_norm": 0.494140625, "learning_rate": 0.0017833254552047943, "loss": 0.2095, "step": 22432 }, { "epoch": 0.0397773362801967, "grad_norm": 0.4296875, "learning_rate": 0.0017832867190643717, "loss": 0.2292, "step": 22434 }, { "epoch": 0.039780882445506524, "grad_norm": 0.578125, "learning_rate": 0.00178324797993565, "loss": 0.3047, "step": 22436 }, { "epoch": 0.03978442861081634, "grad_norm": 0.33203125, "learning_rate": 0.001783209237818798, "loss": 0.2169, "step": 22438 }, { "epoch": 0.03978797477612615, "grad_norm": 0.703125, "learning_rate": 0.0017831704927139863, "loss": 0.2251, "step": 22440 }, { "epoch": 0.03979152094143597, "grad_norm": 0.2890625, "learning_rate": 0.0017831317446213833, "loss": 0.2115, "step": 22442 }, { "epoch": 0.03979506710674578, "grad_norm": 0.58984375, "learning_rate": 0.001783092993541159, "loss": 0.1597, "step": 22444 }, { "epoch": 0.0397986132720556, "grad_norm": 0.361328125, "learning_rate": 0.0017830542394734828, "loss": 0.4002, "step": 22446 }, { "epoch": 0.03980215943736541, "grad_norm": 0.64453125, "learning_rate": 0.001783015482418524, "loss": 0.2015, "step": 22448 }, { "epoch": 0.039805705602675226, "grad_norm": 0.392578125, "learning_rate": 0.0017829767223764522, "loss": 0.2061, "step": 22450 }, { "epoch": 0.03980925176798504, "grad_norm": 0.859375, "learning_rate": 0.0017829379593474368, "loss": 0.263, "step": 22452 }, { "epoch": 0.039812797933294855, "grad_norm": 0.318359375, "learning_rate": 0.0017828991933316477, "loss": 0.2022, "step": 22454 }, { "epoch": 0.03981634409860467, "grad_norm": 0.3671875, "learning_rate": 0.0017828604243292543, "loss": 0.3088, "step": 22456 }, { "epoch": 0.03981989026391449, "grad_norm": 0.515625, "learning_rate": 0.0017828216523404258, "loss": 0.1977, "step": 22458 }, { "epoch": 0.039823436429224306, "grad_norm": 0.498046875, "learning_rate": 0.0017827828773653322, "loss": 0.2256, "step": 22460 }, { "epoch": 0.03982698259453412, "grad_norm": 0.66796875, "learning_rate": 0.0017827440994041429, "loss": 0.2404, "step": 22462 }, { "epoch": 0.039830528759843935, "grad_norm": 1.765625, "learning_rate": 0.0017827053184570271, "loss": 0.2647, "step": 22464 }, { "epoch": 0.03983407492515375, "grad_norm": 1.2578125, "learning_rate": 0.0017826665345241554, "loss": 0.222, "step": 22466 }, { "epoch": 0.039837621090463564, "grad_norm": 0.37890625, "learning_rate": 0.0017826277476056967, "loss": 0.1669, "step": 22468 }, { "epoch": 0.03984116725577338, "grad_norm": 0.2890625, "learning_rate": 0.0017825889577018203, "loss": 0.2117, "step": 22470 }, { "epoch": 0.03984471342108319, "grad_norm": 0.875, "learning_rate": 0.0017825501648126966, "loss": 0.3853, "step": 22472 }, { "epoch": 0.03984825958639301, "grad_norm": 0.8515625, "learning_rate": 0.001782511368938495, "loss": 0.2538, "step": 22474 }, { "epoch": 0.03985180575170282, "grad_norm": 0.515625, "learning_rate": 0.0017824725700793852, "loss": 0.192, "step": 22476 }, { "epoch": 0.03985535191701264, "grad_norm": 0.373046875, "learning_rate": 0.0017824337682355363, "loss": 0.2097, "step": 22478 }, { "epoch": 0.03985889808232245, "grad_norm": 1.1640625, "learning_rate": 0.0017823949634071191, "loss": 0.2595, "step": 22480 }, { "epoch": 0.03986244424763227, "grad_norm": 0.54296875, "learning_rate": 0.0017823561555943023, "loss": 0.1397, "step": 22482 }, { "epoch": 0.03986599041294209, "grad_norm": 0.1630859375, "learning_rate": 0.0017823173447972563, "loss": 0.2044, "step": 22484 }, { "epoch": 0.0398695365782519, "grad_norm": 0.427734375, "learning_rate": 0.0017822785310161505, "loss": 0.1797, "step": 22486 }, { "epoch": 0.039873082743561716, "grad_norm": 0.53125, "learning_rate": 0.0017822397142511547, "loss": 0.2188, "step": 22488 }, { "epoch": 0.03987662890887153, "grad_norm": 0.283203125, "learning_rate": 0.0017822008945024388, "loss": 0.207, "step": 22490 }, { "epoch": 0.039880175074181345, "grad_norm": 1.65625, "learning_rate": 0.0017821620717701724, "loss": 0.2045, "step": 22492 }, { "epoch": 0.03988372123949116, "grad_norm": 0.466796875, "learning_rate": 0.001782123246054525, "loss": 0.2525, "step": 22494 }, { "epoch": 0.039887267404800975, "grad_norm": 0.365234375, "learning_rate": 0.0017820844173556674, "loss": 0.1855, "step": 22496 }, { "epoch": 0.03989081357011079, "grad_norm": 0.3046875, "learning_rate": 0.001782045585673768, "loss": 0.2436, "step": 22498 }, { "epoch": 0.039894359735420604, "grad_norm": 0.42578125, "learning_rate": 0.0017820067510089979, "loss": 0.2402, "step": 22500 }, { "epoch": 0.03989790590073042, "grad_norm": 2.046875, "learning_rate": 0.0017819679133615266, "loss": 0.3063, "step": 22502 }, { "epoch": 0.03990145206604024, "grad_norm": 0.484375, "learning_rate": 0.001781929072731523, "loss": 0.2296, "step": 22504 }, { "epoch": 0.039904998231350054, "grad_norm": 0.59375, "learning_rate": 0.0017818902291191585, "loss": 0.1958, "step": 22506 }, { "epoch": 0.03990854439665987, "grad_norm": 13.75, "learning_rate": 0.001781851382524602, "loss": 0.3618, "step": 22508 }, { "epoch": 0.03991209056196968, "grad_norm": 0.283203125, "learning_rate": 0.0017818125329480236, "loss": 0.2303, "step": 22510 }, { "epoch": 0.0399156367272795, "grad_norm": 0.283203125, "learning_rate": 0.0017817736803895931, "loss": 0.1836, "step": 22512 }, { "epoch": 0.03991918289258931, "grad_norm": 0.439453125, "learning_rate": 0.0017817348248494807, "loss": 0.2261, "step": 22514 }, { "epoch": 0.03992272905789913, "grad_norm": 0.85546875, "learning_rate": 0.001781695966327856, "loss": 0.1709, "step": 22516 }, { "epoch": 0.03992627522320894, "grad_norm": 0.30078125, "learning_rate": 0.0017816571048248893, "loss": 0.1793, "step": 22518 }, { "epoch": 0.039929821388518756, "grad_norm": 0.310546875, "learning_rate": 0.0017816182403407503, "loss": 0.1631, "step": 22520 }, { "epoch": 0.03993336755382857, "grad_norm": 0.55078125, "learning_rate": 0.001781579372875609, "loss": 0.1742, "step": 22522 }, { "epoch": 0.039936913719138385, "grad_norm": 0.80078125, "learning_rate": 0.0017815405024296355, "loss": 0.2342, "step": 22524 }, { "epoch": 0.03994045988444821, "grad_norm": 0.466796875, "learning_rate": 0.0017815016290029999, "loss": 0.2644, "step": 22526 }, { "epoch": 0.03994400604975802, "grad_norm": 0.37109375, "learning_rate": 0.001781462752595872, "loss": 0.1502, "step": 22528 }, { "epoch": 0.039947552215067836, "grad_norm": 1.1953125, "learning_rate": 0.0017814238732084219, "loss": 0.2626, "step": 22530 }, { "epoch": 0.03995109838037765, "grad_norm": 0.38671875, "learning_rate": 0.0017813849908408193, "loss": 0.2062, "step": 22532 }, { "epoch": 0.039954644545687465, "grad_norm": 0.8828125, "learning_rate": 0.0017813461054932344, "loss": 0.4728, "step": 22534 }, { "epoch": 0.03995819071099728, "grad_norm": 0.37890625, "learning_rate": 0.001781307217165838, "loss": 0.1716, "step": 22536 }, { "epoch": 0.039961736876307094, "grad_norm": 0.369140625, "learning_rate": 0.001781268325858799, "loss": 0.2533, "step": 22538 }, { "epoch": 0.03996528304161691, "grad_norm": 0.62109375, "learning_rate": 0.0017812294315722884, "loss": 0.4162, "step": 22540 }, { "epoch": 0.03996882920692672, "grad_norm": 1.09375, "learning_rate": 0.0017811905343064758, "loss": 0.2461, "step": 22542 }, { "epoch": 0.03997237537223654, "grad_norm": 0.91796875, "learning_rate": 0.0017811516340615317, "loss": 0.1988, "step": 22544 }, { "epoch": 0.03997592153754635, "grad_norm": 0.61328125, "learning_rate": 0.0017811127308376257, "loss": 0.228, "step": 22546 }, { "epoch": 0.03997946770285617, "grad_norm": 0.3046875, "learning_rate": 0.0017810738246349285, "loss": 0.1766, "step": 22548 }, { "epoch": 0.03998301386816599, "grad_norm": 1.1640625, "learning_rate": 0.00178103491545361, "loss": 0.3774, "step": 22550 }, { "epoch": 0.0399865600334758, "grad_norm": 1.0625, "learning_rate": 0.0017809960032938406, "loss": 0.3016, "step": 22552 }, { "epoch": 0.03999010619878562, "grad_norm": 0.287109375, "learning_rate": 0.0017809570881557896, "loss": 0.1963, "step": 22554 }, { "epoch": 0.03999365236409543, "grad_norm": 4.0625, "learning_rate": 0.0017809181700396285, "loss": 0.2796, "step": 22556 }, { "epoch": 0.039997198529405247, "grad_norm": 0.80078125, "learning_rate": 0.0017808792489455263, "loss": 0.2096, "step": 22558 }, { "epoch": 0.04000074469471506, "grad_norm": 1.078125, "learning_rate": 0.0017808403248736541, "loss": 0.1856, "step": 22560 }, { "epoch": 0.040004290860024876, "grad_norm": 0.23828125, "learning_rate": 0.0017808013978241818, "loss": 0.1762, "step": 22562 }, { "epoch": 0.04000783702533469, "grad_norm": 1.1171875, "learning_rate": 0.0017807624677972796, "loss": 0.2175, "step": 22564 }, { "epoch": 0.040011383190644505, "grad_norm": 0.365234375, "learning_rate": 0.0017807235347931177, "loss": 0.1747, "step": 22566 }, { "epoch": 0.04001492935595432, "grad_norm": 0.67578125, "learning_rate": 0.0017806845988118665, "loss": 0.1759, "step": 22568 }, { "epoch": 0.040018475521264134, "grad_norm": 1.140625, "learning_rate": 0.0017806456598536964, "loss": 0.3186, "step": 22570 }, { "epoch": 0.040022021686573955, "grad_norm": 0.357421875, "learning_rate": 0.0017806067179187777, "loss": 0.1907, "step": 22572 }, { "epoch": 0.04002556785188377, "grad_norm": 0.66015625, "learning_rate": 0.00178056777300728, "loss": 0.2771, "step": 22574 }, { "epoch": 0.040029114017193584, "grad_norm": 0.208984375, "learning_rate": 0.0017805288251193747, "loss": 0.1618, "step": 22576 }, { "epoch": 0.0400326601825034, "grad_norm": 1.671875, "learning_rate": 0.0017804898742552313, "loss": 0.2619, "step": 22578 }, { "epoch": 0.040036206347813214, "grad_norm": 0.421875, "learning_rate": 0.0017804509204150205, "loss": 0.2328, "step": 22580 }, { "epoch": 0.04003975251312303, "grad_norm": 0.45703125, "learning_rate": 0.0017804119635989127, "loss": 0.1591, "step": 22582 }, { "epoch": 0.04004329867843284, "grad_norm": 1.5703125, "learning_rate": 0.0017803730038070782, "loss": 0.2499, "step": 22584 }, { "epoch": 0.04004684484374266, "grad_norm": 0.83203125, "learning_rate": 0.0017803340410396874, "loss": 0.2408, "step": 22586 }, { "epoch": 0.04005039100905247, "grad_norm": 0.5859375, "learning_rate": 0.001780295075296911, "loss": 0.1496, "step": 22588 }, { "epoch": 0.040053937174362286, "grad_norm": 0.439453125, "learning_rate": 0.0017802561065789187, "loss": 0.2447, "step": 22590 }, { "epoch": 0.0400574833396721, "grad_norm": 0.173828125, "learning_rate": 0.0017802171348858813, "loss": 0.1626, "step": 22592 }, { "epoch": 0.04006102950498192, "grad_norm": 0.5390625, "learning_rate": 0.0017801781602179694, "loss": 0.2996, "step": 22594 }, { "epoch": 0.04006457567029174, "grad_norm": 0.3515625, "learning_rate": 0.0017801391825753535, "loss": 0.2186, "step": 22596 }, { "epoch": 0.04006812183560155, "grad_norm": 0.19140625, "learning_rate": 0.0017801002019582036, "loss": 0.1809, "step": 22598 }, { "epoch": 0.040071668000911366, "grad_norm": 0.84765625, "learning_rate": 0.0017800612183666905, "loss": 0.3143, "step": 22600 }, { "epoch": 0.04007521416622118, "grad_norm": 0.55859375, "learning_rate": 0.0017800222318009847, "loss": 0.2331, "step": 22602 }, { "epoch": 0.040078760331530995, "grad_norm": 0.65234375, "learning_rate": 0.0017799832422612566, "loss": 0.2865, "step": 22604 }, { "epoch": 0.04008230649684081, "grad_norm": 0.3515625, "learning_rate": 0.001779944249747677, "loss": 0.4221, "step": 22606 }, { "epoch": 0.040085852662150624, "grad_norm": 1.21875, "learning_rate": 0.0017799052542604161, "loss": 0.2567, "step": 22608 }, { "epoch": 0.04008939882746044, "grad_norm": 0.33203125, "learning_rate": 0.0017798662557996444, "loss": 0.1969, "step": 22610 }, { "epoch": 0.04009294499277025, "grad_norm": 2.25, "learning_rate": 0.0017798272543655326, "loss": 0.2831, "step": 22612 }, { "epoch": 0.04009649115808007, "grad_norm": 0.52734375, "learning_rate": 0.0017797882499582516, "loss": 0.141, "step": 22614 }, { "epoch": 0.04010003732338988, "grad_norm": 0.376953125, "learning_rate": 0.0017797492425779716, "loss": 0.1769, "step": 22616 }, { "epoch": 0.040103583488699704, "grad_norm": 0.44921875, "learning_rate": 0.0017797102322248631, "loss": 0.2547, "step": 22618 }, { "epoch": 0.04010712965400952, "grad_norm": 0.3515625, "learning_rate": 0.0017796712188990966, "loss": 0.2034, "step": 22620 }, { "epoch": 0.04011067581931933, "grad_norm": 1.8125, "learning_rate": 0.0017796322026008436, "loss": 0.361, "step": 22622 }, { "epoch": 0.04011422198462915, "grad_norm": 0.486328125, "learning_rate": 0.0017795931833302736, "loss": 0.1582, "step": 22624 }, { "epoch": 0.04011776814993896, "grad_norm": 0.453125, "learning_rate": 0.001779554161087558, "loss": 0.1978, "step": 22626 }, { "epoch": 0.04012131431524878, "grad_norm": 0.5234375, "learning_rate": 0.0017795151358728674, "loss": 0.243, "step": 22628 }, { "epoch": 0.04012486048055859, "grad_norm": 0.9453125, "learning_rate": 0.0017794761076863719, "loss": 0.2052, "step": 22630 }, { "epoch": 0.040128406645868406, "grad_norm": 2.21875, "learning_rate": 0.0017794370765282428, "loss": 0.2412, "step": 22632 }, { "epoch": 0.04013195281117822, "grad_norm": 0.54296875, "learning_rate": 0.0017793980423986507, "loss": 0.224, "step": 22634 }, { "epoch": 0.040135498976488035, "grad_norm": 0.361328125, "learning_rate": 0.001779359005297766, "loss": 0.1344, "step": 22636 }, { "epoch": 0.04013904514179785, "grad_norm": 0.6875, "learning_rate": 0.00177931996522576, "loss": 0.2234, "step": 22638 }, { "epoch": 0.04014259130710767, "grad_norm": 0.34375, "learning_rate": 0.0017792809221828026, "loss": 0.1831, "step": 22640 }, { "epoch": 0.040146137472417485, "grad_norm": 0.765625, "learning_rate": 0.0017792418761690655, "loss": 0.2317, "step": 22642 }, { "epoch": 0.0401496836377273, "grad_norm": 0.8671875, "learning_rate": 0.0017792028271847184, "loss": 0.1726, "step": 22644 }, { "epoch": 0.040153229803037115, "grad_norm": 0.96484375, "learning_rate": 0.0017791637752299333, "loss": 0.2046, "step": 22646 }, { "epoch": 0.04015677596834693, "grad_norm": 0.453125, "learning_rate": 0.0017791247203048797, "loss": 0.3859, "step": 22648 }, { "epoch": 0.040160322133656744, "grad_norm": 0.34765625, "learning_rate": 0.0017790856624097296, "loss": 0.1831, "step": 22650 }, { "epoch": 0.04016386829896656, "grad_norm": 1.046875, "learning_rate": 0.0017790466015446531, "loss": 0.2486, "step": 22652 }, { "epoch": 0.04016741446427637, "grad_norm": 0.45703125, "learning_rate": 0.0017790075377098209, "loss": 0.1742, "step": 22654 }, { "epoch": 0.04017096062958619, "grad_norm": 1.1875, "learning_rate": 0.0017789684709054046, "loss": 0.2696, "step": 22656 }, { "epoch": 0.040174506794896, "grad_norm": 0.42578125, "learning_rate": 0.0017789294011315744, "loss": 0.1789, "step": 22658 }, { "epoch": 0.040178052960205816, "grad_norm": 0.58984375, "learning_rate": 0.0017788903283885015, "loss": 0.2149, "step": 22660 }, { "epoch": 0.04018159912551564, "grad_norm": 0.44921875, "learning_rate": 0.0017788512526763563, "loss": 0.2549, "step": 22662 }, { "epoch": 0.04018514529082545, "grad_norm": 0.2021484375, "learning_rate": 0.0017788121739953104, "loss": 0.2346, "step": 22664 }, { "epoch": 0.04018869145613527, "grad_norm": 0.328125, "learning_rate": 0.001778773092345534, "loss": 0.1653, "step": 22666 }, { "epoch": 0.04019223762144508, "grad_norm": 0.34765625, "learning_rate": 0.0017787340077271986, "loss": 0.2001, "step": 22668 }, { "epoch": 0.040195783786754896, "grad_norm": 0.3203125, "learning_rate": 0.001778694920140475, "loss": 0.1679, "step": 22670 }, { "epoch": 0.04019932995206471, "grad_norm": 0.37109375, "learning_rate": 0.001778655829585534, "loss": 0.1692, "step": 22672 }, { "epoch": 0.040202876117374525, "grad_norm": 0.6875, "learning_rate": 0.0017786167360625464, "loss": 0.1622, "step": 22674 }, { "epoch": 0.04020642228268434, "grad_norm": 0.396484375, "learning_rate": 0.0017785776395716835, "loss": 0.1816, "step": 22676 }, { "epoch": 0.040209968447994154, "grad_norm": 0.640625, "learning_rate": 0.0017785385401131162, "loss": 0.1979, "step": 22678 }, { "epoch": 0.04021351461330397, "grad_norm": 0.8046875, "learning_rate": 0.0017784994376870155, "loss": 0.2207, "step": 22680 }, { "epoch": 0.040217060778613783, "grad_norm": 0.310546875, "learning_rate": 0.0017784603322935526, "loss": 0.1433, "step": 22682 }, { "epoch": 0.0402206069439236, "grad_norm": 0.74609375, "learning_rate": 0.001778421223932898, "loss": 0.1815, "step": 22684 }, { "epoch": 0.04022415310923342, "grad_norm": 0.9453125, "learning_rate": 0.001778382112605223, "loss": 0.2913, "step": 22686 }, { "epoch": 0.040227699274543234, "grad_norm": 0.279296875, "learning_rate": 0.001778342998310699, "loss": 0.1979, "step": 22688 }, { "epoch": 0.04023124543985305, "grad_norm": 0.65234375, "learning_rate": 0.0017783038810494963, "loss": 0.3506, "step": 22690 }, { "epoch": 0.04023479160516286, "grad_norm": 0.53125, "learning_rate": 0.0017782647608217865, "loss": 0.1917, "step": 22692 }, { "epoch": 0.04023833777047268, "grad_norm": 0.71875, "learning_rate": 0.001778225637627741, "loss": 0.263, "step": 22694 }, { "epoch": 0.04024188393578249, "grad_norm": 0.70703125, "learning_rate": 0.0017781865114675304, "loss": 0.3052, "step": 22696 }, { "epoch": 0.04024543010109231, "grad_norm": 0.44140625, "learning_rate": 0.0017781473823413256, "loss": 0.1704, "step": 22698 }, { "epoch": 0.04024897626640212, "grad_norm": 0.6015625, "learning_rate": 0.0017781082502492985, "loss": 0.1745, "step": 22700 }, { "epoch": 0.040252522431711936, "grad_norm": 0.5546875, "learning_rate": 0.0017780691151916194, "loss": 0.2955, "step": 22702 }, { "epoch": 0.04025606859702175, "grad_norm": 0.796875, "learning_rate": 0.0017780299771684605, "loss": 0.2641, "step": 22704 }, { "epoch": 0.040259614762331565, "grad_norm": 0.5546875, "learning_rate": 0.0017779908361799917, "loss": 0.2149, "step": 22706 }, { "epoch": 0.04026316092764139, "grad_norm": 0.435546875, "learning_rate": 0.0017779516922263852, "loss": 0.1936, "step": 22708 }, { "epoch": 0.0402667070929512, "grad_norm": 0.267578125, "learning_rate": 0.0017779125453078113, "loss": 0.1879, "step": 22710 }, { "epoch": 0.040270253258261016, "grad_norm": 0.25, "learning_rate": 0.0017778733954244422, "loss": 0.2303, "step": 22712 }, { "epoch": 0.04027379942357083, "grad_norm": 0.23828125, "learning_rate": 0.0017778342425764483, "loss": 0.2025, "step": 22714 }, { "epoch": 0.040277345588880645, "grad_norm": 1.5078125, "learning_rate": 0.0017777950867640016, "loss": 0.2637, "step": 22716 }, { "epoch": 0.04028089175419046, "grad_norm": 1.0234375, "learning_rate": 0.0017777559279872723, "loss": 0.194, "step": 22718 }, { "epoch": 0.040284437919500274, "grad_norm": 0.5390625, "learning_rate": 0.0017777167662464326, "loss": 0.2075, "step": 22720 }, { "epoch": 0.04028798408481009, "grad_norm": 0.6328125, "learning_rate": 0.0017776776015416534, "loss": 0.2202, "step": 22722 }, { "epoch": 0.0402915302501199, "grad_norm": 0.462890625, "learning_rate": 0.001777638433873106, "loss": 0.2157, "step": 22724 }, { "epoch": 0.04029507641542972, "grad_norm": 0.26953125, "learning_rate": 0.0017775992632409617, "loss": 0.1907, "step": 22726 }, { "epoch": 0.04029862258073953, "grad_norm": 0.2177734375, "learning_rate": 0.0017775600896453919, "loss": 0.1939, "step": 22728 }, { "epoch": 0.040302168746049354, "grad_norm": 0.2890625, "learning_rate": 0.0017775209130865676, "loss": 0.2506, "step": 22730 }, { "epoch": 0.04030571491135917, "grad_norm": 0.212890625, "learning_rate": 0.0017774817335646604, "loss": 0.1979, "step": 22732 }, { "epoch": 0.04030926107666898, "grad_norm": 0.341796875, "learning_rate": 0.0017774425510798423, "loss": 0.3306, "step": 22734 }, { "epoch": 0.0403128072419788, "grad_norm": 0.373046875, "learning_rate": 0.0017774033656322832, "loss": 0.178, "step": 22736 }, { "epoch": 0.04031635340728861, "grad_norm": 0.5625, "learning_rate": 0.0017773641772221554, "loss": 0.2687, "step": 22738 }, { "epoch": 0.040319899572598426, "grad_norm": 0.83203125, "learning_rate": 0.0017773249858496303, "loss": 0.2223, "step": 22740 }, { "epoch": 0.04032344573790824, "grad_norm": 0.4375, "learning_rate": 0.001777285791514879, "loss": 0.2178, "step": 22742 }, { "epoch": 0.040326991903218055, "grad_norm": 0.57421875, "learning_rate": 0.0017772465942180735, "loss": 0.2289, "step": 22744 }, { "epoch": 0.04033053806852787, "grad_norm": 0.478515625, "learning_rate": 0.0017772073939593845, "loss": 0.2133, "step": 22746 }, { "epoch": 0.040334084233837685, "grad_norm": 0.51953125, "learning_rate": 0.0017771681907389836, "loss": 0.2125, "step": 22748 }, { "epoch": 0.0403376303991475, "grad_norm": 0.388671875, "learning_rate": 0.0017771289845570424, "loss": 0.2092, "step": 22750 }, { "epoch": 0.040341176564457314, "grad_norm": 1.4609375, "learning_rate": 0.0017770897754137325, "loss": 0.1956, "step": 22752 }, { "epoch": 0.040344722729767135, "grad_norm": 0.3515625, "learning_rate": 0.001777050563309225, "loss": 0.2131, "step": 22754 }, { "epoch": 0.04034826889507695, "grad_norm": 0.466796875, "learning_rate": 0.001777011348243692, "loss": 0.2089, "step": 22756 }, { "epoch": 0.040351815060386764, "grad_norm": 0.62109375, "learning_rate": 0.0017769721302173043, "loss": 0.2507, "step": 22758 }, { "epoch": 0.04035536122569658, "grad_norm": 0.96484375, "learning_rate": 0.0017769329092302338, "loss": 0.2087, "step": 22760 }, { "epoch": 0.04035890739100639, "grad_norm": 0.75, "learning_rate": 0.001776893685282652, "loss": 0.2321, "step": 22762 }, { "epoch": 0.04036245355631621, "grad_norm": 0.546875, "learning_rate": 0.0017768544583747303, "loss": 0.1702, "step": 22764 }, { "epoch": 0.04036599972162602, "grad_norm": 0.208984375, "learning_rate": 0.0017768152285066406, "loss": 0.155, "step": 22766 }, { "epoch": 0.04036954588693584, "grad_norm": 0.55078125, "learning_rate": 0.0017767759956785541, "loss": 0.2236, "step": 22768 }, { "epoch": 0.04037309205224565, "grad_norm": 0.341796875, "learning_rate": 0.0017767367598906423, "loss": 0.1925, "step": 22770 }, { "epoch": 0.040376638217555466, "grad_norm": 0.48046875, "learning_rate": 0.001776697521143077, "loss": 0.3113, "step": 22772 }, { "epoch": 0.04038018438286528, "grad_norm": 2.15625, "learning_rate": 0.0017766582794360305, "loss": 0.2442, "step": 22774 }, { "epoch": 0.0403837305481751, "grad_norm": 1.203125, "learning_rate": 0.001776619034769673, "loss": 0.2394, "step": 22776 }, { "epoch": 0.04038727671348492, "grad_norm": 0.326171875, "learning_rate": 0.001776579787144177, "loss": 0.1634, "step": 22778 }, { "epoch": 0.04039082287879473, "grad_norm": 0.57421875, "learning_rate": 0.0017765405365597145, "loss": 0.2787, "step": 22780 }, { "epoch": 0.040394369044104546, "grad_norm": 0.443359375, "learning_rate": 0.0017765012830164562, "loss": 0.2401, "step": 22782 }, { "epoch": 0.04039791520941436, "grad_norm": 0.419921875, "learning_rate": 0.001776462026514574, "loss": 0.2863, "step": 22784 }, { "epoch": 0.040401461374724175, "grad_norm": 1.375, "learning_rate": 0.0017764227670542406, "loss": 0.4066, "step": 22786 }, { "epoch": 0.04040500754003399, "grad_norm": 0.224609375, "learning_rate": 0.0017763835046356266, "loss": 0.1735, "step": 22788 }, { "epoch": 0.040408553705343804, "grad_norm": 0.40625, "learning_rate": 0.0017763442392589037, "loss": 0.2091, "step": 22790 }, { "epoch": 0.04041209987065362, "grad_norm": 2.015625, "learning_rate": 0.0017763049709242445, "loss": 0.4002, "step": 22792 }, { "epoch": 0.04041564603596343, "grad_norm": 0.41015625, "learning_rate": 0.0017762656996318197, "loss": 0.1968, "step": 22794 }, { "epoch": 0.04041919220127325, "grad_norm": 0.365234375, "learning_rate": 0.001776226425381802, "loss": 0.1887, "step": 22796 }, { "epoch": 0.04042273836658307, "grad_norm": 0.24609375, "learning_rate": 0.0017761871481743621, "loss": 0.1856, "step": 22798 }, { "epoch": 0.040426284531892884, "grad_norm": 0.515625, "learning_rate": 0.001776147868009673, "loss": 0.2419, "step": 22800 }, { "epoch": 0.0404298306972027, "grad_norm": 0.6015625, "learning_rate": 0.0017761085848879055, "loss": 0.1933, "step": 22802 }, { "epoch": 0.04043337686251251, "grad_norm": 0.376953125, "learning_rate": 0.001776069298809232, "loss": 0.3345, "step": 22804 }, { "epoch": 0.04043692302782233, "grad_norm": 0.51953125, "learning_rate": 0.001776030009773824, "loss": 0.2815, "step": 22806 }, { "epoch": 0.04044046919313214, "grad_norm": 0.83984375, "learning_rate": 0.0017759907177818536, "loss": 0.2798, "step": 22808 }, { "epoch": 0.040444015358441956, "grad_norm": 2.609375, "learning_rate": 0.0017759514228334922, "loss": 0.2292, "step": 22810 }, { "epoch": 0.04044756152375177, "grad_norm": 0.25390625, "learning_rate": 0.001775912124928912, "loss": 0.1818, "step": 22812 }, { "epoch": 0.040451107689061586, "grad_norm": 3.890625, "learning_rate": 0.001775872824068285, "loss": 0.2926, "step": 22814 }, { "epoch": 0.0404546538543714, "grad_norm": 0.341796875, "learning_rate": 0.0017758335202517823, "loss": 0.2112, "step": 22816 }, { "epoch": 0.040458200019681215, "grad_norm": 0.3671875, "learning_rate": 0.0017757942134795767, "loss": 0.1841, "step": 22818 }, { "epoch": 0.04046174618499103, "grad_norm": 0.6484375, "learning_rate": 0.00177575490375184, "loss": 0.1668, "step": 22820 }, { "epoch": 0.04046529235030085, "grad_norm": 0.640625, "learning_rate": 0.0017757155910687436, "loss": 0.236, "step": 22822 }, { "epoch": 0.040468838515610665, "grad_norm": 0.341796875, "learning_rate": 0.0017756762754304598, "loss": 0.1939, "step": 22824 }, { "epoch": 0.04047238468092048, "grad_norm": 0.8125, "learning_rate": 0.0017756369568371604, "loss": 0.2601, "step": 22826 }, { "epoch": 0.040475930846230294, "grad_norm": 0.7421875, "learning_rate": 0.0017755976352890174, "loss": 0.2292, "step": 22828 }, { "epoch": 0.04047947701154011, "grad_norm": 0.5078125, "learning_rate": 0.0017755583107862028, "loss": 0.2038, "step": 22830 }, { "epoch": 0.040483023176849924, "grad_norm": 0.953125, "learning_rate": 0.0017755189833288888, "loss": 0.2102, "step": 22832 }, { "epoch": 0.04048656934215974, "grad_norm": 0.34375, "learning_rate": 0.0017754796529172467, "loss": 0.2589, "step": 22834 }, { "epoch": 0.04049011550746955, "grad_norm": 0.296875, "learning_rate": 0.0017754403195514494, "loss": 0.1979, "step": 22836 }, { "epoch": 0.04049366167277937, "grad_norm": 0.1982421875, "learning_rate": 0.0017754009832316685, "loss": 0.1727, "step": 22838 }, { "epoch": 0.04049720783808918, "grad_norm": 0.58203125, "learning_rate": 0.0017753616439580755, "loss": 0.2159, "step": 22840 }, { "epoch": 0.040500754003398996, "grad_norm": 0.361328125, "learning_rate": 0.0017753223017308434, "loss": 0.1862, "step": 22842 }, { "epoch": 0.04050430016870882, "grad_norm": 0.2265625, "learning_rate": 0.0017752829565501433, "loss": 0.1981, "step": 22844 }, { "epoch": 0.04050784633401863, "grad_norm": 1.28125, "learning_rate": 0.0017752436084161485, "loss": 0.2509, "step": 22846 }, { "epoch": 0.04051139249932845, "grad_norm": 0.302734375, "learning_rate": 0.00177520425732903, "loss": 0.2223, "step": 22848 }, { "epoch": 0.04051493866463826, "grad_norm": 0.78515625, "learning_rate": 0.0017751649032889602, "loss": 0.2327, "step": 22850 }, { "epoch": 0.040518484829948076, "grad_norm": 0.52734375, "learning_rate": 0.0017751255462961117, "loss": 0.2726, "step": 22852 }, { "epoch": 0.04052203099525789, "grad_norm": 0.33203125, "learning_rate": 0.001775086186350656, "loss": 0.1874, "step": 22854 }, { "epoch": 0.040525577160567705, "grad_norm": 0.267578125, "learning_rate": 0.0017750468234527654, "loss": 0.1793, "step": 22856 }, { "epoch": 0.04052912332587752, "grad_norm": 0.80859375, "learning_rate": 0.001775007457602612, "loss": 0.2426, "step": 22858 }, { "epoch": 0.040532669491187334, "grad_norm": 3.21875, "learning_rate": 0.0017749680888003681, "loss": 0.3081, "step": 22860 }, { "epoch": 0.04053621565649715, "grad_norm": 0.5703125, "learning_rate": 0.001774928717046206, "loss": 0.179, "step": 22862 }, { "epoch": 0.04053976182180696, "grad_norm": 0.55078125, "learning_rate": 0.0017748893423402978, "loss": 0.182, "step": 22864 }, { "epoch": 0.040543307987116785, "grad_norm": 0.625, "learning_rate": 0.0017748499646828156, "loss": 0.2264, "step": 22866 }, { "epoch": 0.0405468541524266, "grad_norm": 0.8984375, "learning_rate": 0.0017748105840739317, "loss": 0.2861, "step": 22868 }, { "epoch": 0.040550400317736414, "grad_norm": 0.23828125, "learning_rate": 0.0017747712005138179, "loss": 0.2218, "step": 22870 }, { "epoch": 0.04055394648304623, "grad_norm": 0.71484375, "learning_rate": 0.001774731814002647, "loss": 0.1753, "step": 22872 }, { "epoch": 0.04055749264835604, "grad_norm": 0.3046875, "learning_rate": 0.0017746924245405913, "loss": 0.199, "step": 22874 }, { "epoch": 0.04056103881366586, "grad_norm": 0.416015625, "learning_rate": 0.0017746530321278225, "loss": 0.1552, "step": 22876 }, { "epoch": 0.04056458497897567, "grad_norm": 0.91796875, "learning_rate": 0.0017746136367645137, "loss": 0.2423, "step": 22878 }, { "epoch": 0.04056813114428549, "grad_norm": 0.494140625, "learning_rate": 0.0017745742384508362, "loss": 0.1484, "step": 22880 }, { "epoch": 0.0405716773095953, "grad_norm": 0.875, "learning_rate": 0.0017745348371869631, "loss": 0.2091, "step": 22882 }, { "epoch": 0.040575223474905116, "grad_norm": 2.0625, "learning_rate": 0.0017744954329730665, "loss": 0.3813, "step": 22884 }, { "epoch": 0.04057876964021493, "grad_norm": 0.3828125, "learning_rate": 0.0017744560258093187, "loss": 0.2242, "step": 22886 }, { "epoch": 0.040582315805524745, "grad_norm": 0.640625, "learning_rate": 0.0017744166156958916, "loss": 0.1522, "step": 22888 }, { "epoch": 0.040585861970834566, "grad_norm": 0.40625, "learning_rate": 0.0017743772026329583, "loss": 0.1856, "step": 22890 }, { "epoch": 0.04058940813614438, "grad_norm": 0.3125, "learning_rate": 0.0017743377866206909, "loss": 0.4, "step": 22892 }, { "epoch": 0.040592954301454195, "grad_norm": 3.421875, "learning_rate": 0.0017742983676592617, "loss": 0.2224, "step": 22894 }, { "epoch": 0.04059650046676401, "grad_norm": 1.40625, "learning_rate": 0.0017742589457488426, "loss": 0.229, "step": 22896 }, { "epoch": 0.040600046632073825, "grad_norm": 0.296875, "learning_rate": 0.0017742195208896073, "loss": 0.1585, "step": 22898 }, { "epoch": 0.04060359279738364, "grad_norm": 0.40234375, "learning_rate": 0.001774180093081727, "loss": 0.2891, "step": 22900 }, { "epoch": 0.040607138962693454, "grad_norm": 0.546875, "learning_rate": 0.0017741406623253749, "loss": 0.2203, "step": 22902 }, { "epoch": 0.04061068512800327, "grad_norm": 0.3359375, "learning_rate": 0.001774101228620723, "loss": 0.1809, "step": 22904 }, { "epoch": 0.04061423129331308, "grad_norm": 1.0546875, "learning_rate": 0.0017740617919679437, "loss": 0.2655, "step": 22906 }, { "epoch": 0.0406177774586229, "grad_norm": 0.5078125, "learning_rate": 0.00177402235236721, "loss": 0.3109, "step": 22908 }, { "epoch": 0.04062132362393271, "grad_norm": 2.140625, "learning_rate": 0.0017739829098186934, "loss": 0.2354, "step": 22910 }, { "epoch": 0.04062486978924253, "grad_norm": 0.28515625, "learning_rate": 0.0017739434643225676, "loss": 0.1658, "step": 22912 }, { "epoch": 0.04062841595455235, "grad_norm": 0.67578125, "learning_rate": 0.0017739040158790045, "loss": 0.1382, "step": 22914 }, { "epoch": 0.04063196211986216, "grad_norm": 0.5546875, "learning_rate": 0.0017738645644881765, "loss": 0.223, "step": 22916 }, { "epoch": 0.04063550828517198, "grad_norm": 1.53125, "learning_rate": 0.0017738251101502565, "loss": 0.262, "step": 22918 }, { "epoch": 0.04063905445048179, "grad_norm": 0.92578125, "learning_rate": 0.001773785652865417, "loss": 0.2021, "step": 22920 }, { "epoch": 0.040642600615791606, "grad_norm": 0.58203125, "learning_rate": 0.0017737461926338302, "loss": 0.1961, "step": 22922 }, { "epoch": 0.04064614678110142, "grad_norm": 0.75390625, "learning_rate": 0.001773706729455669, "loss": 0.1981, "step": 22924 }, { "epoch": 0.040649692946411235, "grad_norm": 0.298828125, "learning_rate": 0.0017736672633311057, "loss": 0.2089, "step": 22926 }, { "epoch": 0.04065323911172105, "grad_norm": 0.4140625, "learning_rate": 0.0017736277942603134, "loss": 0.1582, "step": 22928 }, { "epoch": 0.040656785277030864, "grad_norm": 0.48828125, "learning_rate": 0.0017735883222434643, "loss": 0.1838, "step": 22930 }, { "epoch": 0.04066033144234068, "grad_norm": 1.25, "learning_rate": 0.001773548847280731, "loss": 0.1752, "step": 22932 }, { "epoch": 0.0406638776076505, "grad_norm": 1.859375, "learning_rate": 0.0017735093693722866, "loss": 0.533, "step": 22934 }, { "epoch": 0.040667423772960315, "grad_norm": 0.55859375, "learning_rate": 0.0017734698885183032, "loss": 0.2504, "step": 22936 }, { "epoch": 0.04067096993827013, "grad_norm": 1.453125, "learning_rate": 0.0017734304047189538, "loss": 0.3188, "step": 22938 }, { "epoch": 0.040674516103579944, "grad_norm": 0.271484375, "learning_rate": 0.0017733909179744108, "loss": 0.2338, "step": 22940 }, { "epoch": 0.04067806226888976, "grad_norm": 0.796875, "learning_rate": 0.0017733514282848473, "loss": 0.2289, "step": 22942 }, { "epoch": 0.04068160843419957, "grad_norm": 2.046875, "learning_rate": 0.0017733119356504357, "loss": 0.2713, "step": 22944 }, { "epoch": 0.04068515459950939, "grad_norm": 0.5234375, "learning_rate": 0.001773272440071349, "loss": 0.2236, "step": 22946 }, { "epoch": 0.0406887007648192, "grad_norm": 0.578125, "learning_rate": 0.0017732329415477595, "loss": 0.1627, "step": 22948 }, { "epoch": 0.04069224693012902, "grad_norm": 0.294921875, "learning_rate": 0.00177319344007984, "loss": 0.2277, "step": 22950 }, { "epoch": 0.04069579309543883, "grad_norm": 0.609375, "learning_rate": 0.0017731539356677635, "loss": 0.2055, "step": 22952 }, { "epoch": 0.040699339260748646, "grad_norm": 0.5390625, "learning_rate": 0.0017731144283117028, "loss": 0.1935, "step": 22954 }, { "epoch": 0.04070288542605846, "grad_norm": 0.390625, "learning_rate": 0.0017730749180118307, "loss": 0.2848, "step": 22956 }, { "epoch": 0.04070643159136828, "grad_norm": 0.396484375, "learning_rate": 0.0017730354047683198, "loss": 0.2068, "step": 22958 }, { "epoch": 0.0407099777566781, "grad_norm": 0.6484375, "learning_rate": 0.0017729958885813431, "loss": 0.198, "step": 22960 }, { "epoch": 0.04071352392198791, "grad_norm": 0.54296875, "learning_rate": 0.001772956369451073, "loss": 0.211, "step": 22962 }, { "epoch": 0.040717070087297726, "grad_norm": 0.546875, "learning_rate": 0.001772916847377683, "loss": 0.2404, "step": 22964 }, { "epoch": 0.04072061625260754, "grad_norm": 0.53515625, "learning_rate": 0.0017728773223613455, "loss": 0.22, "step": 22966 }, { "epoch": 0.040724162417917355, "grad_norm": 0.6328125, "learning_rate": 0.0017728377944022334, "loss": 0.1714, "step": 22968 }, { "epoch": 0.04072770858322717, "grad_norm": 0.283203125, "learning_rate": 0.0017727982635005195, "loss": 0.1405, "step": 22970 }, { "epoch": 0.040731254748536984, "grad_norm": 0.3984375, "learning_rate": 0.0017727587296563767, "loss": 0.1805, "step": 22972 }, { "epoch": 0.0407348009138468, "grad_norm": 0.484375, "learning_rate": 0.001772719192869978, "loss": 0.2062, "step": 22974 }, { "epoch": 0.04073834707915661, "grad_norm": 0.7734375, "learning_rate": 0.0017726796531414967, "loss": 0.2096, "step": 22976 }, { "epoch": 0.04074189324446643, "grad_norm": 0.419921875, "learning_rate": 0.0017726401104711052, "loss": 0.1723, "step": 22978 }, { "epoch": 0.04074543940977625, "grad_norm": 0.74609375, "learning_rate": 0.0017726005648589767, "loss": 0.1873, "step": 22980 }, { "epoch": 0.040748985575086064, "grad_norm": 0.447265625, "learning_rate": 0.0017725610163052837, "loss": 0.3235, "step": 22982 }, { "epoch": 0.04075253174039588, "grad_norm": 1.3984375, "learning_rate": 0.0017725214648101997, "loss": 0.2683, "step": 22984 }, { "epoch": 0.04075607790570569, "grad_norm": 0.60546875, "learning_rate": 0.0017724819103738974, "loss": 0.1775, "step": 22986 }, { "epoch": 0.04075962407101551, "grad_norm": 1.0546875, "learning_rate": 0.0017724423529965497, "loss": 0.2562, "step": 22988 }, { "epoch": 0.04076317023632532, "grad_norm": 0.333984375, "learning_rate": 0.0017724027926783298, "loss": 0.3659, "step": 22990 }, { "epoch": 0.040766716401635136, "grad_norm": 0.462890625, "learning_rate": 0.0017723632294194107, "loss": 0.1962, "step": 22992 }, { "epoch": 0.04077026256694495, "grad_norm": 0.62890625, "learning_rate": 0.0017723236632199656, "loss": 0.2046, "step": 22994 }, { "epoch": 0.040773808732254765, "grad_norm": 0.5234375, "learning_rate": 0.0017722840940801669, "loss": 0.2446, "step": 22996 }, { "epoch": 0.04077735489756458, "grad_norm": 0.52734375, "learning_rate": 0.0017722445220001886, "loss": 0.1942, "step": 22998 }, { "epoch": 0.040780901062874395, "grad_norm": 0.404296875, "learning_rate": 0.001772204946980203, "loss": 0.1216, "step": 23000 }, { "epoch": 0.040784447228184216, "grad_norm": 0.34765625, "learning_rate": 0.0017721653690203833, "loss": 0.2388, "step": 23002 }, { "epoch": 0.04078799339349403, "grad_norm": 5.375, "learning_rate": 0.001772125788120903, "loss": 0.1738, "step": 23004 }, { "epoch": 0.040791539558803845, "grad_norm": 1.7265625, "learning_rate": 0.0017720862042819349, "loss": 0.2508, "step": 23006 }, { "epoch": 0.04079508572411366, "grad_norm": 1.046875, "learning_rate": 0.0017720466175036515, "loss": 0.3182, "step": 23008 }, { "epoch": 0.040798631889423474, "grad_norm": 0.546875, "learning_rate": 0.0017720070277862272, "loss": 0.2032, "step": 23010 }, { "epoch": 0.04080217805473329, "grad_norm": 0.50390625, "learning_rate": 0.0017719674351298342, "loss": 0.1755, "step": 23012 }, { "epoch": 0.0408057242200431, "grad_norm": 0.6796875, "learning_rate": 0.0017719278395346464, "loss": 0.2211, "step": 23014 }, { "epoch": 0.04080927038535292, "grad_norm": 0.1943359375, "learning_rate": 0.0017718882410008361, "loss": 0.1721, "step": 23016 }, { "epoch": 0.04081281655066273, "grad_norm": 0.73046875, "learning_rate": 0.0017718486395285771, "loss": 0.2704, "step": 23018 }, { "epoch": 0.04081636271597255, "grad_norm": 1.7890625, "learning_rate": 0.0017718090351180424, "loss": 0.2036, "step": 23020 }, { "epoch": 0.04081990888128236, "grad_norm": 0.279296875, "learning_rate": 0.0017717694277694053, "loss": 0.1772, "step": 23022 }, { "epoch": 0.040823455046592176, "grad_norm": 0.40625, "learning_rate": 0.001771729817482839, "loss": 0.2523, "step": 23024 }, { "epoch": 0.040827001211902, "grad_norm": 0.55859375, "learning_rate": 0.0017716902042585162, "loss": 0.1698, "step": 23026 }, { "epoch": 0.04083054737721181, "grad_norm": 4.78125, "learning_rate": 0.0017716505880966107, "loss": 0.2229, "step": 23028 }, { "epoch": 0.04083409354252163, "grad_norm": 0.97265625, "learning_rate": 0.0017716109689972959, "loss": 0.2736, "step": 23030 }, { "epoch": 0.04083763970783144, "grad_norm": 0.279296875, "learning_rate": 0.0017715713469607449, "loss": 0.2427, "step": 23032 }, { "epoch": 0.040841185873141256, "grad_norm": 0.458984375, "learning_rate": 0.0017715317219871307, "loss": 0.2546, "step": 23034 }, { "epoch": 0.04084473203845107, "grad_norm": 0.515625, "learning_rate": 0.001771492094076627, "loss": 0.2277, "step": 23036 }, { "epoch": 0.040848278203760885, "grad_norm": 0.30859375, "learning_rate": 0.0017714524632294068, "loss": 0.2006, "step": 23038 }, { "epoch": 0.0408518243690707, "grad_norm": 0.435546875, "learning_rate": 0.0017714128294456437, "loss": 0.2222, "step": 23040 }, { "epoch": 0.040855370534380514, "grad_norm": 0.4453125, "learning_rate": 0.0017713731927255108, "loss": 0.2791, "step": 23042 }, { "epoch": 0.04085891669969033, "grad_norm": 1.1484375, "learning_rate": 0.0017713335530691818, "loss": 0.2175, "step": 23044 }, { "epoch": 0.04086246286500014, "grad_norm": 3.109375, "learning_rate": 0.0017712939104768293, "loss": 0.2304, "step": 23046 }, { "epoch": 0.040866009030309965, "grad_norm": 0.61328125, "learning_rate": 0.0017712542649486274, "loss": 0.3433, "step": 23048 }, { "epoch": 0.04086955519561978, "grad_norm": 0.73046875, "learning_rate": 0.0017712146164847494, "loss": 0.2304, "step": 23050 }, { "epoch": 0.040873101360929594, "grad_norm": 1.2578125, "learning_rate": 0.001771174965085368, "loss": 0.2263, "step": 23052 }, { "epoch": 0.04087664752623941, "grad_norm": 1.0703125, "learning_rate": 0.001771135310750658, "loss": 0.2265, "step": 23054 }, { "epoch": 0.04088019369154922, "grad_norm": 0.82421875, "learning_rate": 0.0017710956534807917, "loss": 0.2856, "step": 23056 }, { "epoch": 0.04088373985685904, "grad_norm": 1.046875, "learning_rate": 0.0017710559932759425, "loss": 0.2033, "step": 23058 }, { "epoch": 0.04088728602216885, "grad_norm": 1.53125, "learning_rate": 0.0017710163301362842, "loss": 0.3443, "step": 23060 }, { "epoch": 0.040890832187478666, "grad_norm": 0.55859375, "learning_rate": 0.0017709766640619906, "loss": 0.2194, "step": 23062 }, { "epoch": 0.04089437835278848, "grad_norm": 1.1484375, "learning_rate": 0.0017709369950532346, "loss": 0.2083, "step": 23064 }, { "epoch": 0.040897924518098296, "grad_norm": 0.56640625, "learning_rate": 0.00177089732311019, "loss": 0.2354, "step": 23066 }, { "epoch": 0.04090147068340811, "grad_norm": 1.234375, "learning_rate": 0.00177085764823303, "loss": 0.2038, "step": 23068 }, { "epoch": 0.04090501684871793, "grad_norm": 2.765625, "learning_rate": 0.0017708179704219286, "loss": 0.3019, "step": 23070 }, { "epoch": 0.040908563014027746, "grad_norm": 1.2421875, "learning_rate": 0.001770778289677059, "loss": 0.2663, "step": 23072 }, { "epoch": 0.04091210917933756, "grad_norm": 0.28125, "learning_rate": 0.0017707386059985948, "loss": 0.2171, "step": 23074 }, { "epoch": 0.040915655344647375, "grad_norm": 0.53125, "learning_rate": 0.0017706989193867094, "loss": 0.1798, "step": 23076 }, { "epoch": 0.04091920150995719, "grad_norm": 0.44140625, "learning_rate": 0.0017706592298415767, "loss": 0.2424, "step": 23078 }, { "epoch": 0.040922747675267004, "grad_norm": 0.37109375, "learning_rate": 0.00177061953736337, "loss": 0.289, "step": 23080 }, { "epoch": 0.04092629384057682, "grad_norm": 0.328125, "learning_rate": 0.0017705798419522629, "loss": 0.1244, "step": 23082 }, { "epoch": 0.040929840005886634, "grad_norm": 1.21875, "learning_rate": 0.0017705401436084293, "loss": 0.2376, "step": 23084 }, { "epoch": 0.04093338617119645, "grad_norm": 0.65625, "learning_rate": 0.0017705004423320425, "loss": 0.1959, "step": 23086 }, { "epoch": 0.04093693233650626, "grad_norm": 0.609375, "learning_rate": 0.0017704607381232764, "loss": 0.239, "step": 23088 }, { "epoch": 0.04094047850181608, "grad_norm": 0.7109375, "learning_rate": 0.0017704210309823043, "loss": 0.254, "step": 23090 }, { "epoch": 0.04094402466712589, "grad_norm": 1.4453125, "learning_rate": 0.0017703813209093001, "loss": 0.2401, "step": 23092 }, { "epoch": 0.04094757083243571, "grad_norm": 0.3046875, "learning_rate": 0.0017703416079044372, "loss": 0.175, "step": 23094 }, { "epoch": 0.04095111699774553, "grad_norm": 0.64453125, "learning_rate": 0.0017703018919678895, "loss": 0.1773, "step": 23096 }, { "epoch": 0.04095466316305534, "grad_norm": 0.2734375, "learning_rate": 0.001770262173099831, "loss": 0.2244, "step": 23098 }, { "epoch": 0.04095820932836516, "grad_norm": 0.326171875, "learning_rate": 0.001770222451300435, "loss": 0.1892, "step": 23100 }, { "epoch": 0.04096175549367497, "grad_norm": 0.5078125, "learning_rate": 0.0017701827265698751, "loss": 0.1675, "step": 23102 }, { "epoch": 0.040965301658984786, "grad_norm": 0.5, "learning_rate": 0.0017701429989083256, "loss": 0.2149, "step": 23104 }, { "epoch": 0.0409688478242946, "grad_norm": 0.43359375, "learning_rate": 0.0017701032683159594, "loss": 0.2345, "step": 23106 }, { "epoch": 0.040972393989604415, "grad_norm": 0.251953125, "learning_rate": 0.0017700635347929511, "loss": 0.1745, "step": 23108 }, { "epoch": 0.04097594015491423, "grad_norm": 0.384765625, "learning_rate": 0.001770023798339474, "loss": 0.2185, "step": 23110 }, { "epoch": 0.040979486320224044, "grad_norm": 0.52734375, "learning_rate": 0.0017699840589557021, "loss": 0.1661, "step": 23112 }, { "epoch": 0.04098303248553386, "grad_norm": 0.5390625, "learning_rate": 0.0017699443166418088, "loss": 0.2182, "step": 23114 }, { "epoch": 0.04098657865084368, "grad_norm": 0.578125, "learning_rate": 0.0017699045713979687, "loss": 0.2326, "step": 23116 }, { "epoch": 0.040990124816153495, "grad_norm": 0.388671875, "learning_rate": 0.001769864823224355, "loss": 0.1849, "step": 23118 }, { "epoch": 0.04099367098146331, "grad_norm": 0.66796875, "learning_rate": 0.0017698250721211414, "loss": 0.2014, "step": 23120 }, { "epoch": 0.040997217146773124, "grad_norm": 2.703125, "learning_rate": 0.0017697853180885022, "loss": 0.4575, "step": 23122 }, { "epoch": 0.04100076331208294, "grad_norm": 0.49609375, "learning_rate": 0.001769745561126611, "loss": 0.1951, "step": 23124 }, { "epoch": 0.04100430947739275, "grad_norm": 0.396484375, "learning_rate": 0.0017697058012356415, "loss": 0.1968, "step": 23126 }, { "epoch": 0.04100785564270257, "grad_norm": 1.125, "learning_rate": 0.0017696660384157684, "loss": 0.2112, "step": 23128 }, { "epoch": 0.04101140180801238, "grad_norm": 0.6796875, "learning_rate": 0.0017696262726671645, "loss": 0.1848, "step": 23130 }, { "epoch": 0.0410149479733222, "grad_norm": 0.279296875, "learning_rate": 0.0017695865039900048, "loss": 0.1366, "step": 23132 }, { "epoch": 0.04101849413863201, "grad_norm": 0.2578125, "learning_rate": 0.001769546732384462, "loss": 0.2428, "step": 23134 }, { "epoch": 0.041022040303941826, "grad_norm": 0.57421875, "learning_rate": 0.0017695069578507112, "loss": 0.2204, "step": 23136 }, { "epoch": 0.04102558646925165, "grad_norm": 0.796875, "learning_rate": 0.001769467180388926, "loss": 0.1482, "step": 23138 }, { "epoch": 0.04102913263456146, "grad_norm": 0.333984375, "learning_rate": 0.0017694273999992799, "loss": 0.2068, "step": 23140 }, { "epoch": 0.041032678799871276, "grad_norm": 0.36328125, "learning_rate": 0.0017693876166819471, "loss": 0.2585, "step": 23142 }, { "epoch": 0.04103622496518109, "grad_norm": 1.109375, "learning_rate": 0.001769347830437102, "loss": 0.229, "step": 23144 }, { "epoch": 0.041039771130490905, "grad_norm": 0.84765625, "learning_rate": 0.0017693080412649184, "loss": 0.2148, "step": 23146 }, { "epoch": 0.04104331729580072, "grad_norm": 0.8515625, "learning_rate": 0.0017692682491655695, "loss": 0.1616, "step": 23148 }, { "epoch": 0.041046863461110535, "grad_norm": 0.3203125, "learning_rate": 0.0017692284541392306, "loss": 0.2977, "step": 23150 }, { "epoch": 0.04105040962642035, "grad_norm": 0.50390625, "learning_rate": 0.001769188656186075, "loss": 0.1748, "step": 23152 }, { "epoch": 0.041053955791730164, "grad_norm": 1.40625, "learning_rate": 0.001769148855306277, "loss": 0.2493, "step": 23154 }, { "epoch": 0.04105750195703998, "grad_norm": 0.1416015625, "learning_rate": 0.0017691090515000107, "loss": 0.1821, "step": 23156 }, { "epoch": 0.04106104812234979, "grad_norm": 0.421875, "learning_rate": 0.00176906924476745, "loss": 0.178, "step": 23158 }, { "epoch": 0.04106459428765961, "grad_norm": 5.46875, "learning_rate": 0.001769029435108769, "loss": 0.2724, "step": 23160 }, { "epoch": 0.04106814045296943, "grad_norm": 0.2734375, "learning_rate": 0.001768989622524142, "loss": 0.1936, "step": 23162 }, { "epoch": 0.04107168661827924, "grad_norm": 0.421875, "learning_rate": 0.001768949807013743, "loss": 0.1615, "step": 23164 }, { "epoch": 0.04107523278358906, "grad_norm": 0.458984375, "learning_rate": 0.0017689099885777459, "loss": 0.2878, "step": 23166 }, { "epoch": 0.04107877894889887, "grad_norm": 0.71484375, "learning_rate": 0.001768870167216325, "loss": 0.2435, "step": 23168 }, { "epoch": 0.04108232511420869, "grad_norm": 0.4609375, "learning_rate": 0.0017688303429296548, "loss": 0.2074, "step": 23170 }, { "epoch": 0.0410858712795185, "grad_norm": 0.333984375, "learning_rate": 0.0017687905157179093, "loss": 0.1941, "step": 23172 }, { "epoch": 0.041089417444828316, "grad_norm": 0.484375, "learning_rate": 0.0017687506855812623, "loss": 0.223, "step": 23174 }, { "epoch": 0.04109296361013813, "grad_norm": 0.35546875, "learning_rate": 0.0017687108525198882, "loss": 0.1845, "step": 23176 }, { "epoch": 0.041096509775447945, "grad_norm": 0.4140625, "learning_rate": 0.0017686710165339611, "loss": 0.167, "step": 23178 }, { "epoch": 0.04110005594075776, "grad_norm": 2.734375, "learning_rate": 0.001768631177623656, "loss": 0.2387, "step": 23180 }, { "epoch": 0.041103602106067574, "grad_norm": 0.294921875, "learning_rate": 0.0017685913357891462, "loss": 0.1821, "step": 23182 }, { "epoch": 0.041107148271377396, "grad_norm": 0.30078125, "learning_rate": 0.0017685514910306059, "loss": 0.2343, "step": 23184 }, { "epoch": 0.04111069443668721, "grad_norm": 0.244140625, "learning_rate": 0.0017685116433482102, "loss": 0.1413, "step": 23186 }, { "epoch": 0.041114240601997025, "grad_norm": 0.55859375, "learning_rate": 0.0017684717927421328, "loss": 0.2076, "step": 23188 }, { "epoch": 0.04111778676730684, "grad_norm": 0.4609375, "learning_rate": 0.0017684319392125478, "loss": 0.1732, "step": 23190 }, { "epoch": 0.041121332932616654, "grad_norm": 1.6875, "learning_rate": 0.00176839208275963, "loss": 0.2775, "step": 23192 }, { "epoch": 0.04112487909792647, "grad_norm": 0.6328125, "learning_rate": 0.0017683522233835534, "loss": 0.1971, "step": 23194 }, { "epoch": 0.04112842526323628, "grad_norm": 1.0703125, "learning_rate": 0.0017683123610844923, "loss": 0.2487, "step": 23196 }, { "epoch": 0.0411319714285461, "grad_norm": 3.234375, "learning_rate": 0.0017682724958626216, "loss": 0.3749, "step": 23198 }, { "epoch": 0.04113551759385591, "grad_norm": 0.26953125, "learning_rate": 0.0017682326277181147, "loss": 0.1859, "step": 23200 }, { "epoch": 0.04113906375916573, "grad_norm": 0.75, "learning_rate": 0.0017681927566511467, "loss": 0.2172, "step": 23202 }, { "epoch": 0.04114260992447554, "grad_norm": 1.15625, "learning_rate": 0.0017681528826618915, "loss": 0.1834, "step": 23204 }, { "epoch": 0.04114615608978536, "grad_norm": 0.30859375, "learning_rate": 0.001768113005750524, "loss": 0.1862, "step": 23206 }, { "epoch": 0.04114970225509518, "grad_norm": 0.427734375, "learning_rate": 0.001768073125917218, "loss": 0.2102, "step": 23208 }, { "epoch": 0.04115324842040499, "grad_norm": 1.7421875, "learning_rate": 0.0017680332431621484, "loss": 0.2361, "step": 23210 }, { "epoch": 0.04115679458571481, "grad_norm": 0.3359375, "learning_rate": 0.0017679933574854894, "loss": 0.1733, "step": 23212 }, { "epoch": 0.04116034075102462, "grad_norm": 0.79296875, "learning_rate": 0.0017679534688874155, "loss": 0.2199, "step": 23214 }, { "epoch": 0.041163886916334436, "grad_norm": 0.8828125, "learning_rate": 0.001767913577368101, "loss": 0.2479, "step": 23216 }, { "epoch": 0.04116743308164425, "grad_norm": 0.5546875, "learning_rate": 0.0017678736829277203, "loss": 0.2076, "step": 23218 }, { "epoch": 0.041170979246954065, "grad_norm": 0.48828125, "learning_rate": 0.0017678337855664486, "loss": 0.2024, "step": 23220 }, { "epoch": 0.04117452541226388, "grad_norm": 0.57421875, "learning_rate": 0.0017677938852844594, "loss": 0.182, "step": 23222 }, { "epoch": 0.041178071577573694, "grad_norm": 0.271484375, "learning_rate": 0.0017677539820819279, "loss": 0.1936, "step": 23224 }, { "epoch": 0.04118161774288351, "grad_norm": 0.439453125, "learning_rate": 0.0017677140759590282, "loss": 0.2759, "step": 23226 }, { "epoch": 0.04118516390819332, "grad_norm": 0.51171875, "learning_rate": 0.0017676741669159352, "loss": 0.316, "step": 23228 }, { "epoch": 0.041188710073503144, "grad_norm": 1.484375, "learning_rate": 0.0017676342549528229, "loss": 0.409, "step": 23230 }, { "epoch": 0.04119225623881296, "grad_norm": 0.349609375, "learning_rate": 0.001767594340069866, "loss": 0.2155, "step": 23232 }, { "epoch": 0.041195802404122774, "grad_norm": 0.486328125, "learning_rate": 0.0017675544222672398, "loss": 0.2108, "step": 23234 }, { "epoch": 0.04119934856943259, "grad_norm": 0.47265625, "learning_rate": 0.001767514501545118, "loss": 0.1795, "step": 23236 }, { "epoch": 0.0412028947347424, "grad_norm": 0.451171875, "learning_rate": 0.0017674745779036759, "loss": 0.2196, "step": 23238 }, { "epoch": 0.04120644090005222, "grad_norm": 0.68359375, "learning_rate": 0.0017674346513430871, "loss": 0.1772, "step": 23240 }, { "epoch": 0.04120998706536203, "grad_norm": 0.88671875, "learning_rate": 0.0017673947218635273, "loss": 0.2201, "step": 23242 }, { "epoch": 0.041213533230671846, "grad_norm": 0.984375, "learning_rate": 0.0017673547894651704, "loss": 0.2178, "step": 23244 }, { "epoch": 0.04121707939598166, "grad_norm": 0.86328125, "learning_rate": 0.0017673148541481913, "loss": 0.1856, "step": 23246 }, { "epoch": 0.041220625561291475, "grad_norm": 0.330078125, "learning_rate": 0.001767274915912765, "loss": 0.1576, "step": 23248 }, { "epoch": 0.04122417172660129, "grad_norm": 0.421875, "learning_rate": 0.0017672349747590655, "loss": 0.172, "step": 23250 }, { "epoch": 0.04122771789191111, "grad_norm": 0.7578125, "learning_rate": 0.0017671950306872679, "loss": 0.1967, "step": 23252 }, { "epoch": 0.041231264057220926, "grad_norm": 0.330078125, "learning_rate": 0.0017671550836975468, "loss": 0.193, "step": 23254 }, { "epoch": 0.04123481022253074, "grad_norm": 0.419921875, "learning_rate": 0.0017671151337900768, "loss": 0.2208, "step": 23256 }, { "epoch": 0.041238356387840555, "grad_norm": 0.232421875, "learning_rate": 0.0017670751809650328, "loss": 0.2057, "step": 23258 }, { "epoch": 0.04124190255315037, "grad_norm": 1.5234375, "learning_rate": 0.0017670352252225894, "loss": 0.2621, "step": 23260 }, { "epoch": 0.041245448718460184, "grad_norm": 0.271484375, "learning_rate": 0.0017669952665629212, "loss": 0.202, "step": 23262 }, { "epoch": 0.04124899488377, "grad_norm": 0.52734375, "learning_rate": 0.0017669553049862036, "loss": 0.1683, "step": 23264 }, { "epoch": 0.04125254104907981, "grad_norm": 0.2890625, "learning_rate": 0.0017669153404926104, "loss": 0.2327, "step": 23266 }, { "epoch": 0.04125608721438963, "grad_norm": 0.61328125, "learning_rate": 0.0017668753730823172, "loss": 0.1284, "step": 23268 }, { "epoch": 0.04125963337969944, "grad_norm": 0.25390625, "learning_rate": 0.0017668354027554985, "loss": 0.1926, "step": 23270 }, { "epoch": 0.04126317954500926, "grad_norm": 0.734375, "learning_rate": 0.001766795429512329, "loss": 0.155, "step": 23272 }, { "epoch": 0.04126672571031908, "grad_norm": 1.1015625, "learning_rate": 0.0017667554533529835, "loss": 0.449, "step": 23274 }, { "epoch": 0.04127027187562889, "grad_norm": 0.7578125, "learning_rate": 0.0017667154742776372, "loss": 0.226, "step": 23276 }, { "epoch": 0.04127381804093871, "grad_norm": 2.421875, "learning_rate": 0.0017666754922864645, "loss": 0.5045, "step": 23278 }, { "epoch": 0.04127736420624852, "grad_norm": 0.484375, "learning_rate": 0.0017666355073796406, "loss": 0.2501, "step": 23280 }, { "epoch": 0.04128091037155834, "grad_norm": 0.333984375, "learning_rate": 0.00176659551955734, "loss": 0.1933, "step": 23282 }, { "epoch": 0.04128445653686815, "grad_norm": 0.443359375, "learning_rate": 0.001766555528819738, "loss": 0.2044, "step": 23284 }, { "epoch": 0.041288002702177966, "grad_norm": 0.2177734375, "learning_rate": 0.001766515535167009, "loss": 0.2059, "step": 23286 }, { "epoch": 0.04129154886748778, "grad_norm": 0.67578125, "learning_rate": 0.0017664755385993285, "loss": 0.1661, "step": 23288 }, { "epoch": 0.041295095032797595, "grad_norm": 0.80078125, "learning_rate": 0.001766435539116871, "loss": 0.2243, "step": 23290 }, { "epoch": 0.04129864119810741, "grad_norm": 0.2890625, "learning_rate": 0.0017663955367198119, "loss": 0.1847, "step": 23292 }, { "epoch": 0.041302187363417224, "grad_norm": 1.6328125, "learning_rate": 0.0017663555314083254, "loss": 0.6411, "step": 23294 }, { "epoch": 0.04130573352872704, "grad_norm": 0.28515625, "learning_rate": 0.001766315523182587, "loss": 0.1896, "step": 23296 }, { "epoch": 0.04130927969403686, "grad_norm": 0.640625, "learning_rate": 0.0017662755120427713, "loss": 0.1905, "step": 23298 }, { "epoch": 0.041312825859346675, "grad_norm": 0.236328125, "learning_rate": 0.001766235497989054, "loss": 0.2125, "step": 23300 }, { "epoch": 0.04131637202465649, "grad_norm": 1.015625, "learning_rate": 0.0017661954810216092, "loss": 0.2027, "step": 23302 }, { "epoch": 0.041319918189966304, "grad_norm": 1.3359375, "learning_rate": 0.0017661554611406126, "loss": 0.1939, "step": 23304 }, { "epoch": 0.04132346435527612, "grad_norm": 0.51953125, "learning_rate": 0.001766115438346239, "loss": 0.2046, "step": 23306 }, { "epoch": 0.04132701052058593, "grad_norm": 0.314453125, "learning_rate": 0.001766075412638663, "loss": 0.1783, "step": 23308 }, { "epoch": 0.04133055668589575, "grad_norm": 0.443359375, "learning_rate": 0.0017660353840180606, "loss": 0.2686, "step": 23310 }, { "epoch": 0.04133410285120556, "grad_norm": 0.48046875, "learning_rate": 0.0017659953524846062, "loss": 0.1926, "step": 23312 }, { "epoch": 0.041337649016515376, "grad_norm": 0.78515625, "learning_rate": 0.0017659553180384747, "loss": 0.2661, "step": 23314 }, { "epoch": 0.04134119518182519, "grad_norm": 0.54296875, "learning_rate": 0.0017659152806798416, "loss": 0.2497, "step": 23316 }, { "epoch": 0.041344741347135006, "grad_norm": 0.4765625, "learning_rate": 0.001765875240408882, "loss": 0.1757, "step": 23318 }, { "epoch": 0.04134828751244483, "grad_norm": 0.453125, "learning_rate": 0.0017658351972257708, "loss": 0.1618, "step": 23320 }, { "epoch": 0.04135183367775464, "grad_norm": 0.26953125, "learning_rate": 0.0017657951511306832, "loss": 0.1876, "step": 23322 }, { "epoch": 0.041355379843064456, "grad_norm": 0.84765625, "learning_rate": 0.0017657551021237942, "loss": 0.3632, "step": 23324 }, { "epoch": 0.04135892600837427, "grad_norm": 0.83203125, "learning_rate": 0.001765715050205279, "loss": 0.2406, "step": 23326 }, { "epoch": 0.041362472173684085, "grad_norm": 0.341796875, "learning_rate": 0.0017656749953753133, "loss": 0.2043, "step": 23328 }, { "epoch": 0.0413660183389939, "grad_norm": 0.30859375, "learning_rate": 0.0017656349376340716, "loss": 0.1712, "step": 23330 }, { "epoch": 0.041369564504303714, "grad_norm": 0.34375, "learning_rate": 0.0017655948769817293, "loss": 0.1547, "step": 23332 }, { "epoch": 0.04137311066961353, "grad_norm": 0.498046875, "learning_rate": 0.0017655548134184618, "loss": 0.1571, "step": 23334 }, { "epoch": 0.041376656834923344, "grad_norm": 0.7109375, "learning_rate": 0.0017655147469444442, "loss": 0.265, "step": 23336 }, { "epoch": 0.04138020300023316, "grad_norm": 0.6484375, "learning_rate": 0.0017654746775598513, "loss": 0.2537, "step": 23338 }, { "epoch": 0.04138374916554297, "grad_norm": 1.5234375, "learning_rate": 0.0017654346052648588, "loss": 0.2825, "step": 23340 }, { "epoch": 0.041387295330852794, "grad_norm": 0.87890625, "learning_rate": 0.0017653945300596422, "loss": 0.2425, "step": 23342 }, { "epoch": 0.04139084149616261, "grad_norm": 0.60546875, "learning_rate": 0.0017653544519443762, "loss": 0.201, "step": 23344 }, { "epoch": 0.04139438766147242, "grad_norm": 1.140625, "learning_rate": 0.0017653143709192363, "loss": 0.2244, "step": 23346 }, { "epoch": 0.04139793382678224, "grad_norm": 0.37890625, "learning_rate": 0.0017652742869843977, "loss": 0.2067, "step": 23348 }, { "epoch": 0.04140147999209205, "grad_norm": 0.349609375, "learning_rate": 0.0017652342001400361, "loss": 0.2372, "step": 23350 }, { "epoch": 0.04140502615740187, "grad_norm": 0.37890625, "learning_rate": 0.0017651941103863265, "loss": 0.4055, "step": 23352 }, { "epoch": 0.04140857232271168, "grad_norm": 0.8203125, "learning_rate": 0.001765154017723444, "loss": 0.2737, "step": 23354 }, { "epoch": 0.041412118488021496, "grad_norm": 0.322265625, "learning_rate": 0.0017651139221515643, "loss": 0.1615, "step": 23356 }, { "epoch": 0.04141566465333131, "grad_norm": 0.248046875, "learning_rate": 0.0017650738236708625, "loss": 0.1665, "step": 23358 }, { "epoch": 0.041419210818641125, "grad_norm": 0.294921875, "learning_rate": 0.0017650337222815144, "loss": 0.217, "step": 23360 }, { "epoch": 0.04142275698395094, "grad_norm": 0.88671875, "learning_rate": 0.0017649936179836949, "loss": 0.176, "step": 23362 }, { "epoch": 0.041426303149260754, "grad_norm": 0.3046875, "learning_rate": 0.0017649535107775798, "loss": 0.2306, "step": 23364 }, { "epoch": 0.041429849314570576, "grad_norm": 0.66796875, "learning_rate": 0.0017649134006633442, "loss": 0.2281, "step": 23366 }, { "epoch": 0.04143339547988039, "grad_norm": 0.259765625, "learning_rate": 0.0017648732876411634, "loss": 0.2211, "step": 23368 }, { "epoch": 0.041436941645190205, "grad_norm": 1.3125, "learning_rate": 0.0017648331717112133, "loss": 0.1993, "step": 23370 }, { "epoch": 0.04144048781050002, "grad_norm": 0.53515625, "learning_rate": 0.001764793052873669, "loss": 0.1779, "step": 23372 }, { "epoch": 0.041444033975809834, "grad_norm": 0.63671875, "learning_rate": 0.0017647529311287063, "loss": 0.2384, "step": 23374 }, { "epoch": 0.04144758014111965, "grad_norm": 1.21875, "learning_rate": 0.0017647128064764998, "loss": 0.2, "step": 23376 }, { "epoch": 0.04145112630642946, "grad_norm": 0.427734375, "learning_rate": 0.0017646726789172262, "loss": 0.191, "step": 23378 }, { "epoch": 0.04145467247173928, "grad_norm": 0.30859375, "learning_rate": 0.00176463254845106, "loss": 0.3467, "step": 23380 }, { "epoch": 0.04145821863704909, "grad_norm": 0.333984375, "learning_rate": 0.001764592415078177, "loss": 0.4592, "step": 23382 }, { "epoch": 0.04146176480235891, "grad_norm": 0.337890625, "learning_rate": 0.0017645522787987534, "loss": 0.2334, "step": 23384 }, { "epoch": 0.04146531096766872, "grad_norm": 0.333984375, "learning_rate": 0.0017645121396129637, "loss": 0.1927, "step": 23386 }, { "epoch": 0.04146885713297854, "grad_norm": 0.439453125, "learning_rate": 0.0017644719975209839, "loss": 0.2189, "step": 23388 }, { "epoch": 0.04147240329828836, "grad_norm": 0.2265625, "learning_rate": 0.0017644318525229898, "loss": 0.1522, "step": 23390 }, { "epoch": 0.04147594946359817, "grad_norm": 0.65625, "learning_rate": 0.0017643917046191566, "loss": 0.2035, "step": 23392 }, { "epoch": 0.041479495628907986, "grad_norm": 1.8203125, "learning_rate": 0.00176435155380966, "loss": 0.2391, "step": 23394 }, { "epoch": 0.0414830417942178, "grad_norm": 0.50390625, "learning_rate": 0.0017643114000946759, "loss": 0.2065, "step": 23396 }, { "epoch": 0.041486587959527615, "grad_norm": 0.46875, "learning_rate": 0.001764271243474379, "loss": 0.1847, "step": 23398 }, { "epoch": 0.04149013412483743, "grad_norm": 0.625, "learning_rate": 0.0017642310839489459, "loss": 0.2559, "step": 23400 }, { "epoch": 0.041493680290147245, "grad_norm": 0.2890625, "learning_rate": 0.001764190921518552, "loss": 0.2399, "step": 23402 }, { "epoch": 0.04149722645545706, "grad_norm": 0.2060546875, "learning_rate": 0.0017641507561833726, "loss": 0.2248, "step": 23404 }, { "epoch": 0.041500772620766874, "grad_norm": 0.52734375, "learning_rate": 0.0017641105879435837, "loss": 0.1774, "step": 23406 }, { "epoch": 0.04150431878607669, "grad_norm": 0.30078125, "learning_rate": 0.0017640704167993606, "loss": 0.1816, "step": 23408 }, { "epoch": 0.04150786495138651, "grad_norm": 4.0, "learning_rate": 0.0017640302427508795, "loss": 0.1873, "step": 23410 }, { "epoch": 0.041511411116696324, "grad_norm": 0.7578125, "learning_rate": 0.0017639900657983154, "loss": 0.2587, "step": 23412 }, { "epoch": 0.04151495728200614, "grad_norm": 0.443359375, "learning_rate": 0.001763949885941845, "loss": 0.3925, "step": 23414 }, { "epoch": 0.04151850344731595, "grad_norm": 0.423828125, "learning_rate": 0.001763909703181643, "loss": 0.2428, "step": 23416 }, { "epoch": 0.04152204961262577, "grad_norm": 0.62109375, "learning_rate": 0.0017638695175178861, "loss": 0.134, "step": 23418 }, { "epoch": 0.04152559577793558, "grad_norm": 0.65625, "learning_rate": 0.0017638293289507492, "loss": 0.2133, "step": 23420 }, { "epoch": 0.0415291419432454, "grad_norm": 0.76953125, "learning_rate": 0.0017637891374804085, "loss": 0.1615, "step": 23422 }, { "epoch": 0.04153268810855521, "grad_norm": 0.30859375, "learning_rate": 0.0017637489431070396, "loss": 0.2235, "step": 23424 }, { "epoch": 0.041536234273865026, "grad_norm": 0.263671875, "learning_rate": 0.0017637087458308187, "loss": 0.1985, "step": 23426 }, { "epoch": 0.04153978043917484, "grad_norm": 0.5234375, "learning_rate": 0.0017636685456519211, "loss": 0.1823, "step": 23428 }, { "epoch": 0.041543326604484655, "grad_norm": 0.291015625, "learning_rate": 0.0017636283425705227, "loss": 0.1806, "step": 23430 }, { "epoch": 0.04154687276979447, "grad_norm": 0.30078125, "learning_rate": 0.0017635881365867993, "loss": 0.1623, "step": 23432 }, { "epoch": 0.04155041893510429, "grad_norm": 0.345703125, "learning_rate": 0.001763547927700927, "loss": 0.1854, "step": 23434 }, { "epoch": 0.041553965100414106, "grad_norm": 1.96875, "learning_rate": 0.0017635077159130815, "loss": 0.4208, "step": 23436 }, { "epoch": 0.04155751126572392, "grad_norm": 0.416015625, "learning_rate": 0.0017634675012234387, "loss": 0.2213, "step": 23438 }, { "epoch": 0.041561057431033735, "grad_norm": 0.390625, "learning_rate": 0.0017634272836321743, "loss": 0.2013, "step": 23440 }, { "epoch": 0.04156460359634355, "grad_norm": 1.015625, "learning_rate": 0.0017633870631394645, "loss": 0.2571, "step": 23442 }, { "epoch": 0.041568149761653364, "grad_norm": 0.28125, "learning_rate": 0.001763346839745485, "loss": 0.3577, "step": 23444 }, { "epoch": 0.04157169592696318, "grad_norm": 0.66796875, "learning_rate": 0.0017633066134504119, "loss": 0.2191, "step": 23446 }, { "epoch": 0.04157524209227299, "grad_norm": 0.6484375, "learning_rate": 0.0017632663842544205, "loss": 0.188, "step": 23448 }, { "epoch": 0.04157878825758281, "grad_norm": 1.1484375, "learning_rate": 0.0017632261521576876, "loss": 0.2824, "step": 23450 }, { "epoch": 0.04158233442289262, "grad_norm": 1.96875, "learning_rate": 0.0017631859171603887, "loss": 0.19, "step": 23452 }, { "epoch": 0.04158588058820244, "grad_norm": 0.5859375, "learning_rate": 0.0017631456792626997, "loss": 0.1951, "step": 23454 }, { "epoch": 0.04158942675351226, "grad_norm": 0.494140625, "learning_rate": 0.0017631054384647964, "loss": 0.2401, "step": 23456 }, { "epoch": 0.04159297291882207, "grad_norm": 0.412109375, "learning_rate": 0.0017630651947668557, "loss": 0.2227, "step": 23458 }, { "epoch": 0.04159651908413189, "grad_norm": 0.8828125, "learning_rate": 0.0017630249481690526, "loss": 0.1963, "step": 23460 }, { "epoch": 0.0416000652494417, "grad_norm": 0.51953125, "learning_rate": 0.0017629846986715637, "loss": 0.2463, "step": 23462 }, { "epoch": 0.041603611414751517, "grad_norm": 0.341796875, "learning_rate": 0.0017629444462745648, "loss": 0.2246, "step": 23464 }, { "epoch": 0.04160715758006133, "grad_norm": 0.5703125, "learning_rate": 0.001762904190978232, "loss": 0.1829, "step": 23466 }, { "epoch": 0.041610703745371146, "grad_norm": 0.40234375, "learning_rate": 0.0017628639327827412, "loss": 0.2002, "step": 23468 }, { "epoch": 0.04161424991068096, "grad_norm": 0.2451171875, "learning_rate": 0.0017628236716882685, "loss": 0.1941, "step": 23470 }, { "epoch": 0.041617796075990775, "grad_norm": 0.84375, "learning_rate": 0.0017627834076949902, "loss": 0.1834, "step": 23472 }, { "epoch": 0.04162134224130059, "grad_norm": 1.3046875, "learning_rate": 0.0017627431408030825, "loss": 0.2683, "step": 23474 }, { "epoch": 0.041624888406610404, "grad_norm": 0.3671875, "learning_rate": 0.0017627028710127208, "loss": 0.2192, "step": 23476 }, { "epoch": 0.041628434571920225, "grad_norm": 0.349609375, "learning_rate": 0.0017626625983240822, "loss": 0.1581, "step": 23478 }, { "epoch": 0.04163198073723004, "grad_norm": 0.59375, "learning_rate": 0.001762622322737342, "loss": 0.1884, "step": 23480 }, { "epoch": 0.041635526902539854, "grad_norm": 0.291015625, "learning_rate": 0.0017625820442526768, "loss": 0.1589, "step": 23482 }, { "epoch": 0.04163907306784967, "grad_norm": 1.0, "learning_rate": 0.0017625417628702625, "loss": 0.2005, "step": 23484 }, { "epoch": 0.041642619233159484, "grad_norm": 0.33203125, "learning_rate": 0.0017625014785902756, "loss": 0.2145, "step": 23486 }, { "epoch": 0.0416461653984693, "grad_norm": 0.41796875, "learning_rate": 0.001762461191412892, "loss": 0.2566, "step": 23488 }, { "epoch": 0.04164971156377911, "grad_norm": 0.58203125, "learning_rate": 0.0017624209013382874, "loss": 0.2069, "step": 23490 }, { "epoch": 0.04165325772908893, "grad_norm": 0.50390625, "learning_rate": 0.0017623806083666392, "loss": 0.2237, "step": 23492 }, { "epoch": 0.04165680389439874, "grad_norm": 0.6640625, "learning_rate": 0.0017623403124981225, "loss": 0.2271, "step": 23494 }, { "epoch": 0.041660350059708556, "grad_norm": 0.50390625, "learning_rate": 0.0017623000137329144, "loss": 0.2027, "step": 23496 }, { "epoch": 0.04166389622501837, "grad_norm": 0.271484375, "learning_rate": 0.0017622597120711906, "loss": 0.264, "step": 23498 }, { "epoch": 0.041667442390328185, "grad_norm": 0.52734375, "learning_rate": 0.0017622194075131274, "loss": 0.2279, "step": 23500 }, { "epoch": 0.04167098855563801, "grad_norm": 0.330078125, "learning_rate": 0.0017621791000589015, "loss": 0.2074, "step": 23502 }, { "epoch": 0.04167453472094782, "grad_norm": 0.443359375, "learning_rate": 0.0017621387897086885, "loss": 0.2275, "step": 23504 }, { "epoch": 0.041678080886257636, "grad_norm": 0.2470703125, "learning_rate": 0.0017620984764626652, "loss": 0.2362, "step": 23506 }, { "epoch": 0.04168162705156745, "grad_norm": 0.40625, "learning_rate": 0.0017620581603210077, "loss": 0.1797, "step": 23508 }, { "epoch": 0.041685173216877265, "grad_norm": 1.8046875, "learning_rate": 0.0017620178412838926, "loss": 0.3199, "step": 23510 }, { "epoch": 0.04168871938218708, "grad_norm": 1.703125, "learning_rate": 0.0017619775193514957, "loss": 0.3936, "step": 23512 }, { "epoch": 0.041692265547496894, "grad_norm": 0.8515625, "learning_rate": 0.0017619371945239942, "loss": 0.2107, "step": 23514 }, { "epoch": 0.04169581171280671, "grad_norm": 0.5703125, "learning_rate": 0.0017618968668015632, "loss": 0.2593, "step": 23516 }, { "epoch": 0.04169935787811652, "grad_norm": 2.09375, "learning_rate": 0.0017618565361843801, "loss": 0.193, "step": 23518 }, { "epoch": 0.04170290404342634, "grad_norm": 0.255859375, "learning_rate": 0.001761816202672621, "loss": 0.2088, "step": 23520 }, { "epoch": 0.04170645020873615, "grad_norm": 2.109375, "learning_rate": 0.0017617758662664621, "loss": 0.304, "step": 23522 }, { "epoch": 0.041709996374045974, "grad_norm": 0.73046875, "learning_rate": 0.00176173552696608, "loss": 0.149, "step": 23524 }, { "epoch": 0.04171354253935579, "grad_norm": 0.318359375, "learning_rate": 0.0017616951847716513, "loss": 0.2706, "step": 23526 }, { "epoch": 0.0417170887046656, "grad_norm": 0.5078125, "learning_rate": 0.0017616548396833522, "loss": 0.1907, "step": 23528 }, { "epoch": 0.04172063486997542, "grad_norm": 2.03125, "learning_rate": 0.001761614491701359, "loss": 0.2211, "step": 23530 }, { "epoch": 0.04172418103528523, "grad_norm": 0.51953125, "learning_rate": 0.0017615741408258483, "loss": 0.1828, "step": 23532 }, { "epoch": 0.04172772720059505, "grad_norm": 0.6953125, "learning_rate": 0.0017615337870569966, "loss": 0.2412, "step": 23534 }, { "epoch": 0.04173127336590486, "grad_norm": 0.60546875, "learning_rate": 0.0017614934303949808, "loss": 0.2206, "step": 23536 }, { "epoch": 0.041734819531214676, "grad_norm": 0.40234375, "learning_rate": 0.0017614530708399763, "loss": 0.2399, "step": 23538 }, { "epoch": 0.04173836569652449, "grad_norm": 0.36328125, "learning_rate": 0.0017614127083921606, "loss": 0.1973, "step": 23540 }, { "epoch": 0.041741911861834305, "grad_norm": 0.46484375, "learning_rate": 0.00176137234305171, "loss": 0.178, "step": 23542 }, { "epoch": 0.04174545802714412, "grad_norm": 0.75390625, "learning_rate": 0.0017613319748188009, "loss": 0.1737, "step": 23544 }, { "epoch": 0.04174900419245394, "grad_norm": 0.73046875, "learning_rate": 0.0017612916036936098, "loss": 0.2034, "step": 23546 }, { "epoch": 0.041752550357763756, "grad_norm": 0.251953125, "learning_rate": 0.0017612512296763135, "loss": 0.2177, "step": 23548 }, { "epoch": 0.04175609652307357, "grad_norm": 0.2890625, "learning_rate": 0.0017612108527670883, "loss": 0.2208, "step": 23550 }, { "epoch": 0.041759642688383385, "grad_norm": 0.625, "learning_rate": 0.0017611704729661108, "loss": 0.2261, "step": 23552 }, { "epoch": 0.0417631888536932, "grad_norm": 0.279296875, "learning_rate": 0.0017611300902735578, "loss": 0.1859, "step": 23554 }, { "epoch": 0.041766735019003014, "grad_norm": 0.57421875, "learning_rate": 0.001761089704689606, "loss": 0.2082, "step": 23556 }, { "epoch": 0.04177028118431283, "grad_norm": 0.5703125, "learning_rate": 0.0017610493162144316, "loss": 0.4987, "step": 23558 }, { "epoch": 0.04177382734962264, "grad_norm": 0.45703125, "learning_rate": 0.0017610089248482116, "loss": 0.216, "step": 23560 }, { "epoch": 0.04177737351493246, "grad_norm": 0.54296875, "learning_rate": 0.0017609685305911226, "loss": 0.1714, "step": 23562 }, { "epoch": 0.04178091968024227, "grad_norm": 0.48828125, "learning_rate": 0.001760928133443341, "loss": 0.1958, "step": 23564 }, { "epoch": 0.041784465845552086, "grad_norm": 0.443359375, "learning_rate": 0.0017608877334050438, "loss": 0.3131, "step": 23566 }, { "epoch": 0.0417880120108619, "grad_norm": 0.283203125, "learning_rate": 0.0017608473304764074, "loss": 0.2163, "step": 23568 }, { "epoch": 0.04179155817617172, "grad_norm": 0.44140625, "learning_rate": 0.0017608069246576087, "loss": 0.2302, "step": 23570 }, { "epoch": 0.04179510434148154, "grad_norm": 1.953125, "learning_rate": 0.0017607665159488244, "loss": 0.2463, "step": 23572 }, { "epoch": 0.04179865050679135, "grad_norm": 0.703125, "learning_rate": 0.001760726104350231, "loss": 0.2713, "step": 23574 }, { "epoch": 0.041802196672101166, "grad_norm": 0.53125, "learning_rate": 0.0017606856898620055, "loss": 0.1806, "step": 23576 }, { "epoch": 0.04180574283741098, "grad_norm": 1.21875, "learning_rate": 0.0017606452724843247, "loss": 0.1866, "step": 23578 }, { "epoch": 0.041809289002720795, "grad_norm": 1.2890625, "learning_rate": 0.0017606048522173653, "loss": 0.2577, "step": 23580 }, { "epoch": 0.04181283516803061, "grad_norm": 0.625, "learning_rate": 0.001760564429061304, "loss": 0.2348, "step": 23582 }, { "epoch": 0.041816381333340424, "grad_norm": 0.31640625, "learning_rate": 0.0017605240030163173, "loss": 0.1864, "step": 23584 }, { "epoch": 0.04181992749865024, "grad_norm": 0.2734375, "learning_rate": 0.0017604835740825824, "loss": 0.3181, "step": 23586 }, { "epoch": 0.041823473663960054, "grad_norm": 0.341796875, "learning_rate": 0.001760443142260276, "loss": 0.2285, "step": 23588 }, { "epoch": 0.04182701982926987, "grad_norm": 12.25, "learning_rate": 0.0017604027075495748, "loss": 0.2903, "step": 23590 }, { "epoch": 0.04183056599457969, "grad_norm": 0.21875, "learning_rate": 0.001760362269950656, "loss": 0.1899, "step": 23592 }, { "epoch": 0.041834112159889504, "grad_norm": 0.53125, "learning_rate": 0.0017603218294636963, "loss": 0.2321, "step": 23594 }, { "epoch": 0.04183765832519932, "grad_norm": 0.5390625, "learning_rate": 0.0017602813860888723, "loss": 0.1917, "step": 23596 }, { "epoch": 0.04184120449050913, "grad_norm": 0.5078125, "learning_rate": 0.001760240939826361, "loss": 0.249, "step": 23598 }, { "epoch": 0.04184475065581895, "grad_norm": 1.8515625, "learning_rate": 0.0017602004906763394, "loss": 0.2614, "step": 23600 }, { "epoch": 0.04184829682112876, "grad_norm": 0.341796875, "learning_rate": 0.0017601600386389844, "loss": 0.2456, "step": 23602 }, { "epoch": 0.04185184298643858, "grad_norm": 0.2275390625, "learning_rate": 0.0017601195837144727, "loss": 0.2004, "step": 23604 }, { "epoch": 0.04185538915174839, "grad_norm": 2.609375, "learning_rate": 0.0017600791259029813, "loss": 0.3607, "step": 23606 }, { "epoch": 0.041858935317058206, "grad_norm": 0.37109375, "learning_rate": 0.0017600386652046876, "loss": 0.2111, "step": 23608 }, { "epoch": 0.04186248148236802, "grad_norm": 0.47265625, "learning_rate": 0.0017599982016197682, "loss": 0.1758, "step": 23610 }, { "epoch": 0.041866027647677835, "grad_norm": 0.32421875, "learning_rate": 0.0017599577351483995, "loss": 0.183, "step": 23612 }, { "epoch": 0.04186957381298766, "grad_norm": 0.296875, "learning_rate": 0.0017599172657907596, "loss": 0.2225, "step": 23614 }, { "epoch": 0.04187311997829747, "grad_norm": 0.7734375, "learning_rate": 0.0017598767935470246, "loss": 0.1616, "step": 23616 }, { "epoch": 0.041876666143607286, "grad_norm": 0.416015625, "learning_rate": 0.0017598363184173718, "loss": 0.1702, "step": 23618 }, { "epoch": 0.0418802123089171, "grad_norm": 0.328125, "learning_rate": 0.0017597958404019782, "loss": 0.1603, "step": 23620 }, { "epoch": 0.041883758474226915, "grad_norm": 0.296875, "learning_rate": 0.001759755359501021, "loss": 0.3313, "step": 23622 }, { "epoch": 0.04188730463953673, "grad_norm": 1.7421875, "learning_rate": 0.001759714875714677, "loss": 0.1736, "step": 23624 }, { "epoch": 0.041890850804846544, "grad_norm": 0.298828125, "learning_rate": 0.0017596743890431236, "loss": 0.3156, "step": 23626 }, { "epoch": 0.04189439697015636, "grad_norm": 0.67578125, "learning_rate": 0.0017596338994865374, "loss": 0.1944, "step": 23628 }, { "epoch": 0.04189794313546617, "grad_norm": 1.078125, "learning_rate": 0.0017595934070450954, "loss": 0.1899, "step": 23630 }, { "epoch": 0.04190148930077599, "grad_norm": 2.96875, "learning_rate": 0.0017595529117189754, "loss": 0.2606, "step": 23632 }, { "epoch": 0.0419050354660858, "grad_norm": 0.482421875, "learning_rate": 0.0017595124135083538, "loss": 0.1687, "step": 23634 }, { "epoch": 0.04190858163139562, "grad_norm": 0.38671875, "learning_rate": 0.001759471912413408, "loss": 0.1878, "step": 23636 }, { "epoch": 0.04191212779670544, "grad_norm": 0.298828125, "learning_rate": 0.0017594314084343153, "loss": 0.2149, "step": 23638 }, { "epoch": 0.04191567396201525, "grad_norm": 2.328125, "learning_rate": 0.0017593909015712525, "loss": 0.3939, "step": 23640 }, { "epoch": 0.04191922012732507, "grad_norm": 0.5703125, "learning_rate": 0.0017593503918243972, "loss": 0.1832, "step": 23642 }, { "epoch": 0.04192276629263488, "grad_norm": 0.578125, "learning_rate": 0.001759309879193926, "loss": 0.2584, "step": 23644 }, { "epoch": 0.041926312457944696, "grad_norm": 0.421875, "learning_rate": 0.0017592693636800164, "loss": 0.1827, "step": 23646 }, { "epoch": 0.04192985862325451, "grad_norm": 0.345703125, "learning_rate": 0.0017592288452828455, "loss": 0.3018, "step": 23648 }, { "epoch": 0.041933404788564325, "grad_norm": 0.890625, "learning_rate": 0.0017591883240025907, "loss": 0.2611, "step": 23650 }, { "epoch": 0.04193695095387414, "grad_norm": 0.61328125, "learning_rate": 0.0017591477998394292, "loss": 0.2099, "step": 23652 }, { "epoch": 0.041940497119183955, "grad_norm": 0.359375, "learning_rate": 0.0017591072727935377, "loss": 0.194, "step": 23654 }, { "epoch": 0.04194404328449377, "grad_norm": 0.400390625, "learning_rate": 0.0017590667428650942, "loss": 0.2605, "step": 23656 }, { "epoch": 0.041947589449803584, "grad_norm": 0.78515625, "learning_rate": 0.0017590262100542753, "loss": 0.264, "step": 23658 }, { "epoch": 0.041951135615113405, "grad_norm": 2.234375, "learning_rate": 0.001758985674361259, "loss": 0.5202, "step": 23660 }, { "epoch": 0.04195468178042322, "grad_norm": 0.87890625, "learning_rate": 0.001758945135786222, "loss": 0.2748, "step": 23662 }, { "epoch": 0.041958227945733034, "grad_norm": 0.9609375, "learning_rate": 0.0017589045943293413, "loss": 0.2368, "step": 23664 }, { "epoch": 0.04196177411104285, "grad_norm": 0.69921875, "learning_rate": 0.0017588640499907949, "loss": 0.1768, "step": 23666 }, { "epoch": 0.04196532027635266, "grad_norm": 0.42578125, "learning_rate": 0.00175882350277076, "loss": 0.2313, "step": 23668 }, { "epoch": 0.04196886644166248, "grad_norm": 0.2734375, "learning_rate": 0.0017587829526694138, "loss": 0.1453, "step": 23670 }, { "epoch": 0.04197241260697229, "grad_norm": 1.1796875, "learning_rate": 0.0017587423996869335, "loss": 0.3998, "step": 23672 }, { "epoch": 0.04197595877228211, "grad_norm": 0.224609375, "learning_rate": 0.0017587018438234966, "loss": 0.1915, "step": 23674 }, { "epoch": 0.04197950493759192, "grad_norm": 0.625, "learning_rate": 0.0017586612850792804, "loss": 0.2312, "step": 23676 }, { "epoch": 0.041983051102901736, "grad_norm": 1.28125, "learning_rate": 0.0017586207234544626, "loss": 0.2724, "step": 23678 }, { "epoch": 0.04198659726821155, "grad_norm": 0.251953125, "learning_rate": 0.0017585801589492201, "loss": 0.1967, "step": 23680 }, { "epoch": 0.04199014343352137, "grad_norm": 0.314453125, "learning_rate": 0.0017585395915637307, "loss": 0.2319, "step": 23682 }, { "epoch": 0.04199368959883119, "grad_norm": 0.291015625, "learning_rate": 0.0017584990212981715, "loss": 0.17, "step": 23684 }, { "epoch": 0.041997235764141, "grad_norm": 1.0625, "learning_rate": 0.0017584584481527199, "loss": 0.2614, "step": 23686 }, { "epoch": 0.042000781929450816, "grad_norm": 0.41015625, "learning_rate": 0.001758417872127554, "loss": 0.2044, "step": 23688 }, { "epoch": 0.04200432809476063, "grad_norm": 0.216796875, "learning_rate": 0.0017583772932228503, "loss": 0.2275, "step": 23690 }, { "epoch": 0.042007874260070445, "grad_norm": 2.6875, "learning_rate": 0.001758336711438787, "loss": 0.4451, "step": 23692 }, { "epoch": 0.04201142042538026, "grad_norm": 0.73828125, "learning_rate": 0.0017582961267755414, "loss": 0.3382, "step": 23694 }, { "epoch": 0.042014966590690074, "grad_norm": 0.45703125, "learning_rate": 0.0017582555392332906, "loss": 0.3099, "step": 23696 }, { "epoch": 0.04201851275599989, "grad_norm": 1.875, "learning_rate": 0.0017582149488122127, "loss": 0.2192, "step": 23698 }, { "epoch": 0.0420220589213097, "grad_norm": 0.8671875, "learning_rate": 0.001758174355512485, "loss": 0.2031, "step": 23700 }, { "epoch": 0.04202560508661952, "grad_norm": 0.609375, "learning_rate": 0.001758133759334285, "loss": 0.2054, "step": 23702 }, { "epoch": 0.04202915125192933, "grad_norm": 0.31640625, "learning_rate": 0.00175809316027779, "loss": 0.2175, "step": 23704 }, { "epoch": 0.042032697417239154, "grad_norm": 1.453125, "learning_rate": 0.0017580525583431779, "loss": 0.3159, "step": 23706 }, { "epoch": 0.04203624358254897, "grad_norm": 0.376953125, "learning_rate": 0.0017580119535306263, "loss": 0.1843, "step": 23708 }, { "epoch": 0.04203978974785878, "grad_norm": 0.388671875, "learning_rate": 0.0017579713458403124, "loss": 0.1867, "step": 23710 }, { "epoch": 0.0420433359131686, "grad_norm": 0.3203125, "learning_rate": 0.0017579307352724141, "loss": 0.2282, "step": 23712 }, { "epoch": 0.04204688207847841, "grad_norm": 0.310546875, "learning_rate": 0.0017578901218271087, "loss": 0.1697, "step": 23714 }, { "epoch": 0.042050428243788227, "grad_norm": 0.271484375, "learning_rate": 0.0017578495055045747, "loss": 0.2352, "step": 23716 }, { "epoch": 0.04205397440909804, "grad_norm": 0.3203125, "learning_rate": 0.0017578088863049886, "loss": 0.1852, "step": 23718 }, { "epoch": 0.042057520574407856, "grad_norm": 0.59765625, "learning_rate": 0.0017577682642285284, "loss": 0.2491, "step": 23720 }, { "epoch": 0.04206106673971767, "grad_norm": 0.296875, "learning_rate": 0.0017577276392753722, "loss": 0.219, "step": 23722 }, { "epoch": 0.042064612905027485, "grad_norm": 0.455078125, "learning_rate": 0.0017576870114456972, "loss": 0.1778, "step": 23724 }, { "epoch": 0.0420681590703373, "grad_norm": 1.5703125, "learning_rate": 0.0017576463807396814, "loss": 0.3497, "step": 23726 }, { "epoch": 0.04207170523564712, "grad_norm": 0.734375, "learning_rate": 0.0017576057471575023, "loss": 0.3019, "step": 23728 }, { "epoch": 0.042075251400956935, "grad_norm": 0.40625, "learning_rate": 0.0017575651106993373, "loss": 0.2189, "step": 23730 }, { "epoch": 0.04207879756626675, "grad_norm": 1.3125, "learning_rate": 0.001757524471365365, "loss": 0.2363, "step": 23732 }, { "epoch": 0.042082343731576564, "grad_norm": 0.359375, "learning_rate": 0.0017574838291557625, "loss": 0.2031, "step": 23734 }, { "epoch": 0.04208588989688638, "grad_norm": 1.21875, "learning_rate": 0.0017574431840707075, "loss": 0.1993, "step": 23736 }, { "epoch": 0.042089436062196194, "grad_norm": 0.3671875, "learning_rate": 0.0017574025361103778, "loss": 0.2137, "step": 23738 }, { "epoch": 0.04209298222750601, "grad_norm": 0.271484375, "learning_rate": 0.0017573618852749513, "loss": 0.2322, "step": 23740 }, { "epoch": 0.04209652839281582, "grad_norm": 0.5703125, "learning_rate": 0.001757321231564606, "loss": 0.1567, "step": 23742 }, { "epoch": 0.04210007455812564, "grad_norm": 0.74609375, "learning_rate": 0.0017572805749795192, "loss": 0.2025, "step": 23744 }, { "epoch": 0.04210362072343545, "grad_norm": 0.515625, "learning_rate": 0.001757239915519869, "loss": 0.1775, "step": 23746 }, { "epoch": 0.042107166888745266, "grad_norm": 0.625, "learning_rate": 0.0017571992531858333, "loss": 0.2102, "step": 23748 }, { "epoch": 0.04211071305405509, "grad_norm": 0.7578125, "learning_rate": 0.0017571585879775898, "loss": 0.2448, "step": 23750 }, { "epoch": 0.0421142592193649, "grad_norm": 0.62109375, "learning_rate": 0.001757117919895316, "loss": 0.1823, "step": 23752 }, { "epoch": 0.04211780538467472, "grad_norm": 0.2890625, "learning_rate": 0.0017570772489391906, "loss": 0.1865, "step": 23754 }, { "epoch": 0.04212135154998453, "grad_norm": 0.216796875, "learning_rate": 0.001757036575109391, "loss": 0.1847, "step": 23756 }, { "epoch": 0.042124897715294346, "grad_norm": 0.7734375, "learning_rate": 0.0017569958984060947, "loss": 0.1851, "step": 23758 }, { "epoch": 0.04212844388060416, "grad_norm": 0.416015625, "learning_rate": 0.0017569552188294799, "loss": 0.2543, "step": 23760 }, { "epoch": 0.042131990045913975, "grad_norm": 0.87890625, "learning_rate": 0.001756914536379725, "loss": 0.2409, "step": 23762 }, { "epoch": 0.04213553621122379, "grad_norm": 1.046875, "learning_rate": 0.0017568738510570074, "loss": 0.1804, "step": 23764 }, { "epoch": 0.042139082376533604, "grad_norm": 2.578125, "learning_rate": 0.001756833162861505, "loss": 0.3695, "step": 23766 }, { "epoch": 0.04214262854184342, "grad_norm": 0.94140625, "learning_rate": 0.0017567924717933959, "loss": 0.2674, "step": 23768 }, { "epoch": 0.04214617470715323, "grad_norm": 0.3125, "learning_rate": 0.001756751777852858, "loss": 0.24, "step": 23770 }, { "epoch": 0.04214972087246305, "grad_norm": 0.3359375, "learning_rate": 0.0017567110810400694, "loss": 0.2174, "step": 23772 }, { "epoch": 0.04215326703777287, "grad_norm": 0.5390625, "learning_rate": 0.0017566703813552082, "loss": 0.2044, "step": 23774 }, { "epoch": 0.042156813203082684, "grad_norm": 0.181640625, "learning_rate": 0.001756629678798452, "loss": 0.1134, "step": 23776 }, { "epoch": 0.0421603593683925, "grad_norm": 0.82421875, "learning_rate": 0.0017565889733699787, "loss": 0.259, "step": 23778 }, { "epoch": 0.04216390553370231, "grad_norm": 0.48828125, "learning_rate": 0.0017565482650699672, "loss": 0.2334, "step": 23780 }, { "epoch": 0.04216745169901213, "grad_norm": 0.65234375, "learning_rate": 0.0017565075538985946, "loss": 0.2257, "step": 23782 }, { "epoch": 0.04217099786432194, "grad_norm": 0.5, "learning_rate": 0.0017564668398560392, "loss": 0.1982, "step": 23784 }, { "epoch": 0.04217454402963176, "grad_norm": 0.90234375, "learning_rate": 0.001756426122942479, "loss": 0.1686, "step": 23786 }, { "epoch": 0.04217809019494157, "grad_norm": 0.341796875, "learning_rate": 0.001756385403158093, "loss": 0.1941, "step": 23788 }, { "epoch": 0.042181636360251386, "grad_norm": 0.416015625, "learning_rate": 0.0017563446805030582, "loss": 0.229, "step": 23790 }, { "epoch": 0.0421851825255612, "grad_norm": 0.578125, "learning_rate": 0.0017563039549775526, "loss": 0.1884, "step": 23792 }, { "epoch": 0.042188728690871015, "grad_norm": 0.2216796875, "learning_rate": 0.001756263226581755, "loss": 0.1616, "step": 23794 }, { "epoch": 0.042192274856180836, "grad_norm": 0.416015625, "learning_rate": 0.0017562224953158432, "loss": 0.1863, "step": 23796 }, { "epoch": 0.04219582102149065, "grad_norm": 0.65625, "learning_rate": 0.0017561817611799957, "loss": 0.2226, "step": 23798 }, { "epoch": 0.042199367186800466, "grad_norm": 0.453125, "learning_rate": 0.00175614102417439, "loss": 0.2248, "step": 23800 }, { "epoch": 0.04220291335211028, "grad_norm": 0.416015625, "learning_rate": 0.0017561002842992046, "loss": 0.1973, "step": 23802 }, { "epoch": 0.042206459517420095, "grad_norm": 0.349609375, "learning_rate": 0.001756059541554618, "loss": 0.2179, "step": 23804 }, { "epoch": 0.04221000568272991, "grad_norm": 0.283203125, "learning_rate": 0.0017560187959408077, "loss": 0.2762, "step": 23806 }, { "epoch": 0.042213551848039724, "grad_norm": 0.435546875, "learning_rate": 0.0017559780474579523, "loss": 0.169, "step": 23808 }, { "epoch": 0.04221709801334954, "grad_norm": 1.0390625, "learning_rate": 0.00175593729610623, "loss": 0.206, "step": 23810 }, { "epoch": 0.04222064417865935, "grad_norm": 0.412109375, "learning_rate": 0.001755896541885819, "loss": 0.2448, "step": 23812 }, { "epoch": 0.04222419034396917, "grad_norm": 0.640625, "learning_rate": 0.0017558557847968973, "loss": 0.2077, "step": 23814 }, { "epoch": 0.04222773650927898, "grad_norm": 0.9609375, "learning_rate": 0.0017558150248396436, "loss": 0.2575, "step": 23816 }, { "epoch": 0.0422312826745888, "grad_norm": 0.1787109375, "learning_rate": 0.001755774262014236, "loss": 0.1484, "step": 23818 }, { "epoch": 0.04223482883989862, "grad_norm": 0.408203125, "learning_rate": 0.0017557334963208525, "loss": 0.2298, "step": 23820 }, { "epoch": 0.04223837500520843, "grad_norm": 0.578125, "learning_rate": 0.0017556927277596714, "loss": 0.2882, "step": 23822 }, { "epoch": 0.04224192117051825, "grad_norm": 2.890625, "learning_rate": 0.0017556519563308717, "loss": 0.292, "step": 23824 }, { "epoch": 0.04224546733582806, "grad_norm": 0.259765625, "learning_rate": 0.001755611182034631, "loss": 0.1942, "step": 23826 }, { "epoch": 0.042249013501137876, "grad_norm": 0.7421875, "learning_rate": 0.0017555704048711276, "loss": 0.2217, "step": 23828 }, { "epoch": 0.04225255966644769, "grad_norm": 0.224609375, "learning_rate": 0.0017555296248405405, "loss": 0.1618, "step": 23830 }, { "epoch": 0.042256105831757505, "grad_norm": 0.33984375, "learning_rate": 0.0017554888419430471, "loss": 0.2221, "step": 23832 }, { "epoch": 0.04225965199706732, "grad_norm": 0.50390625, "learning_rate": 0.0017554480561788265, "loss": 0.1594, "step": 23834 }, { "epoch": 0.042263198162377134, "grad_norm": 0.515625, "learning_rate": 0.0017554072675480565, "loss": 0.2104, "step": 23836 }, { "epoch": 0.04226674432768695, "grad_norm": 0.412109375, "learning_rate": 0.0017553664760509165, "loss": 0.2439, "step": 23838 }, { "epoch": 0.042270290492996763, "grad_norm": 0.70703125, "learning_rate": 0.0017553256816875838, "loss": 0.1477, "step": 23840 }, { "epoch": 0.042273836658306585, "grad_norm": 0.7421875, "learning_rate": 0.0017552848844582369, "loss": 0.2293, "step": 23842 }, { "epoch": 0.0422773828236164, "grad_norm": 0.455078125, "learning_rate": 0.001755244084363055, "loss": 0.2671, "step": 23844 }, { "epoch": 0.042280928988926214, "grad_norm": 1.0, "learning_rate": 0.001755203281402216, "loss": 0.1841, "step": 23846 }, { "epoch": 0.04228447515423603, "grad_norm": 0.89453125, "learning_rate": 0.0017551624755758985, "loss": 0.2281, "step": 23848 }, { "epoch": 0.04228802131954584, "grad_norm": 0.2490234375, "learning_rate": 0.0017551216668842805, "loss": 0.1636, "step": 23850 }, { "epoch": 0.04229156748485566, "grad_norm": 0.421875, "learning_rate": 0.0017550808553275414, "loss": 0.3511, "step": 23852 }, { "epoch": 0.04229511365016547, "grad_norm": 0.63671875, "learning_rate": 0.001755040040905859, "loss": 0.1945, "step": 23854 }, { "epoch": 0.04229865981547529, "grad_norm": 2.71875, "learning_rate": 0.001754999223619412, "loss": 0.3168, "step": 23856 }, { "epoch": 0.0423022059807851, "grad_norm": 1.03125, "learning_rate": 0.0017549584034683788, "loss": 0.2384, "step": 23858 }, { "epoch": 0.042305752146094916, "grad_norm": 1.984375, "learning_rate": 0.0017549175804529378, "loss": 0.2456, "step": 23860 }, { "epoch": 0.04230929831140473, "grad_norm": 0.6015625, "learning_rate": 0.001754876754573268, "loss": 0.1457, "step": 23862 }, { "epoch": 0.04231284447671455, "grad_norm": 0.953125, "learning_rate": 0.0017548359258295475, "loss": 0.308, "step": 23864 }, { "epoch": 0.04231639064202437, "grad_norm": 0.35546875, "learning_rate": 0.001754795094221955, "loss": 0.2502, "step": 23866 }, { "epoch": 0.04231993680733418, "grad_norm": 0.51171875, "learning_rate": 0.0017547542597506692, "loss": 0.1906, "step": 23868 }, { "epoch": 0.042323482972643996, "grad_norm": 0.58984375, "learning_rate": 0.0017547134224158685, "loss": 0.2356, "step": 23870 }, { "epoch": 0.04232702913795381, "grad_norm": 1.1171875, "learning_rate": 0.0017546725822177318, "loss": 0.2045, "step": 23872 }, { "epoch": 0.042330575303263625, "grad_norm": 0.6796875, "learning_rate": 0.0017546317391564372, "loss": 0.1401, "step": 23874 }, { "epoch": 0.04233412146857344, "grad_norm": 0.498046875, "learning_rate": 0.0017545908932321641, "loss": 0.2598, "step": 23876 }, { "epoch": 0.042337667633883254, "grad_norm": 0.59375, "learning_rate": 0.0017545500444450903, "loss": 0.2051, "step": 23878 }, { "epoch": 0.04234121379919307, "grad_norm": 0.2890625, "learning_rate": 0.001754509192795395, "loss": 0.184, "step": 23880 }, { "epoch": 0.04234475996450288, "grad_norm": 1.1484375, "learning_rate": 0.0017544683382832566, "loss": 0.3238, "step": 23882 }, { "epoch": 0.0423483061298127, "grad_norm": 0.357421875, "learning_rate": 0.0017544274809088533, "loss": 0.3192, "step": 23884 }, { "epoch": 0.04235185229512252, "grad_norm": 2.109375, "learning_rate": 0.001754386620672365, "loss": 0.4365, "step": 23886 }, { "epoch": 0.042355398460432334, "grad_norm": 0.69921875, "learning_rate": 0.00175434575757397, "loss": 0.2085, "step": 23888 }, { "epoch": 0.04235894462574215, "grad_norm": 0.451171875, "learning_rate": 0.001754304891613846, "loss": 0.244, "step": 23890 }, { "epoch": 0.04236249079105196, "grad_norm": 0.1650390625, "learning_rate": 0.0017542640227921726, "loss": 0.3557, "step": 23892 }, { "epoch": 0.04236603695636178, "grad_norm": 0.69921875, "learning_rate": 0.0017542231511091287, "loss": 0.2205, "step": 23894 }, { "epoch": 0.04236958312167159, "grad_norm": 1.2890625, "learning_rate": 0.0017541822765648927, "loss": 0.2919, "step": 23896 }, { "epoch": 0.042373129286981406, "grad_norm": 0.458984375, "learning_rate": 0.0017541413991596433, "loss": 0.2988, "step": 23898 }, { "epoch": 0.04237667545229122, "grad_norm": 0.89453125, "learning_rate": 0.0017541005188935594, "loss": 0.2382, "step": 23900 }, { "epoch": 0.042380221617601035, "grad_norm": 0.33203125, "learning_rate": 0.00175405963576682, "loss": 0.1706, "step": 23902 }, { "epoch": 0.04238376778291085, "grad_norm": 0.2451171875, "learning_rate": 0.0017540187497796034, "loss": 0.2317, "step": 23904 }, { "epoch": 0.042387313948220665, "grad_norm": 1.140625, "learning_rate": 0.0017539778609320892, "loss": 0.2345, "step": 23906 }, { "epoch": 0.04239086011353048, "grad_norm": 0.474609375, "learning_rate": 0.001753936969224455, "loss": 0.2592, "step": 23908 }, { "epoch": 0.0423944062788403, "grad_norm": 0.345703125, "learning_rate": 0.0017538960746568807, "loss": 0.1874, "step": 23910 }, { "epoch": 0.042397952444150115, "grad_norm": 0.359375, "learning_rate": 0.0017538551772295448, "loss": 0.2347, "step": 23912 }, { "epoch": 0.04240149860945993, "grad_norm": 0.263671875, "learning_rate": 0.001753814276942626, "loss": 0.1409, "step": 23914 }, { "epoch": 0.042405044774769744, "grad_norm": 0.5078125, "learning_rate": 0.0017537733737963035, "loss": 0.198, "step": 23916 }, { "epoch": 0.04240859094007956, "grad_norm": 0.5390625, "learning_rate": 0.001753732467790756, "loss": 0.1597, "step": 23918 }, { "epoch": 0.04241213710538937, "grad_norm": 0.2001953125, "learning_rate": 0.0017536915589261622, "loss": 0.1795, "step": 23920 }, { "epoch": 0.04241568327069919, "grad_norm": 0.19140625, "learning_rate": 0.0017536506472027014, "loss": 0.1707, "step": 23922 }, { "epoch": 0.042419229436009, "grad_norm": 1.53125, "learning_rate": 0.0017536097326205525, "loss": 0.1775, "step": 23924 }, { "epoch": 0.04242277560131882, "grad_norm": 0.271484375, "learning_rate": 0.0017535688151798943, "loss": 0.2343, "step": 23926 }, { "epoch": 0.04242632176662863, "grad_norm": 0.4140625, "learning_rate": 0.0017535278948809054, "loss": 0.2046, "step": 23928 }, { "epoch": 0.042429867931938446, "grad_norm": 0.546875, "learning_rate": 0.001753486971723765, "loss": 0.1644, "step": 23930 }, { "epoch": 0.04243341409724827, "grad_norm": 0.93359375, "learning_rate": 0.0017534460457086527, "loss": 0.1567, "step": 23932 }, { "epoch": 0.04243696026255808, "grad_norm": 0.404296875, "learning_rate": 0.001753405116835747, "loss": 0.2068, "step": 23934 }, { "epoch": 0.0424405064278679, "grad_norm": 0.1708984375, "learning_rate": 0.0017533641851052264, "loss": 0.2209, "step": 23936 }, { "epoch": 0.04244405259317771, "grad_norm": 0.392578125, "learning_rate": 0.001753323250517271, "loss": 0.1865, "step": 23938 }, { "epoch": 0.042447598758487526, "grad_norm": 3.03125, "learning_rate": 0.0017532823130720586, "loss": 0.2062, "step": 23940 }, { "epoch": 0.04245114492379734, "grad_norm": 0.2099609375, "learning_rate": 0.0017532413727697688, "loss": 0.207, "step": 23942 }, { "epoch": 0.042454691089107155, "grad_norm": 0.466796875, "learning_rate": 0.001753200429610581, "loss": 0.2045, "step": 23944 }, { "epoch": 0.04245823725441697, "grad_norm": 0.400390625, "learning_rate": 0.001753159483594674, "loss": 0.2228, "step": 23946 }, { "epoch": 0.042461783419726784, "grad_norm": 0.68359375, "learning_rate": 0.0017531185347222265, "loss": 0.3209, "step": 23948 }, { "epoch": 0.0424653295850366, "grad_norm": 0.64453125, "learning_rate": 0.0017530775829934183, "loss": 0.2182, "step": 23950 }, { "epoch": 0.04246887575034641, "grad_norm": 0.181640625, "learning_rate": 0.0017530366284084279, "loss": 0.1772, "step": 23952 }, { "epoch": 0.042472421915656235, "grad_norm": 0.1904296875, "learning_rate": 0.0017529956709674344, "loss": 0.1603, "step": 23954 }, { "epoch": 0.04247596808096605, "grad_norm": 0.69140625, "learning_rate": 0.0017529547106706171, "loss": 0.2544, "step": 23956 }, { "epoch": 0.042479514246275864, "grad_norm": 0.55078125, "learning_rate": 0.0017529137475181553, "loss": 0.1482, "step": 23958 }, { "epoch": 0.04248306041158568, "grad_norm": 0.4140625, "learning_rate": 0.0017528727815102284, "loss": 0.1899, "step": 23960 }, { "epoch": 0.04248660657689549, "grad_norm": 1.6875, "learning_rate": 0.0017528318126470148, "loss": 0.2124, "step": 23962 }, { "epoch": 0.04249015274220531, "grad_norm": 0.953125, "learning_rate": 0.0017527908409286942, "loss": 0.2147, "step": 23964 }, { "epoch": 0.04249369890751512, "grad_norm": 0.2021484375, "learning_rate": 0.0017527498663554458, "loss": 0.1988, "step": 23966 }, { "epoch": 0.042497245072824937, "grad_norm": 1.609375, "learning_rate": 0.0017527088889274483, "loss": 0.1801, "step": 23968 }, { "epoch": 0.04250079123813475, "grad_norm": 0.5078125, "learning_rate": 0.0017526679086448816, "loss": 0.13, "step": 23970 }, { "epoch": 0.042504337403444566, "grad_norm": 0.384765625, "learning_rate": 0.0017526269255079244, "loss": 0.198, "step": 23972 }, { "epoch": 0.04250788356875438, "grad_norm": 0.474609375, "learning_rate": 0.0017525859395167561, "loss": 0.1921, "step": 23974 }, { "epoch": 0.042511429734064195, "grad_norm": 0.7890625, "learning_rate": 0.0017525449506715558, "loss": 0.3088, "step": 23976 }, { "epoch": 0.042514975899374016, "grad_norm": 0.1806640625, "learning_rate": 0.001752503958972503, "loss": 0.1595, "step": 23978 }, { "epoch": 0.04251852206468383, "grad_norm": 1.1328125, "learning_rate": 0.0017524629644197773, "loss": 0.1966, "step": 23980 }, { "epoch": 0.042522068229993645, "grad_norm": 0.5703125, "learning_rate": 0.0017524219670135572, "loss": 0.2517, "step": 23982 }, { "epoch": 0.04252561439530346, "grad_norm": 0.2734375, "learning_rate": 0.0017523809667540225, "loss": 0.1734, "step": 23984 }, { "epoch": 0.042529160560613274, "grad_norm": 1.78125, "learning_rate": 0.0017523399636413526, "loss": 0.2952, "step": 23986 }, { "epoch": 0.04253270672592309, "grad_norm": 0.71875, "learning_rate": 0.0017522989576757263, "loss": 0.2196, "step": 23988 }, { "epoch": 0.042536252891232904, "grad_norm": 0.328125, "learning_rate": 0.0017522579488573233, "loss": 0.1897, "step": 23990 }, { "epoch": 0.04253979905654272, "grad_norm": 1.0, "learning_rate": 0.0017522169371863231, "loss": 0.2234, "step": 23992 }, { "epoch": 0.04254334522185253, "grad_norm": 1.03125, "learning_rate": 0.0017521759226629048, "loss": 0.2371, "step": 23994 }, { "epoch": 0.04254689138716235, "grad_norm": 0.462890625, "learning_rate": 0.001752134905287248, "loss": 0.2498, "step": 23996 }, { "epoch": 0.04255043755247216, "grad_norm": 0.451171875, "learning_rate": 0.0017520938850595317, "loss": 0.2481, "step": 23998 }, { "epoch": 0.04255398371778198, "grad_norm": 0.96875, "learning_rate": 0.0017520528619799354, "loss": 0.3016, "step": 24000 }, { "epoch": 0.0425575298830918, "grad_norm": 0.23046875, "learning_rate": 0.0017520118360486389, "loss": 0.2567, "step": 24002 }, { "epoch": 0.04256107604840161, "grad_norm": 2.96875, "learning_rate": 0.001751970807265821, "loss": 0.27, "step": 24004 }, { "epoch": 0.04256462221371143, "grad_norm": 0.44921875, "learning_rate": 0.0017519297756316621, "loss": 0.1887, "step": 24006 }, { "epoch": 0.04256816837902124, "grad_norm": 0.765625, "learning_rate": 0.0017518887411463408, "loss": 0.2201, "step": 24008 }, { "epoch": 0.042571714544331056, "grad_norm": 0.392578125, "learning_rate": 0.0017518477038100365, "loss": 0.1935, "step": 24010 }, { "epoch": 0.04257526070964087, "grad_norm": 0.416015625, "learning_rate": 0.001751806663622929, "loss": 0.1803, "step": 24012 }, { "epoch": 0.042578806874950685, "grad_norm": 0.2177734375, "learning_rate": 0.0017517656205851982, "loss": 0.1566, "step": 24014 }, { "epoch": 0.0425823530402605, "grad_norm": 0.44921875, "learning_rate": 0.001751724574697023, "loss": 0.1373, "step": 24016 }, { "epoch": 0.042585899205570314, "grad_norm": 0.53125, "learning_rate": 0.001751683525958583, "loss": 0.1869, "step": 24018 }, { "epoch": 0.04258944537088013, "grad_norm": 0.3828125, "learning_rate": 0.0017516424743700576, "loss": 0.2613, "step": 24020 }, { "epoch": 0.04259299153618995, "grad_norm": 0.375, "learning_rate": 0.0017516014199316268, "loss": 0.2245, "step": 24022 }, { "epoch": 0.042596537701499765, "grad_norm": 0.2490234375, "learning_rate": 0.00175156036264347, "loss": 0.1988, "step": 24024 }, { "epoch": 0.04260008386680958, "grad_norm": 0.45703125, "learning_rate": 0.0017515193025057665, "loss": 0.2416, "step": 24026 }, { "epoch": 0.042603630032119394, "grad_norm": 0.2099609375, "learning_rate": 0.001751478239518696, "loss": 0.2254, "step": 24028 }, { "epoch": 0.04260717619742921, "grad_norm": 0.21875, "learning_rate": 0.001751437173682438, "loss": 0.1807, "step": 24030 }, { "epoch": 0.04261072236273902, "grad_norm": 1.40625, "learning_rate": 0.0017513961049971723, "loss": 0.2672, "step": 24032 }, { "epoch": 0.04261426852804884, "grad_norm": 1.796875, "learning_rate": 0.0017513550334630785, "loss": 0.2345, "step": 24034 }, { "epoch": 0.04261781469335865, "grad_norm": 0.46484375, "learning_rate": 0.001751313959080336, "loss": 0.1862, "step": 24036 }, { "epoch": 0.04262136085866847, "grad_norm": 0.640625, "learning_rate": 0.0017512728818491247, "loss": 0.2259, "step": 24038 }, { "epoch": 0.04262490702397828, "grad_norm": 1.9921875, "learning_rate": 0.001751231801769624, "loss": 0.5851, "step": 24040 }, { "epoch": 0.042628453189288096, "grad_norm": 0.408203125, "learning_rate": 0.0017511907188420136, "loss": 0.2609, "step": 24042 }, { "epoch": 0.04263199935459791, "grad_norm": 0.435546875, "learning_rate": 0.0017511496330664736, "loss": 0.1681, "step": 24044 }, { "epoch": 0.04263554551990773, "grad_norm": 0.443359375, "learning_rate": 0.0017511085444431832, "loss": 0.1393, "step": 24046 }, { "epoch": 0.042639091685217546, "grad_norm": 0.671875, "learning_rate": 0.001751067452972322, "loss": 0.2269, "step": 24048 }, { "epoch": 0.04264263785052736, "grad_norm": 0.9453125, "learning_rate": 0.00175102635865407, "loss": 0.2427, "step": 24050 }, { "epoch": 0.042646184015837175, "grad_norm": 0.392578125, "learning_rate": 0.001750985261488607, "loss": 0.2772, "step": 24052 }, { "epoch": 0.04264973018114699, "grad_norm": 1.3671875, "learning_rate": 0.0017509441614761126, "loss": 0.2939, "step": 24054 }, { "epoch": 0.042653276346456805, "grad_norm": 0.68359375, "learning_rate": 0.0017509030586167664, "loss": 0.2081, "step": 24056 }, { "epoch": 0.04265682251176662, "grad_norm": 1.6953125, "learning_rate": 0.0017508619529107486, "loss": 0.198, "step": 24058 }, { "epoch": 0.042660368677076434, "grad_norm": 0.59375, "learning_rate": 0.0017508208443582384, "loss": 0.1808, "step": 24060 }, { "epoch": 0.04266391484238625, "grad_norm": 1.7734375, "learning_rate": 0.0017507797329594159, "loss": 0.3574, "step": 24062 }, { "epoch": 0.04266746100769606, "grad_norm": 0.447265625, "learning_rate": 0.001750738618714461, "loss": 0.1958, "step": 24064 }, { "epoch": 0.04267100717300588, "grad_norm": 0.28515625, "learning_rate": 0.0017506975016235533, "loss": 0.1873, "step": 24066 }, { "epoch": 0.0426745533383157, "grad_norm": 0.7421875, "learning_rate": 0.0017506563816868728, "loss": 0.1911, "step": 24068 }, { "epoch": 0.04267809950362551, "grad_norm": 0.81640625, "learning_rate": 0.0017506152589045992, "loss": 0.1759, "step": 24070 }, { "epoch": 0.04268164566893533, "grad_norm": 0.8671875, "learning_rate": 0.0017505741332769123, "loss": 0.3919, "step": 24072 }, { "epoch": 0.04268519183424514, "grad_norm": 0.734375, "learning_rate": 0.001750533004803992, "loss": 0.197, "step": 24074 }, { "epoch": 0.04268873799955496, "grad_norm": 0.5625, "learning_rate": 0.0017504918734860182, "loss": 0.3264, "step": 24076 }, { "epoch": 0.04269228416486477, "grad_norm": 0.4140625, "learning_rate": 0.001750450739323171, "loss": 0.1925, "step": 24078 }, { "epoch": 0.042695830330174586, "grad_norm": 1.1171875, "learning_rate": 0.00175040960231563, "loss": 0.2104, "step": 24080 }, { "epoch": 0.0426993764954844, "grad_norm": 0.265625, "learning_rate": 0.0017503684624635752, "loss": 0.1488, "step": 24082 }, { "epoch": 0.042702922660794215, "grad_norm": 0.5546875, "learning_rate": 0.0017503273197671863, "loss": 0.2518, "step": 24084 }, { "epoch": 0.04270646882610403, "grad_norm": 0.33984375, "learning_rate": 0.0017502861742266437, "loss": 0.1225, "step": 24086 }, { "epoch": 0.042710014991413844, "grad_norm": 0.6484375, "learning_rate": 0.0017502450258421271, "loss": 0.242, "step": 24088 }, { "epoch": 0.04271356115672366, "grad_norm": 0.97265625, "learning_rate": 0.0017502038746138166, "loss": 0.1617, "step": 24090 }, { "epoch": 0.04271710732203348, "grad_norm": 0.31640625, "learning_rate": 0.0017501627205418918, "loss": 0.1525, "step": 24092 }, { "epoch": 0.042720653487343295, "grad_norm": 2.75, "learning_rate": 0.001750121563626533, "loss": 0.2369, "step": 24094 }, { "epoch": 0.04272419965265311, "grad_norm": 0.5078125, "learning_rate": 0.00175008040386792, "loss": 0.2657, "step": 24096 }, { "epoch": 0.042727745817962924, "grad_norm": 0.390625, "learning_rate": 0.001750039241266233, "loss": 0.1413, "step": 24098 }, { "epoch": 0.04273129198327274, "grad_norm": 0.2021484375, "learning_rate": 0.0017499980758216521, "loss": 0.1775, "step": 24100 }, { "epoch": 0.04273483814858255, "grad_norm": 0.63671875, "learning_rate": 0.0017499569075343571, "loss": 0.178, "step": 24102 }, { "epoch": 0.04273838431389237, "grad_norm": 0.404296875, "learning_rate": 0.001749915736404528, "loss": 0.2415, "step": 24104 }, { "epoch": 0.04274193047920218, "grad_norm": 0.3515625, "learning_rate": 0.0017498745624323448, "loss": 0.2049, "step": 24106 }, { "epoch": 0.042745476644512, "grad_norm": 0.283203125, "learning_rate": 0.0017498333856179882, "loss": 0.1907, "step": 24108 }, { "epoch": 0.04274902280982181, "grad_norm": 0.435546875, "learning_rate": 0.0017497922059616377, "loss": 0.1859, "step": 24110 }, { "epoch": 0.042752568975131626, "grad_norm": 0.279296875, "learning_rate": 0.0017497510234634737, "loss": 0.2112, "step": 24112 }, { "epoch": 0.04275611514044145, "grad_norm": 1.25, "learning_rate": 0.0017497098381236758, "loss": 0.2508, "step": 24114 }, { "epoch": 0.04275966130575126, "grad_norm": 0.8671875, "learning_rate": 0.0017496686499424245, "loss": 0.2283, "step": 24116 }, { "epoch": 0.04276320747106108, "grad_norm": 1.421875, "learning_rate": 0.0017496274589199003, "loss": 0.3208, "step": 24118 }, { "epoch": 0.04276675363637089, "grad_norm": 0.75390625, "learning_rate": 0.0017495862650562823, "loss": 0.1682, "step": 24120 }, { "epoch": 0.042770299801680706, "grad_norm": 0.609375, "learning_rate": 0.0017495450683517517, "loss": 0.1606, "step": 24122 }, { "epoch": 0.04277384596699052, "grad_norm": 0.58984375, "learning_rate": 0.0017495038688064882, "loss": 0.2723, "step": 24124 }, { "epoch": 0.042777392132300335, "grad_norm": 0.90625, "learning_rate": 0.0017494626664206721, "loss": 0.2757, "step": 24126 }, { "epoch": 0.04278093829761015, "grad_norm": 0.330078125, "learning_rate": 0.0017494214611944833, "loss": 0.2325, "step": 24128 }, { "epoch": 0.042784484462919964, "grad_norm": 0.24609375, "learning_rate": 0.0017493802531281027, "loss": 0.1646, "step": 24130 }, { "epoch": 0.04278803062822978, "grad_norm": 1.734375, "learning_rate": 0.00174933904222171, "loss": 0.1959, "step": 24132 }, { "epoch": 0.04279157679353959, "grad_norm": 0.2001953125, "learning_rate": 0.0017492978284754852, "loss": 0.1478, "step": 24134 }, { "epoch": 0.042795122958849414, "grad_norm": 0.43359375, "learning_rate": 0.0017492566118896089, "loss": 0.2366, "step": 24136 }, { "epoch": 0.04279866912415923, "grad_norm": 2.15625, "learning_rate": 0.0017492153924642618, "loss": 0.2153, "step": 24138 }, { "epoch": 0.042802215289469044, "grad_norm": 2.34375, "learning_rate": 0.001749174170199623, "loss": 0.3991, "step": 24140 }, { "epoch": 0.04280576145477886, "grad_norm": 0.25390625, "learning_rate": 0.001749132945095874, "loss": 0.2526, "step": 24142 }, { "epoch": 0.04280930762008867, "grad_norm": 1.28125, "learning_rate": 0.0017490917171531942, "loss": 0.2236, "step": 24144 }, { "epoch": 0.04281285378539849, "grad_norm": 0.328125, "learning_rate": 0.0017490504863717645, "loss": 0.1934, "step": 24146 }, { "epoch": 0.0428163999507083, "grad_norm": 0.515625, "learning_rate": 0.0017490092527517648, "loss": 0.179, "step": 24148 }, { "epoch": 0.042819946116018116, "grad_norm": 0.8046875, "learning_rate": 0.0017489680162933756, "loss": 0.2783, "step": 24150 }, { "epoch": 0.04282349228132793, "grad_norm": 0.478515625, "learning_rate": 0.0017489267769967773, "loss": 0.1923, "step": 24152 }, { "epoch": 0.042827038446637745, "grad_norm": 1.109375, "learning_rate": 0.0017488855348621504, "loss": 0.1705, "step": 24154 }, { "epoch": 0.04283058461194756, "grad_norm": 0.62109375, "learning_rate": 0.001748844289889675, "loss": 0.1778, "step": 24156 }, { "epoch": 0.042834130777257375, "grad_norm": 0.51171875, "learning_rate": 0.0017488030420795316, "loss": 0.2089, "step": 24158 }, { "epoch": 0.042837676942567196, "grad_norm": 0.6875, "learning_rate": 0.0017487617914319004, "loss": 0.2046, "step": 24160 }, { "epoch": 0.04284122310787701, "grad_norm": 0.439453125, "learning_rate": 0.0017487205379469622, "loss": 0.2638, "step": 24162 }, { "epoch": 0.042844769273186825, "grad_norm": 0.66796875, "learning_rate": 0.0017486792816248972, "loss": 0.167, "step": 24164 }, { "epoch": 0.04284831543849664, "grad_norm": 1.484375, "learning_rate": 0.0017486380224658855, "loss": 0.2415, "step": 24166 }, { "epoch": 0.042851861603806454, "grad_norm": 0.61328125, "learning_rate": 0.0017485967604701084, "loss": 0.2292, "step": 24168 }, { "epoch": 0.04285540776911627, "grad_norm": 0.7109375, "learning_rate": 0.0017485554956377452, "loss": 0.2215, "step": 24170 }, { "epoch": 0.04285895393442608, "grad_norm": 0.57421875, "learning_rate": 0.001748514227968977, "loss": 0.2161, "step": 24172 }, { "epoch": 0.0428625000997359, "grad_norm": 0.79296875, "learning_rate": 0.0017484729574639849, "loss": 0.2107, "step": 24174 }, { "epoch": 0.04286604626504571, "grad_norm": 0.61328125, "learning_rate": 0.0017484316841229481, "loss": 0.1965, "step": 24176 }, { "epoch": 0.04286959243035553, "grad_norm": 0.94921875, "learning_rate": 0.001748390407946048, "loss": 0.2579, "step": 24178 }, { "epoch": 0.04287313859566534, "grad_norm": 0.431640625, "learning_rate": 0.0017483491289334652, "loss": 0.1948, "step": 24180 }, { "epoch": 0.04287668476097516, "grad_norm": 0.474609375, "learning_rate": 0.0017483078470853796, "loss": 0.158, "step": 24182 }, { "epoch": 0.04288023092628498, "grad_norm": 0.287109375, "learning_rate": 0.0017482665624019723, "loss": 0.2029, "step": 24184 }, { "epoch": 0.04288377709159479, "grad_norm": 0.208984375, "learning_rate": 0.0017482252748834233, "loss": 0.1857, "step": 24186 }, { "epoch": 0.04288732325690461, "grad_norm": 0.224609375, "learning_rate": 0.0017481839845299137, "loss": 0.2128, "step": 24188 }, { "epoch": 0.04289086942221442, "grad_norm": 2.40625, "learning_rate": 0.0017481426913416235, "loss": 0.2198, "step": 24190 }, { "epoch": 0.042894415587524236, "grad_norm": 0.37890625, "learning_rate": 0.001748101395318734, "loss": 0.1862, "step": 24192 }, { "epoch": 0.04289796175283405, "grad_norm": 1.7578125, "learning_rate": 0.0017480600964614255, "loss": 0.2643, "step": 24194 }, { "epoch": 0.042901507918143865, "grad_norm": 0.77734375, "learning_rate": 0.0017480187947698784, "loss": 0.3297, "step": 24196 }, { "epoch": 0.04290505408345368, "grad_norm": 1.421875, "learning_rate": 0.0017479774902442735, "loss": 0.2786, "step": 24198 }, { "epoch": 0.042908600248763494, "grad_norm": 0.51171875, "learning_rate": 0.0017479361828847916, "loss": 0.1989, "step": 24200 }, { "epoch": 0.04291214641407331, "grad_norm": 0.419921875, "learning_rate": 0.0017478948726916133, "loss": 0.1993, "step": 24202 }, { "epoch": 0.04291569257938313, "grad_norm": 0.498046875, "learning_rate": 0.0017478535596649187, "loss": 0.2735, "step": 24204 }, { "epoch": 0.042919238744692945, "grad_norm": 0.220703125, "learning_rate": 0.0017478122438048893, "loss": 0.1735, "step": 24206 }, { "epoch": 0.04292278491000276, "grad_norm": 2.625, "learning_rate": 0.0017477709251117056, "loss": 0.2374, "step": 24208 }, { "epoch": 0.042926331075312574, "grad_norm": 0.50390625, "learning_rate": 0.0017477296035855476, "loss": 0.1774, "step": 24210 }, { "epoch": 0.04292987724062239, "grad_norm": 0.306640625, "learning_rate": 0.0017476882792265972, "loss": 0.2171, "step": 24212 }, { "epoch": 0.0429334234059322, "grad_norm": 1.34375, "learning_rate": 0.001747646952035034, "loss": 0.3039, "step": 24214 }, { "epoch": 0.04293696957124202, "grad_norm": 0.224609375, "learning_rate": 0.0017476056220110394, "loss": 0.2463, "step": 24216 }, { "epoch": 0.04294051573655183, "grad_norm": 1.0, "learning_rate": 0.0017475642891547941, "loss": 0.3457, "step": 24218 }, { "epoch": 0.042944061901861647, "grad_norm": 2.171875, "learning_rate": 0.0017475229534664786, "loss": 0.246, "step": 24220 }, { "epoch": 0.04294760806717146, "grad_norm": 0.462890625, "learning_rate": 0.001747481614946274, "loss": 0.2216, "step": 24222 }, { "epoch": 0.042951154232481276, "grad_norm": 0.71484375, "learning_rate": 0.0017474402735943606, "loss": 0.1745, "step": 24224 }, { "epoch": 0.04295470039779109, "grad_norm": 1.0625, "learning_rate": 0.0017473989294109196, "loss": 0.2014, "step": 24226 }, { "epoch": 0.04295824656310091, "grad_norm": 0.5625, "learning_rate": 0.0017473575823961321, "loss": 0.1977, "step": 24228 }, { "epoch": 0.042961792728410726, "grad_norm": 2.40625, "learning_rate": 0.0017473162325501782, "loss": 0.2359, "step": 24230 }, { "epoch": 0.04296533889372054, "grad_norm": 1.125, "learning_rate": 0.0017472748798732394, "loss": 0.1812, "step": 24232 }, { "epoch": 0.042968885059030355, "grad_norm": 0.58984375, "learning_rate": 0.0017472335243654958, "loss": 0.1912, "step": 24234 }, { "epoch": 0.04297243122434017, "grad_norm": 0.46484375, "learning_rate": 0.001747192166027129, "loss": 0.3422, "step": 24236 }, { "epoch": 0.042975977389649984, "grad_norm": 0.421875, "learning_rate": 0.0017471508048583194, "loss": 0.1902, "step": 24238 }, { "epoch": 0.0429795235549598, "grad_norm": 1.59375, "learning_rate": 0.0017471094408592486, "loss": 0.1939, "step": 24240 }, { "epoch": 0.042983069720269614, "grad_norm": 0.32421875, "learning_rate": 0.0017470680740300966, "loss": 0.1786, "step": 24242 }, { "epoch": 0.04298661588557943, "grad_norm": 1.0859375, "learning_rate": 0.0017470267043710446, "loss": 0.1565, "step": 24244 }, { "epoch": 0.04299016205088924, "grad_norm": 2.0, "learning_rate": 0.001746985331882274, "loss": 0.2989, "step": 24246 }, { "epoch": 0.04299370821619906, "grad_norm": 0.62890625, "learning_rate": 0.0017469439565639651, "loss": 0.1724, "step": 24248 }, { "epoch": 0.04299725438150888, "grad_norm": 0.578125, "learning_rate": 0.0017469025784162993, "loss": 0.1557, "step": 24250 }, { "epoch": 0.04300080054681869, "grad_norm": 0.375, "learning_rate": 0.001746861197439457, "loss": 0.1974, "step": 24252 }, { "epoch": 0.04300434671212851, "grad_norm": 0.5859375, "learning_rate": 0.0017468198136336203, "loss": 0.2223, "step": 24254 }, { "epoch": 0.04300789287743832, "grad_norm": 1.0859375, "learning_rate": 0.0017467784269989688, "loss": 0.1743, "step": 24256 }, { "epoch": 0.04301143904274814, "grad_norm": 0.482421875, "learning_rate": 0.0017467370375356846, "loss": 0.2179, "step": 24258 }, { "epoch": 0.04301498520805795, "grad_norm": 1.3828125, "learning_rate": 0.001746695645243948, "loss": 0.2148, "step": 24260 }, { "epoch": 0.043018531373367766, "grad_norm": 0.84765625, "learning_rate": 0.0017466542501239403, "loss": 0.2186, "step": 24262 }, { "epoch": 0.04302207753867758, "grad_norm": 0.71875, "learning_rate": 0.0017466128521758423, "loss": 0.1757, "step": 24264 }, { "epoch": 0.043025623703987395, "grad_norm": 0.36328125, "learning_rate": 0.0017465714513998356, "loss": 0.1866, "step": 24266 }, { "epoch": 0.04302916986929721, "grad_norm": 0.51171875, "learning_rate": 0.0017465300477961009, "loss": 0.2024, "step": 24268 }, { "epoch": 0.043032716034607024, "grad_norm": 0.40625, "learning_rate": 0.0017464886413648191, "loss": 0.1483, "step": 24270 }, { "epoch": 0.043036262199916846, "grad_norm": 0.70703125, "learning_rate": 0.0017464472321061719, "loss": 0.225, "step": 24272 }, { "epoch": 0.04303980836522666, "grad_norm": 0.578125, "learning_rate": 0.00174640582002034, "loss": 0.2247, "step": 24274 }, { "epoch": 0.043043354530536475, "grad_norm": 0.3046875, "learning_rate": 0.0017463644051075042, "loss": 0.157, "step": 24276 }, { "epoch": 0.04304690069584629, "grad_norm": 0.322265625, "learning_rate": 0.0017463229873678461, "loss": 0.2338, "step": 24278 }, { "epoch": 0.043050446861156104, "grad_norm": 0.78125, "learning_rate": 0.0017462815668015466, "loss": 0.1954, "step": 24280 }, { "epoch": 0.04305399302646592, "grad_norm": 1.78125, "learning_rate": 0.001746240143408787, "loss": 0.2601, "step": 24282 }, { "epoch": 0.04305753919177573, "grad_norm": 0.73828125, "learning_rate": 0.0017461987171897484, "loss": 0.2385, "step": 24284 }, { "epoch": 0.04306108535708555, "grad_norm": 0.3203125, "learning_rate": 0.001746157288144612, "loss": 0.2056, "step": 24286 }, { "epoch": 0.04306463152239536, "grad_norm": 2.8125, "learning_rate": 0.001746115856273559, "loss": 0.2709, "step": 24288 }, { "epoch": 0.04306817768770518, "grad_norm": 0.86328125, "learning_rate": 0.0017460744215767703, "loss": 0.1859, "step": 24290 }, { "epoch": 0.04307172385301499, "grad_norm": 0.333984375, "learning_rate": 0.0017460329840544276, "loss": 0.1771, "step": 24292 }, { "epoch": 0.043075270018324806, "grad_norm": 0.5078125, "learning_rate": 0.001745991543706712, "loss": 0.1575, "step": 24294 }, { "epoch": 0.04307881618363463, "grad_norm": 0.890625, "learning_rate": 0.0017459501005338044, "loss": 0.4684, "step": 24296 }, { "epoch": 0.04308236234894444, "grad_norm": 1.375, "learning_rate": 0.0017459086545358862, "loss": 0.2327, "step": 24298 }, { "epoch": 0.043085908514254256, "grad_norm": 0.37109375, "learning_rate": 0.001745867205713139, "loss": 0.2052, "step": 24300 }, { "epoch": 0.04308945467956407, "grad_norm": 0.41796875, "learning_rate": 0.0017458257540657433, "loss": 0.1432, "step": 24302 }, { "epoch": 0.043093000844873885, "grad_norm": 0.349609375, "learning_rate": 0.0017457842995938814, "loss": 0.2114, "step": 24304 }, { "epoch": 0.0430965470101837, "grad_norm": 0.60546875, "learning_rate": 0.001745742842297734, "loss": 0.3504, "step": 24306 }, { "epoch": 0.043100093175493515, "grad_norm": 0.59765625, "learning_rate": 0.0017457013821774823, "loss": 0.2117, "step": 24308 }, { "epoch": 0.04310363934080333, "grad_norm": 0.369140625, "learning_rate": 0.001745659919233308, "loss": 0.2333, "step": 24310 }, { "epoch": 0.043107185506113144, "grad_norm": 0.455078125, "learning_rate": 0.0017456184534653922, "loss": 0.2491, "step": 24312 }, { "epoch": 0.04311073167142296, "grad_norm": 0.70703125, "learning_rate": 0.0017455769848739163, "loss": 0.2023, "step": 24314 }, { "epoch": 0.04311427783673277, "grad_norm": 1.046875, "learning_rate": 0.0017455355134590618, "loss": 0.4318, "step": 24316 }, { "epoch": 0.043117824002042594, "grad_norm": 0.408203125, "learning_rate": 0.00174549403922101, "loss": 0.1757, "step": 24318 }, { "epoch": 0.04312137016735241, "grad_norm": 0.5546875, "learning_rate": 0.0017454525621599418, "loss": 0.1738, "step": 24320 }, { "epoch": 0.04312491633266222, "grad_norm": 1.46875, "learning_rate": 0.0017454110822760392, "loss": 0.2655, "step": 24322 }, { "epoch": 0.04312846249797204, "grad_norm": 0.70703125, "learning_rate": 0.0017453695995694833, "loss": 0.2151, "step": 24324 }, { "epoch": 0.04313200866328185, "grad_norm": 0.419921875, "learning_rate": 0.0017453281140404563, "loss": 0.162, "step": 24326 }, { "epoch": 0.04313555482859167, "grad_norm": 0.3203125, "learning_rate": 0.0017452866256891383, "loss": 0.2355, "step": 24328 }, { "epoch": 0.04313910099390148, "grad_norm": 0.376953125, "learning_rate": 0.0017452451345157114, "loss": 0.4167, "step": 24330 }, { "epoch": 0.043142647159211296, "grad_norm": 0.6484375, "learning_rate": 0.0017452036405203575, "loss": 0.1681, "step": 24332 }, { "epoch": 0.04314619332452111, "grad_norm": 0.380859375, "learning_rate": 0.0017451621437032574, "loss": 0.1656, "step": 24334 }, { "epoch": 0.043149739489830925, "grad_norm": 0.31640625, "learning_rate": 0.001745120644064593, "loss": 0.1696, "step": 24336 }, { "epoch": 0.04315328565514074, "grad_norm": 0.453125, "learning_rate": 0.0017450791416045454, "loss": 0.2043, "step": 24338 }, { "epoch": 0.04315683182045056, "grad_norm": 0.39453125, "learning_rate": 0.0017450376363232966, "loss": 0.2185, "step": 24340 }, { "epoch": 0.043160377985760376, "grad_norm": 0.56640625, "learning_rate": 0.0017449961282210277, "loss": 0.196, "step": 24342 }, { "epoch": 0.04316392415107019, "grad_norm": 0.94921875, "learning_rate": 0.0017449546172979206, "loss": 0.3017, "step": 24344 }, { "epoch": 0.043167470316380005, "grad_norm": 1.421875, "learning_rate": 0.0017449131035541564, "loss": 0.2033, "step": 24346 }, { "epoch": 0.04317101648168982, "grad_norm": 0.376953125, "learning_rate": 0.0017448715869899172, "loss": 0.2427, "step": 24348 }, { "epoch": 0.043174562646999634, "grad_norm": 0.3515625, "learning_rate": 0.001744830067605384, "loss": 0.1931, "step": 24350 }, { "epoch": 0.04317810881230945, "grad_norm": 0.5, "learning_rate": 0.0017447885454007386, "loss": 0.2006, "step": 24352 }, { "epoch": 0.04318165497761926, "grad_norm": 0.48046875, "learning_rate": 0.0017447470203761628, "loss": 0.2021, "step": 24354 }, { "epoch": 0.04318520114292908, "grad_norm": 0.8359375, "learning_rate": 0.001744705492531838, "loss": 0.2128, "step": 24356 }, { "epoch": 0.04318874730823889, "grad_norm": 2.078125, "learning_rate": 0.0017446639618679456, "loss": 0.2395, "step": 24358 }, { "epoch": 0.04319229347354871, "grad_norm": 1.1796875, "learning_rate": 0.001744622428384668, "loss": 0.2409, "step": 24360 }, { "epoch": 0.04319583963885852, "grad_norm": 1.2890625, "learning_rate": 0.001744580892082186, "loss": 0.2344, "step": 24362 }, { "epoch": 0.04319938580416834, "grad_norm": 0.337890625, "learning_rate": 0.0017445393529606818, "loss": 0.2098, "step": 24364 }, { "epoch": 0.04320293196947816, "grad_norm": 0.7890625, "learning_rate": 0.0017444978110203364, "loss": 0.1994, "step": 24366 }, { "epoch": 0.04320647813478797, "grad_norm": 0.58203125, "learning_rate": 0.0017444562662613323, "loss": 0.1759, "step": 24368 }, { "epoch": 0.04321002430009779, "grad_norm": 0.5, "learning_rate": 0.0017444147186838508, "loss": 0.2018, "step": 24370 }, { "epoch": 0.0432135704654076, "grad_norm": 0.51953125, "learning_rate": 0.0017443731682880738, "loss": 0.153, "step": 24372 }, { "epoch": 0.043217116630717416, "grad_norm": 1.7890625, "learning_rate": 0.0017443316150741828, "loss": 0.2891, "step": 24374 }, { "epoch": 0.04322066279602723, "grad_norm": 0.58984375, "learning_rate": 0.0017442900590423594, "loss": 0.1841, "step": 24376 }, { "epoch": 0.043224208961337045, "grad_norm": 0.25390625, "learning_rate": 0.0017442485001927857, "loss": 0.1902, "step": 24378 }, { "epoch": 0.04322775512664686, "grad_norm": 1.140625, "learning_rate": 0.0017442069385256434, "loss": 0.3534, "step": 24380 }, { "epoch": 0.043231301291956674, "grad_norm": 0.240234375, "learning_rate": 0.0017441653740411136, "loss": 0.2274, "step": 24382 }, { "epoch": 0.04323484745726649, "grad_norm": 0.65625, "learning_rate": 0.001744123806739379, "loss": 0.2303, "step": 24384 }, { "epoch": 0.04323839362257631, "grad_norm": 0.357421875, "learning_rate": 0.0017440822366206212, "loss": 0.1955, "step": 24386 }, { "epoch": 0.043241939787886124, "grad_norm": 0.318359375, "learning_rate": 0.0017440406636850217, "loss": 0.1802, "step": 24388 }, { "epoch": 0.04324548595319594, "grad_norm": 0.392578125, "learning_rate": 0.0017439990879327624, "loss": 0.2531, "step": 24390 }, { "epoch": 0.043249032118505754, "grad_norm": 0.52734375, "learning_rate": 0.0017439575093640249, "loss": 0.2286, "step": 24392 }, { "epoch": 0.04325257828381557, "grad_norm": 0.73828125, "learning_rate": 0.0017439159279789916, "loss": 0.2392, "step": 24394 }, { "epoch": 0.04325612444912538, "grad_norm": 1.34375, "learning_rate": 0.0017438743437778442, "loss": 0.2198, "step": 24396 }, { "epoch": 0.0432596706144352, "grad_norm": 0.5625, "learning_rate": 0.0017438327567607641, "loss": 0.2168, "step": 24398 }, { "epoch": 0.04326321677974501, "grad_norm": 0.93359375, "learning_rate": 0.0017437911669279337, "loss": 0.3038, "step": 24400 }, { "epoch": 0.043266762945054826, "grad_norm": 0.419921875, "learning_rate": 0.0017437495742795347, "loss": 0.5243, "step": 24402 }, { "epoch": 0.04327030911036464, "grad_norm": 0.369140625, "learning_rate": 0.0017437079788157489, "loss": 0.1738, "step": 24404 }, { "epoch": 0.043273855275674455, "grad_norm": 0.26171875, "learning_rate": 0.0017436663805367584, "loss": 0.1757, "step": 24406 }, { "epoch": 0.04327740144098428, "grad_norm": 0.640625, "learning_rate": 0.0017436247794427452, "loss": 0.2239, "step": 24408 }, { "epoch": 0.04328094760629409, "grad_norm": 0.40234375, "learning_rate": 0.0017435831755338907, "loss": 0.1496, "step": 24410 }, { "epoch": 0.043284493771603906, "grad_norm": 0.6953125, "learning_rate": 0.0017435415688103772, "loss": 0.1979, "step": 24412 }, { "epoch": 0.04328803993691372, "grad_norm": 0.474609375, "learning_rate": 0.001743499959272387, "loss": 0.2165, "step": 24414 }, { "epoch": 0.043291586102223535, "grad_norm": 0.3125, "learning_rate": 0.0017434583469201016, "loss": 0.1757, "step": 24416 }, { "epoch": 0.04329513226753335, "grad_norm": 0.63671875, "learning_rate": 0.0017434167317537032, "loss": 0.1548, "step": 24418 }, { "epoch": 0.043298678432843164, "grad_norm": 1.3046875, "learning_rate": 0.0017433751137733737, "loss": 0.2083, "step": 24420 }, { "epoch": 0.04330222459815298, "grad_norm": 1.0703125, "learning_rate": 0.0017433334929792955, "loss": 0.3403, "step": 24422 }, { "epoch": 0.04330577076346279, "grad_norm": 0.271484375, "learning_rate": 0.0017432918693716497, "loss": 0.1731, "step": 24424 }, { "epoch": 0.04330931692877261, "grad_norm": 0.2177734375, "learning_rate": 0.0017432502429506195, "loss": 0.2183, "step": 24426 }, { "epoch": 0.04331286309408242, "grad_norm": 0.4765625, "learning_rate": 0.001743208613716386, "loss": 0.3217, "step": 24428 }, { "epoch": 0.04331640925939224, "grad_norm": 0.310546875, "learning_rate": 0.0017431669816691318, "loss": 0.1976, "step": 24430 }, { "epoch": 0.04331995542470206, "grad_norm": 0.26171875, "learning_rate": 0.0017431253468090388, "loss": 0.2333, "step": 24432 }, { "epoch": 0.04332350159001187, "grad_norm": 0.828125, "learning_rate": 0.0017430837091362893, "loss": 0.2347, "step": 24434 }, { "epoch": 0.04332704775532169, "grad_norm": 2.40625, "learning_rate": 0.0017430420686510649, "loss": 0.2624, "step": 24436 }, { "epoch": 0.0433305939206315, "grad_norm": 0.515625, "learning_rate": 0.0017430004253535484, "loss": 0.1599, "step": 24438 }, { "epoch": 0.04333414008594132, "grad_norm": 0.34765625, "learning_rate": 0.001742958779243921, "loss": 0.206, "step": 24440 }, { "epoch": 0.04333768625125113, "grad_norm": 3.203125, "learning_rate": 0.0017429171303223656, "loss": 0.3181, "step": 24442 }, { "epoch": 0.043341232416560946, "grad_norm": 0.439453125, "learning_rate": 0.001742875478589064, "loss": 0.3261, "step": 24444 }, { "epoch": 0.04334477858187076, "grad_norm": 0.45703125, "learning_rate": 0.0017428338240441988, "loss": 0.1977, "step": 24446 }, { "epoch": 0.043348324747180575, "grad_norm": 0.421875, "learning_rate": 0.0017427921666879518, "loss": 0.1796, "step": 24448 }, { "epoch": 0.04335187091249039, "grad_norm": 3.296875, "learning_rate": 0.0017427505065205052, "loss": 0.4206, "step": 24450 }, { "epoch": 0.043355417077800204, "grad_norm": 1.03125, "learning_rate": 0.0017427088435420408, "loss": 0.2164, "step": 24452 }, { "epoch": 0.043358963243110026, "grad_norm": 0.1875, "learning_rate": 0.0017426671777527418, "loss": 0.1914, "step": 24454 }, { "epoch": 0.04336250940841984, "grad_norm": 2.140625, "learning_rate": 0.0017426255091527896, "loss": 0.2748, "step": 24456 }, { "epoch": 0.043366055573729655, "grad_norm": 0.55078125, "learning_rate": 0.001742583837742367, "loss": 0.2643, "step": 24458 }, { "epoch": 0.04336960173903947, "grad_norm": 1.234375, "learning_rate": 0.0017425421635216558, "loss": 0.2253, "step": 24460 }, { "epoch": 0.043373147904349284, "grad_norm": 0.232421875, "learning_rate": 0.0017425004864908382, "loss": 0.2106, "step": 24462 }, { "epoch": 0.0433766940696591, "grad_norm": 0.890625, "learning_rate": 0.001742458806650097, "loss": 0.2723, "step": 24464 }, { "epoch": 0.04338024023496891, "grad_norm": 0.60546875, "learning_rate": 0.001742417123999614, "loss": 0.1871, "step": 24466 }, { "epoch": 0.04338378640027873, "grad_norm": 1.265625, "learning_rate": 0.0017423754385395716, "loss": 0.2935, "step": 24468 }, { "epoch": 0.04338733256558854, "grad_norm": 0.828125, "learning_rate": 0.0017423337502701522, "loss": 0.2, "step": 24470 }, { "epoch": 0.043390878730898357, "grad_norm": 1.0625, "learning_rate": 0.0017422920591915383, "loss": 0.2161, "step": 24472 }, { "epoch": 0.04339442489620817, "grad_norm": 1.5234375, "learning_rate": 0.001742250365303912, "loss": 0.2065, "step": 24474 }, { "epoch": 0.04339797106151799, "grad_norm": 1.1328125, "learning_rate": 0.0017422086686074553, "loss": 0.1756, "step": 24476 }, { "epoch": 0.04340151722682781, "grad_norm": 1.0859375, "learning_rate": 0.0017421669691023512, "loss": 0.1563, "step": 24478 }, { "epoch": 0.04340506339213762, "grad_norm": 0.431640625, "learning_rate": 0.0017421252667887816, "loss": 0.2812, "step": 24480 }, { "epoch": 0.043408609557447436, "grad_norm": 0.65625, "learning_rate": 0.0017420835616669293, "loss": 0.1827, "step": 24482 }, { "epoch": 0.04341215572275725, "grad_norm": 0.203125, "learning_rate": 0.0017420418537369762, "loss": 0.1739, "step": 24484 }, { "epoch": 0.043415701888067065, "grad_norm": 1.71875, "learning_rate": 0.0017420001429991052, "loss": 0.3764, "step": 24486 }, { "epoch": 0.04341924805337688, "grad_norm": 0.515625, "learning_rate": 0.0017419584294534984, "loss": 0.2501, "step": 24488 }, { "epoch": 0.043422794218686694, "grad_norm": 0.453125, "learning_rate": 0.0017419167131003384, "loss": 0.1938, "step": 24490 }, { "epoch": 0.04342634038399651, "grad_norm": 0.7265625, "learning_rate": 0.0017418749939398077, "loss": 0.2063, "step": 24492 }, { "epoch": 0.043429886549306324, "grad_norm": 0.609375, "learning_rate": 0.0017418332719720882, "loss": 0.2003, "step": 24494 }, { "epoch": 0.04343343271461614, "grad_norm": 1.109375, "learning_rate": 0.001741791547197363, "loss": 0.2152, "step": 24496 }, { "epoch": 0.04343697887992595, "grad_norm": 0.248046875, "learning_rate": 0.0017417498196158144, "loss": 0.3106, "step": 24498 }, { "epoch": 0.043440525045235774, "grad_norm": 0.9921875, "learning_rate": 0.001741708089227625, "loss": 0.2202, "step": 24500 }, { "epoch": 0.04344407121054559, "grad_norm": 1.3046875, "learning_rate": 0.001741666356032977, "loss": 0.165, "step": 24502 }, { "epoch": 0.0434476173758554, "grad_norm": 0.431640625, "learning_rate": 0.0017416246200320533, "loss": 0.1848, "step": 24504 }, { "epoch": 0.04345116354116522, "grad_norm": 0.365234375, "learning_rate": 0.0017415828812250359, "loss": 0.2009, "step": 24506 }, { "epoch": 0.04345470970647503, "grad_norm": 0.365234375, "learning_rate": 0.0017415411396121078, "loss": 0.206, "step": 24508 }, { "epoch": 0.04345825587178485, "grad_norm": 0.392578125, "learning_rate": 0.0017414993951934512, "loss": 0.1575, "step": 24510 }, { "epoch": 0.04346180203709466, "grad_norm": 0.419921875, "learning_rate": 0.0017414576479692493, "loss": 0.1594, "step": 24512 }, { "epoch": 0.043465348202404476, "grad_norm": 0.83203125, "learning_rate": 0.001741415897939684, "loss": 0.2186, "step": 24514 }, { "epoch": 0.04346889436771429, "grad_norm": 0.953125, "learning_rate": 0.001741374145104938, "loss": 0.2389, "step": 24516 }, { "epoch": 0.043472440533024105, "grad_norm": 0.2080078125, "learning_rate": 0.0017413323894651942, "loss": 0.168, "step": 24518 }, { "epoch": 0.04347598669833392, "grad_norm": 0.375, "learning_rate": 0.0017412906310206352, "loss": 0.1699, "step": 24520 }, { "epoch": 0.04347953286364374, "grad_norm": 0.71875, "learning_rate": 0.0017412488697714432, "loss": 0.1966, "step": 24522 }, { "epoch": 0.043483079028953556, "grad_norm": 0.75, "learning_rate": 0.0017412071057178013, "loss": 0.1513, "step": 24524 }, { "epoch": 0.04348662519426337, "grad_norm": 0.76171875, "learning_rate": 0.001741165338859892, "loss": 0.1901, "step": 24526 }, { "epoch": 0.043490171359573185, "grad_norm": 0.3359375, "learning_rate": 0.001741123569197898, "loss": 0.1782, "step": 24528 }, { "epoch": 0.043493717524883, "grad_norm": 0.9375, "learning_rate": 0.0017410817967320016, "loss": 0.1966, "step": 24530 }, { "epoch": 0.043497263690192814, "grad_norm": 0.65625, "learning_rate": 0.001741040021462386, "loss": 0.1827, "step": 24532 }, { "epoch": 0.04350080985550263, "grad_norm": 1.0390625, "learning_rate": 0.001740998243389234, "loss": 0.1767, "step": 24534 }, { "epoch": 0.04350435602081244, "grad_norm": 0.51953125, "learning_rate": 0.0017409564625127274, "loss": 0.1974, "step": 24536 }, { "epoch": 0.04350790218612226, "grad_norm": 1.3515625, "learning_rate": 0.00174091467883305, "loss": 0.2534, "step": 24538 }, { "epoch": 0.04351144835143207, "grad_norm": 1.921875, "learning_rate": 0.001740872892350384, "loss": 0.2744, "step": 24540 }, { "epoch": 0.04351499451674189, "grad_norm": 0.578125, "learning_rate": 0.0017408311030649123, "loss": 0.1968, "step": 24542 }, { "epoch": 0.04351854068205171, "grad_norm": 0.41015625, "learning_rate": 0.0017407893109768174, "loss": 0.2805, "step": 24544 }, { "epoch": 0.04352208684736152, "grad_norm": 0.390625, "learning_rate": 0.0017407475160862824, "loss": 0.1895, "step": 24546 }, { "epoch": 0.04352563301267134, "grad_norm": 0.2412109375, "learning_rate": 0.0017407057183934898, "loss": 0.1834, "step": 24548 }, { "epoch": 0.04352917917798115, "grad_norm": 0.34375, "learning_rate": 0.0017406639178986227, "loss": 0.2249, "step": 24550 }, { "epoch": 0.043532725343290966, "grad_norm": 0.5078125, "learning_rate": 0.0017406221146018639, "loss": 0.1804, "step": 24552 }, { "epoch": 0.04353627150860078, "grad_norm": 0.435546875, "learning_rate": 0.0017405803085033958, "loss": 0.1507, "step": 24554 }, { "epoch": 0.043539817673910595, "grad_norm": 0.255859375, "learning_rate": 0.0017405384996034014, "loss": 0.2211, "step": 24556 }, { "epoch": 0.04354336383922041, "grad_norm": 0.2021484375, "learning_rate": 0.001740496687902064, "loss": 0.1858, "step": 24558 }, { "epoch": 0.043546910004530225, "grad_norm": 0.4921875, "learning_rate": 0.0017404548733995662, "loss": 0.2412, "step": 24560 }, { "epoch": 0.04355045616984004, "grad_norm": 1.4296875, "learning_rate": 0.0017404130560960908, "loss": 0.178, "step": 24562 }, { "epoch": 0.043554002335149854, "grad_norm": 0.48828125, "learning_rate": 0.0017403712359918202, "loss": 0.1823, "step": 24564 }, { "epoch": 0.04355754850045967, "grad_norm": 0.1962890625, "learning_rate": 0.0017403294130869382, "loss": 0.2138, "step": 24566 }, { "epoch": 0.04356109466576949, "grad_norm": 0.470703125, "learning_rate": 0.0017402875873816274, "loss": 0.2772, "step": 24568 }, { "epoch": 0.043564640831079304, "grad_norm": 0.6328125, "learning_rate": 0.0017402457588760703, "loss": 0.2471, "step": 24570 }, { "epoch": 0.04356818699638912, "grad_norm": 3.890625, "learning_rate": 0.0017402039275704504, "loss": 0.2246, "step": 24572 }, { "epoch": 0.04357173316169893, "grad_norm": 0.50390625, "learning_rate": 0.0017401620934649501, "loss": 0.4466, "step": 24574 }, { "epoch": 0.04357527932700875, "grad_norm": 0.640625, "learning_rate": 0.0017401202565597528, "loss": 0.165, "step": 24576 }, { "epoch": 0.04357882549231856, "grad_norm": 0.578125, "learning_rate": 0.0017400784168550417, "loss": 0.2183, "step": 24578 }, { "epoch": 0.04358237165762838, "grad_norm": 0.298828125, "learning_rate": 0.0017400365743509988, "loss": 0.1525, "step": 24580 }, { "epoch": 0.04358591782293819, "grad_norm": 4.125, "learning_rate": 0.0017399947290478077, "loss": 0.4064, "step": 24582 }, { "epoch": 0.043589463988248006, "grad_norm": 0.1953125, "learning_rate": 0.001739952880945652, "loss": 0.1757, "step": 24584 }, { "epoch": 0.04359301015355782, "grad_norm": 0.291015625, "learning_rate": 0.001739911030044714, "loss": 0.2448, "step": 24586 }, { "epoch": 0.043596556318867635, "grad_norm": 1.09375, "learning_rate": 0.0017398691763451765, "loss": 0.2006, "step": 24588 }, { "epoch": 0.04360010248417746, "grad_norm": 1.125, "learning_rate": 0.001739827319847223, "loss": 0.2919, "step": 24590 }, { "epoch": 0.04360364864948727, "grad_norm": 0.53125, "learning_rate": 0.0017397854605510363, "loss": 0.1869, "step": 24592 }, { "epoch": 0.043607194814797086, "grad_norm": 0.62109375, "learning_rate": 0.0017397435984568, "loss": 0.2285, "step": 24594 }, { "epoch": 0.0436107409801069, "grad_norm": 0.17578125, "learning_rate": 0.0017397017335646968, "loss": 0.1463, "step": 24596 }, { "epoch": 0.043614287145416715, "grad_norm": 0.337890625, "learning_rate": 0.0017396598658749097, "loss": 0.1731, "step": 24598 }, { "epoch": 0.04361783331072653, "grad_norm": 0.28515625, "learning_rate": 0.0017396179953876217, "loss": 0.1394, "step": 24600 }, { "epoch": 0.043621379476036344, "grad_norm": 0.5234375, "learning_rate": 0.0017395761221030161, "loss": 0.211, "step": 24602 }, { "epoch": 0.04362492564134616, "grad_norm": 0.498046875, "learning_rate": 0.0017395342460212763, "loss": 0.2733, "step": 24604 }, { "epoch": 0.04362847180665597, "grad_norm": 0.578125, "learning_rate": 0.0017394923671425852, "loss": 0.2176, "step": 24606 }, { "epoch": 0.04363201797196579, "grad_norm": 0.53515625, "learning_rate": 0.0017394504854671257, "loss": 0.1652, "step": 24608 }, { "epoch": 0.0436355641372756, "grad_norm": 0.6875, "learning_rate": 0.0017394086009950816, "loss": 0.3754, "step": 24610 }, { "epoch": 0.043639110302585424, "grad_norm": 0.443359375, "learning_rate": 0.0017393667137266353, "loss": 0.3114, "step": 24612 }, { "epoch": 0.04364265646789524, "grad_norm": 0.76171875, "learning_rate": 0.0017393248236619707, "loss": 0.2052, "step": 24614 }, { "epoch": 0.04364620263320505, "grad_norm": 0.345703125, "learning_rate": 0.0017392829308012707, "loss": 0.2849, "step": 24616 }, { "epoch": 0.04364974879851487, "grad_norm": 0.5390625, "learning_rate": 0.0017392410351447184, "loss": 0.3229, "step": 24618 }, { "epoch": 0.04365329496382468, "grad_norm": 0.58203125, "learning_rate": 0.001739199136692497, "loss": 0.2477, "step": 24620 }, { "epoch": 0.0436568411291345, "grad_norm": 0.42578125, "learning_rate": 0.00173915723544479, "loss": 0.2171, "step": 24622 }, { "epoch": 0.04366038729444431, "grad_norm": 0.345703125, "learning_rate": 0.0017391153314017804, "loss": 0.2439, "step": 24624 }, { "epoch": 0.043663933459754126, "grad_norm": 2.609375, "learning_rate": 0.0017390734245636516, "loss": 0.2733, "step": 24626 }, { "epoch": 0.04366747962506394, "grad_norm": 0.380859375, "learning_rate": 0.001739031514930587, "loss": 0.3275, "step": 24628 }, { "epoch": 0.043671025790373755, "grad_norm": 0.890625, "learning_rate": 0.0017389896025027697, "loss": 0.1998, "step": 24630 }, { "epoch": 0.04367457195568357, "grad_norm": 0.369140625, "learning_rate": 0.001738947687280383, "loss": 0.1971, "step": 24632 }, { "epoch": 0.043678118120993384, "grad_norm": 0.8671875, "learning_rate": 0.00173890576926361, "loss": 0.1863, "step": 24634 }, { "epoch": 0.043681664286303205, "grad_norm": 0.515625, "learning_rate": 0.001738863848452635, "loss": 0.1731, "step": 24636 }, { "epoch": 0.04368521045161302, "grad_norm": 0.77734375, "learning_rate": 0.0017388219248476399, "loss": 0.3525, "step": 24638 }, { "epoch": 0.043688756616922834, "grad_norm": 0.640625, "learning_rate": 0.001738779998448809, "loss": 0.197, "step": 24640 }, { "epoch": 0.04369230278223265, "grad_norm": 0.2255859375, "learning_rate": 0.0017387380692563256, "loss": 0.191, "step": 24642 }, { "epoch": 0.043695848947542464, "grad_norm": 0.29296875, "learning_rate": 0.0017386961372703727, "loss": 0.1856, "step": 24644 }, { "epoch": 0.04369939511285228, "grad_norm": 2.609375, "learning_rate": 0.001738654202491134, "loss": 0.263, "step": 24646 }, { "epoch": 0.04370294127816209, "grad_norm": 0.33203125, "learning_rate": 0.0017386122649187927, "loss": 0.1944, "step": 24648 }, { "epoch": 0.04370648744347191, "grad_norm": 0.45703125, "learning_rate": 0.0017385703245535325, "loss": 0.1627, "step": 24650 }, { "epoch": 0.04371003360878172, "grad_norm": 2.28125, "learning_rate": 0.0017385283813955363, "loss": 0.2837, "step": 24652 }, { "epoch": 0.043713579774091536, "grad_norm": 0.52734375, "learning_rate": 0.0017384864354449883, "loss": 0.1947, "step": 24654 }, { "epoch": 0.04371712593940135, "grad_norm": 0.515625, "learning_rate": 0.0017384444867020712, "loss": 0.1553, "step": 24656 }, { "epoch": 0.04372067210471117, "grad_norm": 0.490234375, "learning_rate": 0.0017384025351669687, "loss": 0.3871, "step": 24658 }, { "epoch": 0.04372421827002099, "grad_norm": 0.45703125, "learning_rate": 0.0017383605808398647, "loss": 0.1821, "step": 24660 }, { "epoch": 0.0437277644353308, "grad_norm": 1.109375, "learning_rate": 0.001738318623720942, "loss": 0.2207, "step": 24662 }, { "epoch": 0.043731310600640616, "grad_norm": 0.462890625, "learning_rate": 0.0017382766638103845, "loss": 0.2804, "step": 24664 }, { "epoch": 0.04373485676595043, "grad_norm": 0.5390625, "learning_rate": 0.0017382347011083755, "loss": 0.2181, "step": 24666 }, { "epoch": 0.043738402931260245, "grad_norm": 1.03125, "learning_rate": 0.0017381927356150987, "loss": 0.3116, "step": 24668 }, { "epoch": 0.04374194909657006, "grad_norm": 0.408203125, "learning_rate": 0.001738150767330738, "loss": 0.1624, "step": 24670 }, { "epoch": 0.043745495261879874, "grad_norm": 0.41015625, "learning_rate": 0.001738108796255476, "loss": 0.1911, "step": 24672 }, { "epoch": 0.04374904142718969, "grad_norm": 0.373046875, "learning_rate": 0.0017380668223894971, "loss": 0.1779, "step": 24674 }, { "epoch": 0.0437525875924995, "grad_norm": 0.50390625, "learning_rate": 0.0017380248457329844, "loss": 0.3178, "step": 24676 }, { "epoch": 0.04375613375780932, "grad_norm": 1.9921875, "learning_rate": 0.0017379828662861217, "loss": 0.2463, "step": 24678 }, { "epoch": 0.04375967992311914, "grad_norm": 0.71484375, "learning_rate": 0.0017379408840490927, "loss": 0.1763, "step": 24680 }, { "epoch": 0.043763226088428954, "grad_norm": 1.0859375, "learning_rate": 0.0017378988990220806, "loss": 0.4367, "step": 24682 }, { "epoch": 0.04376677225373877, "grad_norm": 0.77734375, "learning_rate": 0.0017378569112052693, "loss": 0.2131, "step": 24684 }, { "epoch": 0.04377031841904858, "grad_norm": 0.41015625, "learning_rate": 0.0017378149205988422, "loss": 0.2621, "step": 24686 }, { "epoch": 0.0437738645843584, "grad_norm": 0.59765625, "learning_rate": 0.0017377729272029837, "loss": 0.1956, "step": 24688 }, { "epoch": 0.04377741074966821, "grad_norm": 0.78515625, "learning_rate": 0.0017377309310178761, "loss": 0.1683, "step": 24690 }, { "epoch": 0.04378095691497803, "grad_norm": 0.404296875, "learning_rate": 0.0017376889320437044, "loss": 0.2212, "step": 24692 }, { "epoch": 0.04378450308028784, "grad_norm": 0.81640625, "learning_rate": 0.0017376469302806514, "loss": 0.1979, "step": 24694 }, { "epoch": 0.043788049245597656, "grad_norm": 0.54296875, "learning_rate": 0.0017376049257289013, "loss": 0.372, "step": 24696 }, { "epoch": 0.04379159541090747, "grad_norm": 1.03125, "learning_rate": 0.0017375629183886377, "loss": 0.4359, "step": 24698 }, { "epoch": 0.043795141576217285, "grad_norm": 0.84765625, "learning_rate": 0.0017375209082600438, "loss": 0.2125, "step": 24700 }, { "epoch": 0.0437986877415271, "grad_norm": 0.8359375, "learning_rate": 0.0017374788953433043, "loss": 0.2274, "step": 24702 }, { "epoch": 0.04380223390683692, "grad_norm": 0.2734375, "learning_rate": 0.0017374368796386022, "loss": 0.1753, "step": 24704 }, { "epoch": 0.043805780072146736, "grad_norm": 0.38671875, "learning_rate": 0.0017373948611461214, "loss": 0.22, "step": 24706 }, { "epoch": 0.04380932623745655, "grad_norm": 0.96875, "learning_rate": 0.0017373528398660458, "loss": 0.2047, "step": 24708 }, { "epoch": 0.043812872402766365, "grad_norm": 1.171875, "learning_rate": 0.001737310815798559, "loss": 0.2188, "step": 24710 }, { "epoch": 0.04381641856807618, "grad_norm": 0.5390625, "learning_rate": 0.001737268788943845, "loss": 0.2563, "step": 24712 }, { "epoch": 0.043819964733385994, "grad_norm": 0.45703125, "learning_rate": 0.0017372267593020875, "loss": 0.446, "step": 24714 }, { "epoch": 0.04382351089869581, "grad_norm": 0.447265625, "learning_rate": 0.0017371847268734702, "loss": 0.2102, "step": 24716 }, { "epoch": 0.04382705706400562, "grad_norm": 0.337890625, "learning_rate": 0.001737142691658177, "loss": 0.224, "step": 24718 }, { "epoch": 0.04383060322931544, "grad_norm": 0.68359375, "learning_rate": 0.0017371006536563917, "loss": 0.1864, "step": 24720 }, { "epoch": 0.04383414939462525, "grad_norm": 1.34375, "learning_rate": 0.0017370586128682983, "loss": 0.2079, "step": 24722 }, { "epoch": 0.043837695559935067, "grad_norm": 1.0703125, "learning_rate": 0.0017370165692940807, "loss": 0.1818, "step": 24724 }, { "epoch": 0.04384124172524489, "grad_norm": 0.2392578125, "learning_rate": 0.0017369745229339224, "loss": 0.1769, "step": 24726 }, { "epoch": 0.0438447878905547, "grad_norm": 0.41015625, "learning_rate": 0.0017369324737880078, "loss": 0.1988, "step": 24728 }, { "epoch": 0.04384833405586452, "grad_norm": 0.52734375, "learning_rate": 0.0017368904218565205, "loss": 0.1925, "step": 24730 }, { "epoch": 0.04385188022117433, "grad_norm": 0.2392578125, "learning_rate": 0.0017368483671396445, "loss": 0.2958, "step": 24732 }, { "epoch": 0.043855426386484146, "grad_norm": 0.66015625, "learning_rate": 0.0017368063096375634, "loss": 0.1985, "step": 24734 }, { "epoch": 0.04385897255179396, "grad_norm": 0.3671875, "learning_rate": 0.0017367642493504615, "loss": 0.2097, "step": 24736 }, { "epoch": 0.043862518717103775, "grad_norm": 1.734375, "learning_rate": 0.0017367221862785228, "loss": 0.3518, "step": 24738 }, { "epoch": 0.04386606488241359, "grad_norm": 0.3203125, "learning_rate": 0.0017366801204219309, "loss": 0.1801, "step": 24740 }, { "epoch": 0.043869611047723404, "grad_norm": 0.52734375, "learning_rate": 0.0017366380517808702, "loss": 0.2722, "step": 24742 }, { "epoch": 0.04387315721303322, "grad_norm": 0.3515625, "learning_rate": 0.0017365959803555243, "loss": 0.1475, "step": 24744 }, { "epoch": 0.043876703378343034, "grad_norm": 0.482421875, "learning_rate": 0.0017365539061460773, "loss": 0.2005, "step": 24746 }, { "epoch": 0.043880249543652855, "grad_norm": 0.40625, "learning_rate": 0.0017365118291527133, "loss": 0.2047, "step": 24748 }, { "epoch": 0.04388379570896267, "grad_norm": 0.71484375, "learning_rate": 0.0017364697493756165, "loss": 0.1838, "step": 24750 }, { "epoch": 0.043887341874272484, "grad_norm": 0.9765625, "learning_rate": 0.0017364276668149702, "loss": 0.3485, "step": 24752 }, { "epoch": 0.0438908880395823, "grad_norm": 0.6171875, "learning_rate": 0.00173638558147096, "loss": 0.2053, "step": 24754 }, { "epoch": 0.04389443420489211, "grad_norm": 0.1875, "learning_rate": 0.001736343493343768, "loss": 0.2038, "step": 24756 }, { "epoch": 0.04389798037020193, "grad_norm": 1.4375, "learning_rate": 0.0017363014024335795, "loss": 0.2865, "step": 24758 }, { "epoch": 0.04390152653551174, "grad_norm": 0.337890625, "learning_rate": 0.001736259308740578, "loss": 0.2944, "step": 24760 }, { "epoch": 0.04390507270082156, "grad_norm": 1.8359375, "learning_rate": 0.0017362172122649484, "loss": 0.1514, "step": 24762 }, { "epoch": 0.04390861886613137, "grad_norm": 1.109375, "learning_rate": 0.001736175113006874, "loss": 0.1965, "step": 24764 }, { "epoch": 0.043912165031441186, "grad_norm": 5.46875, "learning_rate": 0.0017361330109665393, "loss": 0.1965, "step": 24766 }, { "epoch": 0.043915711196751, "grad_norm": 0.341796875, "learning_rate": 0.0017360909061441279, "loss": 0.2025, "step": 24768 }, { "epoch": 0.043919257362060815, "grad_norm": 0.77734375, "learning_rate": 0.0017360487985398248, "loss": 0.214, "step": 24770 }, { "epoch": 0.04392280352737064, "grad_norm": 0.46875, "learning_rate": 0.0017360066881538135, "loss": 0.1446, "step": 24772 }, { "epoch": 0.04392634969268045, "grad_norm": 0.703125, "learning_rate": 0.0017359645749862784, "loss": 0.201, "step": 24774 }, { "epoch": 0.043929895857990266, "grad_norm": 1.0078125, "learning_rate": 0.0017359224590374036, "loss": 0.2279, "step": 24776 }, { "epoch": 0.04393344202330008, "grad_norm": 0.298828125, "learning_rate": 0.0017358803403073736, "loss": 0.2437, "step": 24778 }, { "epoch": 0.043936988188609895, "grad_norm": 0.251953125, "learning_rate": 0.0017358382187963725, "loss": 0.207, "step": 24780 }, { "epoch": 0.04394053435391971, "grad_norm": 0.361328125, "learning_rate": 0.0017357960945045836, "loss": 0.1772, "step": 24782 }, { "epoch": 0.043944080519229524, "grad_norm": 0.7890625, "learning_rate": 0.0017357539674321928, "loss": 0.1945, "step": 24784 }, { "epoch": 0.04394762668453934, "grad_norm": 0.41796875, "learning_rate": 0.0017357118375793828, "loss": 0.2121, "step": 24786 }, { "epoch": 0.04395117284984915, "grad_norm": 0.2578125, "learning_rate": 0.0017356697049463385, "loss": 0.1861, "step": 24788 }, { "epoch": 0.04395471901515897, "grad_norm": 0.298828125, "learning_rate": 0.0017356275695332444, "loss": 0.2258, "step": 24790 }, { "epoch": 0.04395826518046878, "grad_norm": 0.74609375, "learning_rate": 0.0017355854313402849, "loss": 0.2314, "step": 24792 }, { "epoch": 0.043961811345778604, "grad_norm": 1.2734375, "learning_rate": 0.0017355432903676433, "loss": 0.243, "step": 24794 }, { "epoch": 0.04396535751108842, "grad_norm": 0.609375, "learning_rate": 0.0017355011466155049, "loss": 0.2536, "step": 24796 }, { "epoch": 0.04396890367639823, "grad_norm": 0.482421875, "learning_rate": 0.0017354590000840532, "loss": 0.1508, "step": 24798 }, { "epoch": 0.04397244984170805, "grad_norm": 0.310546875, "learning_rate": 0.0017354168507734734, "loss": 0.1553, "step": 24800 }, { "epoch": 0.04397599600701786, "grad_norm": 2.390625, "learning_rate": 0.0017353746986839488, "loss": 0.1828, "step": 24802 }, { "epoch": 0.043979542172327676, "grad_norm": 0.515625, "learning_rate": 0.001735332543815665, "loss": 0.2022, "step": 24804 }, { "epoch": 0.04398308833763749, "grad_norm": 0.59765625, "learning_rate": 0.0017352903861688055, "loss": 0.1923, "step": 24806 }, { "epoch": 0.043986634502947305, "grad_norm": 0.3046875, "learning_rate": 0.001735248225743555, "loss": 0.2063, "step": 24808 }, { "epoch": 0.04399018066825712, "grad_norm": 0.248046875, "learning_rate": 0.0017352060625400972, "loss": 0.1802, "step": 24810 }, { "epoch": 0.043993726833566935, "grad_norm": 1.4921875, "learning_rate": 0.0017351638965586173, "loss": 0.2478, "step": 24812 }, { "epoch": 0.04399727299887675, "grad_norm": 0.6875, "learning_rate": 0.0017351217277992998, "loss": 0.1826, "step": 24814 }, { "epoch": 0.04400081916418657, "grad_norm": 0.59765625, "learning_rate": 0.0017350795562623286, "loss": 0.2102, "step": 24816 }, { "epoch": 0.044004365329496385, "grad_norm": 0.50390625, "learning_rate": 0.0017350373819478883, "loss": 0.231, "step": 24818 }, { "epoch": 0.0440079114948062, "grad_norm": 0.447265625, "learning_rate": 0.0017349952048561633, "loss": 0.2083, "step": 24820 }, { "epoch": 0.044011457660116014, "grad_norm": 0.423828125, "learning_rate": 0.001734953024987338, "loss": 0.2702, "step": 24822 }, { "epoch": 0.04401500382542583, "grad_norm": 0.78515625, "learning_rate": 0.0017349108423415974, "loss": 0.2296, "step": 24824 }, { "epoch": 0.04401854999073564, "grad_norm": 0.2734375, "learning_rate": 0.0017348686569191253, "loss": 0.1744, "step": 24826 }, { "epoch": 0.04402209615604546, "grad_norm": 0.6875, "learning_rate": 0.0017348264687201062, "loss": 0.3019, "step": 24828 }, { "epoch": 0.04402564232135527, "grad_norm": 1.796875, "learning_rate": 0.0017347842777447253, "loss": 0.2328, "step": 24830 }, { "epoch": 0.04402918848666509, "grad_norm": 0.69921875, "learning_rate": 0.0017347420839931666, "loss": 0.28, "step": 24832 }, { "epoch": 0.0440327346519749, "grad_norm": 1.1171875, "learning_rate": 0.0017346998874656148, "loss": 0.212, "step": 24834 }, { "epoch": 0.044036280817284716, "grad_norm": 3.234375, "learning_rate": 0.0017346576881622543, "loss": 0.2375, "step": 24836 }, { "epoch": 0.04403982698259453, "grad_norm": 0.2333984375, "learning_rate": 0.0017346154860832697, "loss": 0.1886, "step": 24838 }, { "epoch": 0.04404337314790435, "grad_norm": 0.76953125, "learning_rate": 0.0017345732812288454, "loss": 0.5268, "step": 24840 }, { "epoch": 0.04404691931321417, "grad_norm": 0.470703125, "learning_rate": 0.0017345310735991664, "loss": 0.1705, "step": 24842 }, { "epoch": 0.04405046547852398, "grad_norm": 0.890625, "learning_rate": 0.0017344888631944172, "loss": 0.2968, "step": 24844 }, { "epoch": 0.044054011643833796, "grad_norm": 0.5703125, "learning_rate": 0.001734446650014782, "loss": 0.1989, "step": 24846 }, { "epoch": 0.04405755780914361, "grad_norm": 0.734375, "learning_rate": 0.001734404434060446, "loss": 0.2312, "step": 24848 }, { "epoch": 0.044061103974453425, "grad_norm": 0.76171875, "learning_rate": 0.001734362215331593, "loss": 0.1963, "step": 24850 }, { "epoch": 0.04406465013976324, "grad_norm": 0.66796875, "learning_rate": 0.0017343199938284085, "loss": 0.2594, "step": 24852 }, { "epoch": 0.044068196305073054, "grad_norm": 0.330078125, "learning_rate": 0.0017342777695510767, "loss": 0.2067, "step": 24854 }, { "epoch": 0.04407174247038287, "grad_norm": 0.75, "learning_rate": 0.0017342355424997825, "loss": 0.17, "step": 24856 }, { "epoch": 0.04407528863569268, "grad_norm": 0.390625, "learning_rate": 0.0017341933126747103, "loss": 0.1956, "step": 24858 }, { "epoch": 0.0440788348010025, "grad_norm": 0.306640625, "learning_rate": 0.0017341510800760447, "loss": 0.2202, "step": 24860 }, { "epoch": 0.04408238096631232, "grad_norm": 0.28125, "learning_rate": 0.001734108844703971, "loss": 0.1989, "step": 24862 }, { "epoch": 0.044085927131622134, "grad_norm": 0.341796875, "learning_rate": 0.0017340666065586736, "loss": 0.1902, "step": 24864 }, { "epoch": 0.04408947329693195, "grad_norm": 2.453125, "learning_rate": 0.001734024365640337, "loss": 0.2089, "step": 24866 }, { "epoch": 0.04409301946224176, "grad_norm": 0.328125, "learning_rate": 0.0017339821219491463, "loss": 0.1732, "step": 24868 }, { "epoch": 0.04409656562755158, "grad_norm": 2.015625, "learning_rate": 0.001733939875485286, "loss": 0.242, "step": 24870 }, { "epoch": 0.04410011179286139, "grad_norm": 0.46875, "learning_rate": 0.0017338976262489406, "loss": 0.1847, "step": 24872 }, { "epoch": 0.04410365795817121, "grad_norm": 0.53125, "learning_rate": 0.0017338553742402953, "loss": 0.2285, "step": 24874 }, { "epoch": 0.04410720412348102, "grad_norm": 0.2451171875, "learning_rate": 0.001733813119459535, "loss": 0.168, "step": 24876 }, { "epoch": 0.044110750288790836, "grad_norm": 0.7421875, "learning_rate": 0.0017337708619068444, "loss": 0.1757, "step": 24878 }, { "epoch": 0.04411429645410065, "grad_norm": 0.30859375, "learning_rate": 0.0017337286015824077, "loss": 0.1575, "step": 24880 }, { "epoch": 0.044117842619410465, "grad_norm": 0.59765625, "learning_rate": 0.0017336863384864105, "loss": 0.2938, "step": 24882 }, { "epoch": 0.044121388784720286, "grad_norm": 1.09375, "learning_rate": 0.0017336440726190374, "loss": 0.2164, "step": 24884 }, { "epoch": 0.0441249349500301, "grad_norm": 0.8046875, "learning_rate": 0.0017336018039804733, "loss": 0.2116, "step": 24886 }, { "epoch": 0.044128481115339915, "grad_norm": 0.6328125, "learning_rate": 0.0017335595325709025, "loss": 0.183, "step": 24888 }, { "epoch": 0.04413202728064973, "grad_norm": 1.5546875, "learning_rate": 0.0017335172583905106, "loss": 0.1581, "step": 24890 }, { "epoch": 0.044135573445959544, "grad_norm": 1.0390625, "learning_rate": 0.0017334749814394822, "loss": 0.1948, "step": 24892 }, { "epoch": 0.04413911961126936, "grad_norm": 1.0078125, "learning_rate": 0.0017334327017180025, "loss": 0.3382, "step": 24894 }, { "epoch": 0.044142665776579174, "grad_norm": 4.8125, "learning_rate": 0.0017333904192262557, "loss": 0.2989, "step": 24896 }, { "epoch": 0.04414621194188899, "grad_norm": 0.255859375, "learning_rate": 0.0017333481339644272, "loss": 0.178, "step": 24898 }, { "epoch": 0.0441497581071988, "grad_norm": 0.396484375, "learning_rate": 0.0017333058459327018, "loss": 0.184, "step": 24900 }, { "epoch": 0.04415330427250862, "grad_norm": 0.392578125, "learning_rate": 0.0017332635551312646, "loss": 0.2037, "step": 24902 }, { "epoch": 0.04415685043781843, "grad_norm": 0.36328125, "learning_rate": 0.0017332212615603002, "loss": 0.2542, "step": 24904 }, { "epoch": 0.044160396603128246, "grad_norm": 0.32421875, "learning_rate": 0.0017331789652199941, "loss": 0.1737, "step": 24906 }, { "epoch": 0.04416394276843807, "grad_norm": 1.765625, "learning_rate": 0.001733136666110531, "loss": 0.2466, "step": 24908 }, { "epoch": 0.04416748893374788, "grad_norm": 0.6328125, "learning_rate": 0.001733094364232096, "loss": 0.2467, "step": 24910 }, { "epoch": 0.0441710350990577, "grad_norm": 0.84765625, "learning_rate": 0.0017330520595848736, "loss": 0.2927, "step": 24912 }, { "epoch": 0.04417458126436751, "grad_norm": 0.78125, "learning_rate": 0.0017330097521690497, "loss": 0.2484, "step": 24914 }, { "epoch": 0.044178127429677326, "grad_norm": 0.73046875, "learning_rate": 0.0017329674419848085, "loss": 0.2698, "step": 24916 }, { "epoch": 0.04418167359498714, "grad_norm": 0.5546875, "learning_rate": 0.0017329251290323353, "loss": 0.2154, "step": 24918 }, { "epoch": 0.044185219760296955, "grad_norm": 0.357421875, "learning_rate": 0.0017328828133118153, "loss": 0.3464, "step": 24920 }, { "epoch": 0.04418876592560677, "grad_norm": 0.51953125, "learning_rate": 0.0017328404948234338, "loss": 0.1822, "step": 24922 }, { "epoch": 0.044192312090916584, "grad_norm": 0.6015625, "learning_rate": 0.0017327981735673756, "loss": 0.2624, "step": 24924 }, { "epoch": 0.0441958582562264, "grad_norm": 2.609375, "learning_rate": 0.0017327558495438254, "loss": 0.1726, "step": 24926 }, { "epoch": 0.04419940442153621, "grad_norm": 0.2431640625, "learning_rate": 0.0017327135227529684, "loss": 0.253, "step": 24928 }, { "epoch": 0.044202950586846035, "grad_norm": 0.59375, "learning_rate": 0.0017326711931949905, "loss": 0.1939, "step": 24930 }, { "epoch": 0.04420649675215585, "grad_norm": 0.498046875, "learning_rate": 0.0017326288608700761, "loss": 0.2277, "step": 24932 }, { "epoch": 0.044210042917465664, "grad_norm": 0.609375, "learning_rate": 0.0017325865257784108, "loss": 0.1929, "step": 24934 }, { "epoch": 0.04421358908277548, "grad_norm": 0.55859375, "learning_rate": 0.0017325441879201793, "loss": 0.1885, "step": 24936 }, { "epoch": 0.04421713524808529, "grad_norm": 0.314453125, "learning_rate": 0.0017325018472955668, "loss": 0.2081, "step": 24938 }, { "epoch": 0.04422068141339511, "grad_norm": 0.3828125, "learning_rate": 0.0017324595039047588, "loss": 0.153, "step": 24940 }, { "epoch": 0.04422422757870492, "grad_norm": 0.51171875, "learning_rate": 0.0017324171577479403, "loss": 0.2743, "step": 24942 }, { "epoch": 0.04422777374401474, "grad_norm": 0.390625, "learning_rate": 0.0017323748088252965, "loss": 0.1951, "step": 24944 }, { "epoch": 0.04423131990932455, "grad_norm": 0.52734375, "learning_rate": 0.0017323324571370125, "loss": 0.2211, "step": 24946 }, { "epoch": 0.044234866074634366, "grad_norm": 0.4609375, "learning_rate": 0.0017322901026832736, "loss": 0.2157, "step": 24948 }, { "epoch": 0.04423841223994418, "grad_norm": 0.20703125, "learning_rate": 0.0017322477454642653, "loss": 0.1773, "step": 24950 }, { "epoch": 0.044241958405254, "grad_norm": 0.27734375, "learning_rate": 0.0017322053854801724, "loss": 0.1846, "step": 24952 }, { "epoch": 0.044245504570563816, "grad_norm": 0.609375, "learning_rate": 0.0017321630227311803, "loss": 0.2066, "step": 24954 }, { "epoch": 0.04424905073587363, "grad_norm": 0.333984375, "learning_rate": 0.001732120657217475, "loss": 0.2046, "step": 24956 }, { "epoch": 0.044252596901183446, "grad_norm": 0.62109375, "learning_rate": 0.0017320782889392403, "loss": 0.2003, "step": 24958 }, { "epoch": 0.04425614306649326, "grad_norm": 0.9609375, "learning_rate": 0.0017320359178966628, "loss": 0.4127, "step": 24960 }, { "epoch": 0.044259689231803075, "grad_norm": 0.380859375, "learning_rate": 0.0017319935440899272, "loss": 0.1933, "step": 24962 }, { "epoch": 0.04426323539711289, "grad_norm": 0.6015625, "learning_rate": 0.0017319511675192188, "loss": 0.1809, "step": 24964 }, { "epoch": 0.044266781562422704, "grad_norm": 0.5390625, "learning_rate": 0.001731908788184723, "loss": 0.1682, "step": 24966 }, { "epoch": 0.04427032772773252, "grad_norm": 0.462890625, "learning_rate": 0.0017318664060866256, "loss": 0.2287, "step": 24968 }, { "epoch": 0.04427387389304233, "grad_norm": 2.5, "learning_rate": 0.0017318240212251116, "loss": 0.2996, "step": 24970 }, { "epoch": 0.04427742005835215, "grad_norm": 0.357421875, "learning_rate": 0.001731781633600366, "loss": 0.1233, "step": 24972 }, { "epoch": 0.04428096622366196, "grad_norm": 1.25, "learning_rate": 0.0017317392432125746, "loss": 0.2858, "step": 24974 }, { "epoch": 0.04428451238897178, "grad_norm": 0.2421875, "learning_rate": 0.0017316968500619228, "loss": 0.1561, "step": 24976 }, { "epoch": 0.0442880585542816, "grad_norm": 0.41015625, "learning_rate": 0.0017316544541485957, "loss": 0.23, "step": 24978 }, { "epoch": 0.04429160471959141, "grad_norm": 1.6953125, "learning_rate": 0.0017316120554727791, "loss": 0.2432, "step": 24980 }, { "epoch": 0.04429515088490123, "grad_norm": 1.546875, "learning_rate": 0.0017315696540346583, "loss": 0.2294, "step": 24982 }, { "epoch": 0.04429869705021104, "grad_norm": 1.1796875, "learning_rate": 0.0017315272498344186, "loss": 0.1983, "step": 24984 }, { "epoch": 0.044302243215520856, "grad_norm": 0.416015625, "learning_rate": 0.0017314848428722457, "loss": 0.2116, "step": 24986 }, { "epoch": 0.04430578938083067, "grad_norm": 0.412109375, "learning_rate": 0.0017314424331483249, "loss": 0.1912, "step": 24988 }, { "epoch": 0.044309335546140485, "grad_norm": 0.2431640625, "learning_rate": 0.0017314000206628414, "loss": 0.2261, "step": 24990 }, { "epoch": 0.0443128817114503, "grad_norm": 1.3046875, "learning_rate": 0.0017313576054159812, "loss": 0.3155, "step": 24992 }, { "epoch": 0.044316427876760114, "grad_norm": 0.4140625, "learning_rate": 0.0017313151874079294, "loss": 0.3077, "step": 24994 }, { "epoch": 0.04431997404206993, "grad_norm": 0.2578125, "learning_rate": 0.0017312727666388716, "loss": 0.2133, "step": 24996 }, { "epoch": 0.04432352020737975, "grad_norm": 0.466796875, "learning_rate": 0.001731230343108994, "loss": 0.2026, "step": 24998 }, { "epoch": 0.044327066372689565, "grad_norm": 0.6015625, "learning_rate": 0.0017311879168184813, "loss": 0.1502, "step": 25000 }, { "epoch": 0.04433061253799938, "grad_norm": 0.63671875, "learning_rate": 0.0017311454877675188, "loss": 0.203, "step": 25002 }, { "epoch": 0.044334158703309194, "grad_norm": 1.5390625, "learning_rate": 0.0017311030559562929, "loss": 0.4451, "step": 25004 }, { "epoch": 0.04433770486861901, "grad_norm": 0.74609375, "learning_rate": 0.0017310606213849886, "loss": 0.181, "step": 25006 }, { "epoch": 0.04434125103392882, "grad_norm": 0.65234375, "learning_rate": 0.001731018184053792, "loss": 0.2011, "step": 25008 }, { "epoch": 0.04434479719923864, "grad_norm": 0.310546875, "learning_rate": 0.0017309757439628883, "loss": 0.18, "step": 25010 }, { "epoch": 0.04434834336454845, "grad_norm": 1.6796875, "learning_rate": 0.0017309333011124633, "loss": 0.4138, "step": 25012 }, { "epoch": 0.04435188952985827, "grad_norm": 0.54296875, "learning_rate": 0.0017308908555027022, "loss": 0.2245, "step": 25014 }, { "epoch": 0.04435543569516808, "grad_norm": 0.59765625, "learning_rate": 0.0017308484071337914, "loss": 0.2158, "step": 25016 }, { "epoch": 0.044358981860477896, "grad_norm": 0.5625, "learning_rate": 0.001730805956005916, "loss": 0.2051, "step": 25018 }, { "epoch": 0.04436252802578772, "grad_norm": 0.61328125, "learning_rate": 0.0017307635021192617, "loss": 0.2866, "step": 25020 }, { "epoch": 0.04436607419109753, "grad_norm": 0.77734375, "learning_rate": 0.001730721045474014, "loss": 0.2846, "step": 25022 }, { "epoch": 0.04436962035640735, "grad_norm": 0.625, "learning_rate": 0.0017306785860703594, "loss": 0.1775, "step": 25024 }, { "epoch": 0.04437316652171716, "grad_norm": 0.3359375, "learning_rate": 0.0017306361239084822, "loss": 0.1851, "step": 25026 }, { "epoch": 0.044376712687026976, "grad_norm": 1.2265625, "learning_rate": 0.0017305936589885693, "loss": 0.2471, "step": 25028 }, { "epoch": 0.04438025885233679, "grad_norm": 0.36328125, "learning_rate": 0.0017305511913108063, "loss": 0.2088, "step": 25030 }, { "epoch": 0.044383805017646605, "grad_norm": 0.57421875, "learning_rate": 0.0017305087208753782, "loss": 0.1501, "step": 25032 }, { "epoch": 0.04438735118295642, "grad_norm": 0.65234375, "learning_rate": 0.0017304662476824715, "loss": 0.4587, "step": 25034 }, { "epoch": 0.044390897348266234, "grad_norm": 0.8671875, "learning_rate": 0.0017304237717322717, "loss": 0.2408, "step": 25036 }, { "epoch": 0.04439444351357605, "grad_norm": 1.515625, "learning_rate": 0.0017303812930249643, "loss": 0.2906, "step": 25038 }, { "epoch": 0.04439798967888586, "grad_norm": 0.73828125, "learning_rate": 0.0017303388115607354, "loss": 0.4104, "step": 25040 }, { "epoch": 0.04440153584419568, "grad_norm": 0.478515625, "learning_rate": 0.0017302963273397706, "loss": 0.1979, "step": 25042 }, { "epoch": 0.0444050820095055, "grad_norm": 0.498046875, "learning_rate": 0.001730253840362256, "loss": 0.2338, "step": 25044 }, { "epoch": 0.044408628174815314, "grad_norm": 0.423828125, "learning_rate": 0.0017302113506283772, "loss": 0.2185, "step": 25046 }, { "epoch": 0.04441217434012513, "grad_norm": 0.546875, "learning_rate": 0.0017301688581383198, "loss": 0.2663, "step": 25048 }, { "epoch": 0.04441572050543494, "grad_norm": 0.83203125, "learning_rate": 0.0017301263628922699, "loss": 0.2094, "step": 25050 }, { "epoch": 0.04441926667074476, "grad_norm": 0.2333984375, "learning_rate": 0.001730083864890413, "loss": 0.1794, "step": 25052 }, { "epoch": 0.04442281283605457, "grad_norm": 0.9453125, "learning_rate": 0.0017300413641329357, "loss": 0.244, "step": 25054 }, { "epoch": 0.044426359001364386, "grad_norm": 0.6953125, "learning_rate": 0.0017299988606200233, "loss": 0.2548, "step": 25056 }, { "epoch": 0.0444299051666742, "grad_norm": 9.25, "learning_rate": 0.0017299563543518622, "loss": 0.3135, "step": 25058 }, { "epoch": 0.044433451331984015, "grad_norm": 1.03125, "learning_rate": 0.0017299138453286376, "loss": 0.2566, "step": 25060 }, { "epoch": 0.04443699749729383, "grad_norm": 0.302734375, "learning_rate": 0.0017298713335505357, "loss": 0.2064, "step": 25062 }, { "epoch": 0.044440543662603645, "grad_norm": 1.0234375, "learning_rate": 0.0017298288190177424, "loss": 0.2721, "step": 25064 }, { "epoch": 0.044444089827913466, "grad_norm": 0.287109375, "learning_rate": 0.0017297863017304439, "loss": 0.1798, "step": 25066 }, { "epoch": 0.04444763599322328, "grad_norm": 0.404296875, "learning_rate": 0.0017297437816888258, "loss": 0.2137, "step": 25068 }, { "epoch": 0.044451182158533095, "grad_norm": 0.71875, "learning_rate": 0.0017297012588930745, "loss": 0.3298, "step": 25070 }, { "epoch": 0.04445472832384291, "grad_norm": 0.330078125, "learning_rate": 0.0017296587333433752, "loss": 0.1751, "step": 25072 }, { "epoch": 0.044458274489152724, "grad_norm": 0.318359375, "learning_rate": 0.0017296162050399147, "loss": 0.1794, "step": 25074 }, { "epoch": 0.04446182065446254, "grad_norm": 0.7265625, "learning_rate": 0.0017295736739828783, "loss": 0.1732, "step": 25076 }, { "epoch": 0.04446536681977235, "grad_norm": 0.87109375, "learning_rate": 0.0017295311401724527, "loss": 0.205, "step": 25078 }, { "epoch": 0.04446891298508217, "grad_norm": 0.490234375, "learning_rate": 0.0017294886036088232, "loss": 0.1551, "step": 25080 }, { "epoch": 0.04447245915039198, "grad_norm": 0.35546875, "learning_rate": 0.0017294460642921766, "loss": 0.2524, "step": 25082 }, { "epoch": 0.0444760053157018, "grad_norm": 1.0859375, "learning_rate": 0.0017294035222226983, "loss": 0.1488, "step": 25084 }, { "epoch": 0.04447955148101161, "grad_norm": 0.76171875, "learning_rate": 0.0017293609774005746, "loss": 0.2564, "step": 25086 }, { "epoch": 0.04448309764632143, "grad_norm": 0.50390625, "learning_rate": 0.0017293184298259917, "loss": 0.2283, "step": 25088 }, { "epoch": 0.04448664381163125, "grad_norm": 0.5390625, "learning_rate": 0.0017292758794991354, "loss": 0.1896, "step": 25090 }, { "epoch": 0.04449018997694106, "grad_norm": 0.6640625, "learning_rate": 0.001729233326420192, "loss": 0.1801, "step": 25092 }, { "epoch": 0.04449373614225088, "grad_norm": 0.41015625, "learning_rate": 0.0017291907705893478, "loss": 0.2594, "step": 25094 }, { "epoch": 0.04449728230756069, "grad_norm": 1.234375, "learning_rate": 0.0017291482120067878, "loss": 0.3557, "step": 25096 }, { "epoch": 0.044500828472870506, "grad_norm": 0.5390625, "learning_rate": 0.0017291056506726998, "loss": 0.1286, "step": 25098 }, { "epoch": 0.04450437463818032, "grad_norm": 0.251953125, "learning_rate": 0.001729063086587269, "loss": 0.1593, "step": 25100 }, { "epoch": 0.044507920803490135, "grad_norm": 0.609375, "learning_rate": 0.0017290205197506815, "loss": 0.1626, "step": 25102 }, { "epoch": 0.04451146696879995, "grad_norm": 0.515625, "learning_rate": 0.0017289779501631238, "loss": 0.2343, "step": 25104 }, { "epoch": 0.044515013134109764, "grad_norm": 0.73046875, "learning_rate": 0.0017289353778247815, "loss": 0.2718, "step": 25106 }, { "epoch": 0.04451855929941958, "grad_norm": 1.9453125, "learning_rate": 0.0017288928027358412, "loss": 0.2221, "step": 25108 }, { "epoch": 0.04452210546472939, "grad_norm": 0.341796875, "learning_rate": 0.0017288502248964895, "loss": 0.1897, "step": 25110 }, { "epoch": 0.044525651630039215, "grad_norm": 0.91015625, "learning_rate": 0.001728807644306912, "loss": 0.2313, "step": 25112 }, { "epoch": 0.04452919779534903, "grad_norm": 0.71484375, "learning_rate": 0.0017287650609672952, "loss": 0.2257, "step": 25114 }, { "epoch": 0.044532743960658844, "grad_norm": 0.3984375, "learning_rate": 0.001728722474877825, "loss": 0.2012, "step": 25116 }, { "epoch": 0.04453629012596866, "grad_norm": 1.28125, "learning_rate": 0.001728679886038688, "loss": 0.1532, "step": 25118 }, { "epoch": 0.04453983629127847, "grad_norm": 1.203125, "learning_rate": 0.0017286372944500708, "loss": 0.2037, "step": 25120 }, { "epoch": 0.04454338245658829, "grad_norm": 2.515625, "learning_rate": 0.0017285947001121587, "loss": 0.2852, "step": 25122 }, { "epoch": 0.0445469286218981, "grad_norm": 1.3203125, "learning_rate": 0.0017285521030251387, "loss": 0.2982, "step": 25124 }, { "epoch": 0.04455047478720792, "grad_norm": 0.294921875, "learning_rate": 0.0017285095031891967, "loss": 0.1718, "step": 25126 }, { "epoch": 0.04455402095251773, "grad_norm": 0.7421875, "learning_rate": 0.0017284669006045195, "loss": 0.2023, "step": 25128 }, { "epoch": 0.044557567117827546, "grad_norm": 0.67578125, "learning_rate": 0.001728424295271293, "loss": 0.2458, "step": 25130 }, { "epoch": 0.04456111328313736, "grad_norm": 0.60546875, "learning_rate": 0.001728381687189704, "loss": 0.1828, "step": 25132 }, { "epoch": 0.04456465944844718, "grad_norm": 0.42578125, "learning_rate": 0.001728339076359938, "loss": 0.1949, "step": 25134 }, { "epoch": 0.044568205613756996, "grad_norm": 0.37890625, "learning_rate": 0.001728296462782182, "loss": 0.2538, "step": 25136 }, { "epoch": 0.04457175177906681, "grad_norm": 0.5703125, "learning_rate": 0.0017282538464566224, "loss": 0.1863, "step": 25138 }, { "epoch": 0.044575297944376625, "grad_norm": 1.109375, "learning_rate": 0.0017282112273834453, "loss": 0.291, "step": 25140 }, { "epoch": 0.04457884410968644, "grad_norm": 0.94140625, "learning_rate": 0.0017281686055628373, "loss": 0.2358, "step": 25142 }, { "epoch": 0.044582390274996254, "grad_norm": 0.48046875, "learning_rate": 0.0017281259809949845, "loss": 0.1753, "step": 25144 }, { "epoch": 0.04458593644030607, "grad_norm": 0.39453125, "learning_rate": 0.0017280833536800738, "loss": 0.3093, "step": 25146 }, { "epoch": 0.044589482605615884, "grad_norm": 0.515625, "learning_rate": 0.001728040723618291, "loss": 0.2193, "step": 25148 }, { "epoch": 0.0445930287709257, "grad_norm": 0.53515625, "learning_rate": 0.001727998090809823, "loss": 0.2426, "step": 25150 }, { "epoch": 0.04459657493623551, "grad_norm": 1.09375, "learning_rate": 0.0017279554552548564, "loss": 0.245, "step": 25152 }, { "epoch": 0.04460012110154533, "grad_norm": 0.462890625, "learning_rate": 0.0017279128169535771, "loss": 0.1978, "step": 25154 }, { "epoch": 0.04460366726685515, "grad_norm": 1.1171875, "learning_rate": 0.0017278701759061718, "loss": 0.148, "step": 25156 }, { "epoch": 0.04460721343216496, "grad_norm": 0.9765625, "learning_rate": 0.0017278275321128275, "loss": 0.3347, "step": 25158 }, { "epoch": 0.04461075959747478, "grad_norm": 0.470703125, "learning_rate": 0.00172778488557373, "loss": 0.2355, "step": 25160 }, { "epoch": 0.04461430576278459, "grad_norm": 1.0, "learning_rate": 0.0017277422362890658, "loss": 0.2109, "step": 25162 }, { "epoch": 0.04461785192809441, "grad_norm": 0.73046875, "learning_rate": 0.001727699584259022, "loss": 0.2239, "step": 25164 }, { "epoch": 0.04462139809340422, "grad_norm": 2.265625, "learning_rate": 0.001727656929483785, "loss": 0.3557, "step": 25166 }, { "epoch": 0.044624944258714036, "grad_norm": 0.46484375, "learning_rate": 0.001727614271963541, "loss": 0.2334, "step": 25168 }, { "epoch": 0.04462849042402385, "grad_norm": 0.423828125, "learning_rate": 0.0017275716116984766, "loss": 0.1854, "step": 25170 }, { "epoch": 0.044632036589333665, "grad_norm": 0.37109375, "learning_rate": 0.0017275289486887787, "loss": 0.169, "step": 25172 }, { "epoch": 0.04463558275464348, "grad_norm": 0.78515625, "learning_rate": 0.0017274862829346335, "loss": 0.2193, "step": 25174 }, { "epoch": 0.044639128919953294, "grad_norm": 0.3046875, "learning_rate": 0.001727443614436228, "loss": 0.1657, "step": 25176 }, { "epoch": 0.04464267508526311, "grad_norm": 0.357421875, "learning_rate": 0.0017274009431937484, "loss": 0.2131, "step": 25178 }, { "epoch": 0.04464622125057293, "grad_norm": 1.171875, "learning_rate": 0.0017273582692073817, "loss": 0.2232, "step": 25180 }, { "epoch": 0.044649767415882745, "grad_norm": 0.23828125, "learning_rate": 0.001727315592477314, "loss": 0.2346, "step": 25182 }, { "epoch": 0.04465331358119256, "grad_norm": 0.640625, "learning_rate": 0.0017272729130037325, "loss": 0.23, "step": 25184 }, { "epoch": 0.044656859746502374, "grad_norm": 0.3515625, "learning_rate": 0.0017272302307868236, "loss": 0.1772, "step": 25186 }, { "epoch": 0.04466040591181219, "grad_norm": 1.1796875, "learning_rate": 0.0017271875458267742, "loss": 0.1607, "step": 25188 }, { "epoch": 0.044663952077122, "grad_norm": 1.28125, "learning_rate": 0.0017271448581237707, "loss": 0.1981, "step": 25190 }, { "epoch": 0.04466749824243182, "grad_norm": 0.294921875, "learning_rate": 0.0017271021676779998, "loss": 0.1789, "step": 25192 }, { "epoch": 0.04467104440774163, "grad_norm": 2.375, "learning_rate": 0.0017270594744896485, "loss": 0.1912, "step": 25194 }, { "epoch": 0.04467459057305145, "grad_norm": 0.95703125, "learning_rate": 0.0017270167785589027, "loss": 0.2657, "step": 25196 }, { "epoch": 0.04467813673836126, "grad_norm": 0.296875, "learning_rate": 0.0017269740798859503, "loss": 0.2955, "step": 25198 }, { "epoch": 0.044681682903671076, "grad_norm": 0.40625, "learning_rate": 0.0017269313784709776, "loss": 0.2702, "step": 25200 }, { "epoch": 0.0446852290689809, "grad_norm": 0.330078125, "learning_rate": 0.0017268886743141707, "loss": 0.2156, "step": 25202 }, { "epoch": 0.04468877523429071, "grad_norm": 0.376953125, "learning_rate": 0.001726845967415717, "loss": 0.1633, "step": 25204 }, { "epoch": 0.044692321399600526, "grad_norm": 0.7578125, "learning_rate": 0.0017268032577758036, "loss": 0.2501, "step": 25206 }, { "epoch": 0.04469586756491034, "grad_norm": 0.6171875, "learning_rate": 0.0017267605453946163, "loss": 0.2585, "step": 25208 }, { "epoch": 0.044699413730220156, "grad_norm": 1.5546875, "learning_rate": 0.0017267178302723429, "loss": 0.2442, "step": 25210 }, { "epoch": 0.04470295989552997, "grad_norm": 0.5546875, "learning_rate": 0.001726675112409169, "loss": 0.2002, "step": 25212 }, { "epoch": 0.044706506060839785, "grad_norm": 0.8828125, "learning_rate": 0.001726632391805283, "loss": 0.1982, "step": 25214 }, { "epoch": 0.0447100522261496, "grad_norm": 0.74609375, "learning_rate": 0.0017265896684608704, "loss": 0.2583, "step": 25216 }, { "epoch": 0.044713598391459414, "grad_norm": 2.671875, "learning_rate": 0.0017265469423761186, "loss": 0.4628, "step": 25218 }, { "epoch": 0.04471714455676923, "grad_norm": 0.361328125, "learning_rate": 0.0017265042135512147, "loss": 0.1803, "step": 25220 }, { "epoch": 0.04472069072207904, "grad_norm": 0.61328125, "learning_rate": 0.001726461481986345, "loss": 0.3379, "step": 25222 }, { "epoch": 0.044724236887388864, "grad_norm": 0.455078125, "learning_rate": 0.001726418747681697, "loss": 0.2032, "step": 25224 }, { "epoch": 0.04472778305269868, "grad_norm": 0.474609375, "learning_rate": 0.001726376010637457, "loss": 0.1911, "step": 25226 }, { "epoch": 0.04473132921800849, "grad_norm": 0.447265625, "learning_rate": 0.0017263332708538125, "loss": 0.2561, "step": 25228 }, { "epoch": 0.04473487538331831, "grad_norm": 0.8046875, "learning_rate": 0.0017262905283309496, "loss": 0.2067, "step": 25230 }, { "epoch": 0.04473842154862812, "grad_norm": 0.376953125, "learning_rate": 0.001726247783069056, "loss": 0.2352, "step": 25232 }, { "epoch": 0.04474196771393794, "grad_norm": 1.5703125, "learning_rate": 0.0017262050350683182, "loss": 0.2136, "step": 25234 }, { "epoch": 0.04474551387924775, "grad_norm": 0.625, "learning_rate": 0.0017261622843289235, "loss": 0.1698, "step": 25236 }, { "epoch": 0.044749060044557566, "grad_norm": 0.51953125, "learning_rate": 0.0017261195308510586, "loss": 0.1719, "step": 25238 }, { "epoch": 0.04475260620986738, "grad_norm": 0.6328125, "learning_rate": 0.0017260767746349107, "loss": 0.2408, "step": 25240 }, { "epoch": 0.044756152375177195, "grad_norm": 0.361328125, "learning_rate": 0.0017260340156806666, "loss": 0.3159, "step": 25242 }, { "epoch": 0.04475969854048701, "grad_norm": 0.5703125, "learning_rate": 0.0017259912539885135, "loss": 0.2247, "step": 25244 }, { "epoch": 0.044763244705796824, "grad_norm": 0.26953125, "learning_rate": 0.001725948489558638, "loss": 0.1638, "step": 25246 }, { "epoch": 0.044766790871106646, "grad_norm": 0.5859375, "learning_rate": 0.0017259057223912274, "loss": 0.1935, "step": 25248 }, { "epoch": 0.04477033703641646, "grad_norm": 0.431640625, "learning_rate": 0.001725862952486469, "loss": 0.1959, "step": 25250 }, { "epoch": 0.044773883201726275, "grad_norm": 0.390625, "learning_rate": 0.0017258201798445493, "loss": 0.2547, "step": 25252 }, { "epoch": 0.04477742936703609, "grad_norm": 0.74609375, "learning_rate": 0.0017257774044656556, "loss": 0.1704, "step": 25254 }, { "epoch": 0.044780975532345904, "grad_norm": 0.2314453125, "learning_rate": 0.0017257346263499752, "loss": 0.2106, "step": 25256 }, { "epoch": 0.04478452169765572, "grad_norm": 0.6796875, "learning_rate": 0.0017256918454976952, "loss": 0.2778, "step": 25258 }, { "epoch": 0.04478806786296553, "grad_norm": 0.83203125, "learning_rate": 0.001725649061909002, "loss": 0.2158, "step": 25260 }, { "epoch": 0.04479161402827535, "grad_norm": 0.2109375, "learning_rate": 0.0017256062755840837, "loss": 0.2246, "step": 25262 }, { "epoch": 0.04479516019358516, "grad_norm": 0.455078125, "learning_rate": 0.0017255634865231267, "loss": 0.1648, "step": 25264 }, { "epoch": 0.04479870635889498, "grad_norm": 2.15625, "learning_rate": 0.0017255206947263185, "loss": 0.3511, "step": 25266 }, { "epoch": 0.04480225252420479, "grad_norm": 1.2734375, "learning_rate": 0.0017254779001938461, "loss": 0.2226, "step": 25268 }, { "epoch": 0.04480579868951461, "grad_norm": 0.76953125, "learning_rate": 0.0017254351029258967, "loss": 0.1895, "step": 25270 }, { "epoch": 0.04480934485482443, "grad_norm": 1.9765625, "learning_rate": 0.0017253923029226573, "loss": 0.3192, "step": 25272 }, { "epoch": 0.04481289102013424, "grad_norm": 0.78125, "learning_rate": 0.0017253495001843156, "loss": 0.2938, "step": 25274 }, { "epoch": 0.04481643718544406, "grad_norm": 0.5390625, "learning_rate": 0.001725306694711058, "loss": 0.1547, "step": 25276 }, { "epoch": 0.04481998335075387, "grad_norm": 0.765625, "learning_rate": 0.0017252638865030724, "loss": 0.2048, "step": 25278 }, { "epoch": 0.044823529516063686, "grad_norm": 0.36328125, "learning_rate": 0.001725221075560546, "loss": 0.1902, "step": 25280 }, { "epoch": 0.0448270756813735, "grad_norm": 0.55859375, "learning_rate": 0.001725178261883665, "loss": 0.1282, "step": 25282 }, { "epoch": 0.044830621846683315, "grad_norm": 1.03125, "learning_rate": 0.0017251354454726182, "loss": 0.2068, "step": 25284 }, { "epoch": 0.04483416801199313, "grad_norm": 0.61328125, "learning_rate": 0.001725092626327592, "loss": 0.2783, "step": 25286 }, { "epoch": 0.044837714177302944, "grad_norm": 0.56640625, "learning_rate": 0.0017250498044487732, "loss": 0.3639, "step": 25288 }, { "epoch": 0.04484126034261276, "grad_norm": 0.75, "learning_rate": 0.00172500697983635, "loss": 0.211, "step": 25290 }, { "epoch": 0.04484480650792258, "grad_norm": 0.4765625, "learning_rate": 0.0017249641524905092, "loss": 0.1993, "step": 25292 }, { "epoch": 0.044848352673232394, "grad_norm": 0.77734375, "learning_rate": 0.0017249213224114384, "loss": 0.2579, "step": 25294 }, { "epoch": 0.04485189883854221, "grad_norm": 0.310546875, "learning_rate": 0.0017248784895993246, "loss": 0.1885, "step": 25296 }, { "epoch": 0.044855445003852024, "grad_norm": 0.20703125, "learning_rate": 0.0017248356540543555, "loss": 0.3486, "step": 25298 }, { "epoch": 0.04485899116916184, "grad_norm": 4.3125, "learning_rate": 0.001724792815776718, "loss": 0.1858, "step": 25300 }, { "epoch": 0.04486253733447165, "grad_norm": 0.365234375, "learning_rate": 0.0017247499747665995, "loss": 0.1691, "step": 25302 }, { "epoch": 0.04486608349978147, "grad_norm": 0.5078125, "learning_rate": 0.0017247071310241878, "loss": 0.2438, "step": 25304 }, { "epoch": 0.04486962966509128, "grad_norm": 0.57421875, "learning_rate": 0.0017246642845496697, "loss": 0.2318, "step": 25306 }, { "epoch": 0.044873175830401096, "grad_norm": 0.392578125, "learning_rate": 0.0017246214353432332, "loss": 0.2326, "step": 25308 }, { "epoch": 0.04487672199571091, "grad_norm": 0.37890625, "learning_rate": 0.0017245785834050652, "loss": 0.3474, "step": 25310 }, { "epoch": 0.044880268161020725, "grad_norm": 0.53125, "learning_rate": 0.0017245357287353532, "loss": 0.2132, "step": 25312 }, { "epoch": 0.04488381432633054, "grad_norm": 0.31640625, "learning_rate": 0.0017244928713342849, "loss": 0.3875, "step": 25314 }, { "epoch": 0.04488736049164036, "grad_norm": 0.416015625, "learning_rate": 0.0017244500112020473, "loss": 0.2385, "step": 25316 }, { "epoch": 0.044890906656950176, "grad_norm": 1.7421875, "learning_rate": 0.001724407148338828, "loss": 0.1813, "step": 25318 }, { "epoch": 0.04489445282225999, "grad_norm": 0.3515625, "learning_rate": 0.0017243642827448147, "loss": 0.1964, "step": 25320 }, { "epoch": 0.044897998987569805, "grad_norm": 0.375, "learning_rate": 0.0017243214144201946, "loss": 0.1915, "step": 25322 }, { "epoch": 0.04490154515287962, "grad_norm": 0.404296875, "learning_rate": 0.0017242785433651555, "loss": 0.1782, "step": 25324 }, { "epoch": 0.044905091318189434, "grad_norm": 0.259765625, "learning_rate": 0.0017242356695798843, "loss": 0.1672, "step": 25326 }, { "epoch": 0.04490863748349925, "grad_norm": 0.5078125, "learning_rate": 0.0017241927930645694, "loss": 0.1473, "step": 25328 }, { "epoch": 0.04491218364880906, "grad_norm": 0.365234375, "learning_rate": 0.0017241499138193974, "loss": 0.1786, "step": 25330 }, { "epoch": 0.04491572981411888, "grad_norm": 1.203125, "learning_rate": 0.0017241070318445566, "loss": 0.1742, "step": 25332 }, { "epoch": 0.04491927597942869, "grad_norm": 0.9921875, "learning_rate": 0.0017240641471402336, "loss": 0.2284, "step": 25334 }, { "epoch": 0.04492282214473851, "grad_norm": 0.36328125, "learning_rate": 0.001724021259706617, "loss": 0.1995, "step": 25336 }, { "epoch": 0.04492636831004833, "grad_norm": 1.0546875, "learning_rate": 0.0017239783695438935, "loss": 0.2411, "step": 25338 }, { "epoch": 0.04492991447535814, "grad_norm": 0.384765625, "learning_rate": 0.0017239354766522515, "loss": 0.1829, "step": 25340 }, { "epoch": 0.04493346064066796, "grad_norm": 0.34765625, "learning_rate": 0.0017238925810318779, "loss": 0.1963, "step": 25342 }, { "epoch": 0.04493700680597777, "grad_norm": 1.5859375, "learning_rate": 0.0017238496826829605, "loss": 0.3664, "step": 25344 }, { "epoch": 0.04494055297128759, "grad_norm": 2.015625, "learning_rate": 0.0017238067816056871, "loss": 0.3184, "step": 25346 }, { "epoch": 0.0449440991365974, "grad_norm": 0.326171875, "learning_rate": 0.0017237638778002452, "loss": 0.2, "step": 25348 }, { "epoch": 0.044947645301907216, "grad_norm": 0.8046875, "learning_rate": 0.0017237209712668224, "loss": 0.3216, "step": 25350 }, { "epoch": 0.04495119146721703, "grad_norm": 0.2412109375, "learning_rate": 0.0017236780620056064, "loss": 0.2209, "step": 25352 }, { "epoch": 0.044954737632526845, "grad_norm": 0.490234375, "learning_rate": 0.0017236351500167846, "loss": 0.1877, "step": 25354 }, { "epoch": 0.04495828379783666, "grad_norm": 1.6484375, "learning_rate": 0.0017235922353005452, "loss": 0.3096, "step": 25356 }, { "epoch": 0.044961829963146474, "grad_norm": 0.7265625, "learning_rate": 0.0017235493178570753, "loss": 0.2528, "step": 25358 }, { "epoch": 0.044965376128456296, "grad_norm": 0.25, "learning_rate": 0.0017235063976865628, "loss": 0.2663, "step": 25360 }, { "epoch": 0.04496892229376611, "grad_norm": 0.5546875, "learning_rate": 0.0017234634747891962, "loss": 0.3093, "step": 25362 }, { "epoch": 0.044972468459075925, "grad_norm": 0.43359375, "learning_rate": 0.0017234205491651615, "loss": 0.1628, "step": 25364 }, { "epoch": 0.04497601462438574, "grad_norm": 0.31640625, "learning_rate": 0.0017233776208146483, "loss": 0.1997, "step": 25366 }, { "epoch": 0.044979560789695554, "grad_norm": 0.28515625, "learning_rate": 0.001723334689737843, "loss": 0.1321, "step": 25368 }, { "epoch": 0.04498310695500537, "grad_norm": 0.609375, "learning_rate": 0.001723291755934934, "loss": 0.2833, "step": 25370 }, { "epoch": 0.04498665312031518, "grad_norm": 1.8203125, "learning_rate": 0.0017232488194061089, "loss": 0.2644, "step": 25372 }, { "epoch": 0.044990199285625, "grad_norm": 0.3125, "learning_rate": 0.0017232058801515558, "loss": 0.2767, "step": 25374 }, { "epoch": 0.04499374545093481, "grad_norm": 0.466796875, "learning_rate": 0.0017231629381714618, "loss": 0.1822, "step": 25376 }, { "epoch": 0.04499729161624463, "grad_norm": 0.275390625, "learning_rate": 0.0017231199934660151, "loss": 0.2472, "step": 25378 }, { "epoch": 0.04500083778155444, "grad_norm": 0.455078125, "learning_rate": 0.0017230770460354035, "loss": 0.2009, "step": 25380 }, { "epoch": 0.045004383946864256, "grad_norm": 0.765625, "learning_rate": 0.001723034095879815, "loss": 0.2155, "step": 25382 }, { "epoch": 0.04500793011217408, "grad_norm": 0.80078125, "learning_rate": 0.001722991142999437, "loss": 0.2195, "step": 25384 }, { "epoch": 0.04501147627748389, "grad_norm": 0.8515625, "learning_rate": 0.001722948187394458, "loss": 0.1754, "step": 25386 }, { "epoch": 0.045015022442793706, "grad_norm": 0.51953125, "learning_rate": 0.0017229052290650651, "loss": 0.3058, "step": 25388 }, { "epoch": 0.04501856860810352, "grad_norm": 0.283203125, "learning_rate": 0.0017228622680114467, "loss": 0.2018, "step": 25390 }, { "epoch": 0.045022114773413335, "grad_norm": 0.515625, "learning_rate": 0.0017228193042337907, "loss": 0.2135, "step": 25392 }, { "epoch": 0.04502566093872315, "grad_norm": 0.6484375, "learning_rate": 0.0017227763377322847, "loss": 0.2468, "step": 25394 }, { "epoch": 0.045029207104032964, "grad_norm": 0.40625, "learning_rate": 0.0017227333685071167, "loss": 0.2182, "step": 25396 }, { "epoch": 0.04503275326934278, "grad_norm": 1.1640625, "learning_rate": 0.0017226903965584749, "loss": 0.2119, "step": 25398 }, { "epoch": 0.045036299434652594, "grad_norm": 0.228515625, "learning_rate": 0.0017226474218865466, "loss": 0.2165, "step": 25400 }, { "epoch": 0.04503984559996241, "grad_norm": 0.66015625, "learning_rate": 0.0017226044444915206, "loss": 0.2019, "step": 25402 }, { "epoch": 0.04504339176527222, "grad_norm": 0.546875, "learning_rate": 0.0017225614643735843, "loss": 0.1597, "step": 25404 }, { "epoch": 0.045046937930582044, "grad_norm": 1.109375, "learning_rate": 0.0017225184815329257, "loss": 0.1677, "step": 25406 }, { "epoch": 0.04505048409589186, "grad_norm": 0.5546875, "learning_rate": 0.0017224754959697327, "loss": 0.279, "step": 25408 }, { "epoch": 0.04505403026120167, "grad_norm": 2.015625, "learning_rate": 0.0017224325076841936, "loss": 0.4587, "step": 25410 }, { "epoch": 0.04505757642651149, "grad_norm": 1.1328125, "learning_rate": 0.0017223895166764962, "loss": 0.3143, "step": 25412 }, { "epoch": 0.0450611225918213, "grad_norm": 0.68359375, "learning_rate": 0.0017223465229468287, "loss": 0.173, "step": 25414 }, { "epoch": 0.04506466875713112, "grad_norm": 0.640625, "learning_rate": 0.0017223035264953789, "loss": 0.2514, "step": 25416 }, { "epoch": 0.04506821492244093, "grad_norm": 0.484375, "learning_rate": 0.001722260527322335, "loss": 0.1549, "step": 25418 }, { "epoch": 0.045071761087750746, "grad_norm": 0.2265625, "learning_rate": 0.001722217525427885, "loss": 0.1817, "step": 25420 }, { "epoch": 0.04507530725306056, "grad_norm": 0.84765625, "learning_rate": 0.001722174520812217, "loss": 0.2271, "step": 25422 }, { "epoch": 0.045078853418370375, "grad_norm": 0.8671875, "learning_rate": 0.001722131513475519, "loss": 0.2019, "step": 25424 }, { "epoch": 0.04508239958368019, "grad_norm": 1.609375, "learning_rate": 0.0017220885034179793, "loss": 0.2505, "step": 25426 }, { "epoch": 0.04508594574899001, "grad_norm": 0.375, "learning_rate": 0.0017220454906397855, "loss": 0.2105, "step": 25428 }, { "epoch": 0.045089491914299826, "grad_norm": 0.375, "learning_rate": 0.0017220024751411264, "loss": 0.1876, "step": 25430 }, { "epoch": 0.04509303807960964, "grad_norm": 0.515625, "learning_rate": 0.0017219594569221894, "loss": 0.1667, "step": 25432 }, { "epoch": 0.045096584244919455, "grad_norm": 0.79296875, "learning_rate": 0.0017219164359831636, "loss": 0.1579, "step": 25434 }, { "epoch": 0.04510013041022927, "grad_norm": 0.671875, "learning_rate": 0.001721873412324236, "loss": 0.2341, "step": 25436 }, { "epoch": 0.045103676575539084, "grad_norm": 0.32421875, "learning_rate": 0.0017218303859455957, "loss": 0.2081, "step": 25438 }, { "epoch": 0.0451072227408489, "grad_norm": 0.6875, "learning_rate": 0.0017217873568474303, "loss": 0.2764, "step": 25440 }, { "epoch": 0.04511076890615871, "grad_norm": 0.25390625, "learning_rate": 0.0017217443250299282, "loss": 0.1304, "step": 25442 }, { "epoch": 0.04511431507146853, "grad_norm": 0.263671875, "learning_rate": 0.0017217012904932774, "loss": 0.158, "step": 25444 }, { "epoch": 0.04511786123677834, "grad_norm": 0.546875, "learning_rate": 0.0017216582532376666, "loss": 0.1919, "step": 25446 }, { "epoch": 0.04512140740208816, "grad_norm": 0.67578125, "learning_rate": 0.0017216152132632835, "loss": 0.2029, "step": 25448 }, { "epoch": 0.04512495356739797, "grad_norm": 0.5859375, "learning_rate": 0.0017215721705703165, "loss": 0.1775, "step": 25450 }, { "epoch": 0.04512849973270779, "grad_norm": 1.75, "learning_rate": 0.001721529125158954, "loss": 0.2798, "step": 25452 }, { "epoch": 0.04513204589801761, "grad_norm": 0.3515625, "learning_rate": 0.0017214860770293841, "loss": 0.5013, "step": 25454 }, { "epoch": 0.04513559206332742, "grad_norm": 0.75390625, "learning_rate": 0.0017214430261817952, "loss": 0.1969, "step": 25456 }, { "epoch": 0.045139138228637236, "grad_norm": 0.9765625, "learning_rate": 0.0017213999726163752, "loss": 0.1888, "step": 25458 }, { "epoch": 0.04514268439394705, "grad_norm": 0.353515625, "learning_rate": 0.001721356916333313, "loss": 0.1652, "step": 25460 }, { "epoch": 0.045146230559256866, "grad_norm": 0.458984375, "learning_rate": 0.0017213138573327965, "loss": 0.1664, "step": 25462 }, { "epoch": 0.04514977672456668, "grad_norm": 0.4140625, "learning_rate": 0.001721270795615014, "loss": 0.2046, "step": 25464 }, { "epoch": 0.045153322889876495, "grad_norm": 0.2890625, "learning_rate": 0.0017212277311801538, "loss": 0.1738, "step": 25466 }, { "epoch": 0.04515686905518631, "grad_norm": 0.265625, "learning_rate": 0.0017211846640284045, "loss": 0.2052, "step": 25468 }, { "epoch": 0.045160415220496124, "grad_norm": 0.4453125, "learning_rate": 0.0017211415941599543, "loss": 0.2385, "step": 25470 }, { "epoch": 0.04516396138580594, "grad_norm": 0.380859375, "learning_rate": 0.0017210985215749919, "loss": 0.1935, "step": 25472 }, { "epoch": 0.04516750755111576, "grad_norm": 0.302734375, "learning_rate": 0.0017210554462737046, "loss": 0.2098, "step": 25474 }, { "epoch": 0.045171053716425574, "grad_norm": 1.734375, "learning_rate": 0.001721012368256282, "loss": 0.3009, "step": 25476 }, { "epoch": 0.04517459988173539, "grad_norm": 0.408203125, "learning_rate": 0.001720969287522912, "loss": 0.1868, "step": 25478 }, { "epoch": 0.0451781460470452, "grad_norm": 0.34765625, "learning_rate": 0.0017209262040737833, "loss": 0.2503, "step": 25480 }, { "epoch": 0.04518169221235502, "grad_norm": 0.80859375, "learning_rate": 0.0017208831179090838, "loss": 0.2286, "step": 25482 }, { "epoch": 0.04518523837766483, "grad_norm": 0.283203125, "learning_rate": 0.001720840029029002, "loss": 0.2539, "step": 25484 }, { "epoch": 0.04518878454297465, "grad_norm": 0.7578125, "learning_rate": 0.0017207969374337265, "loss": 0.1986, "step": 25486 }, { "epoch": 0.04519233070828446, "grad_norm": 0.8046875, "learning_rate": 0.0017207538431234463, "loss": 0.3149, "step": 25488 }, { "epoch": 0.045195876873594276, "grad_norm": 1.21875, "learning_rate": 0.001720710746098349, "loss": 0.2551, "step": 25490 }, { "epoch": 0.04519942303890409, "grad_norm": 0.486328125, "learning_rate": 0.0017206676463586235, "loss": 0.1997, "step": 25492 }, { "epoch": 0.045202969204213905, "grad_norm": 1.515625, "learning_rate": 0.0017206245439044582, "loss": 0.284, "step": 25494 }, { "epoch": 0.04520651536952373, "grad_norm": 0.267578125, "learning_rate": 0.0017205814387360417, "loss": 0.215, "step": 25496 }, { "epoch": 0.04521006153483354, "grad_norm": 2.25, "learning_rate": 0.0017205383308535624, "loss": 0.2639, "step": 25498 }, { "epoch": 0.045213607700143356, "grad_norm": 0.31640625, "learning_rate": 0.001720495220257209, "loss": 0.2499, "step": 25500 }, { "epoch": 0.04521715386545317, "grad_norm": 2.1875, "learning_rate": 0.00172045210694717, "loss": 0.2524, "step": 25502 }, { "epoch": 0.045220700030762985, "grad_norm": 0.63671875, "learning_rate": 0.0017204089909236338, "loss": 0.1922, "step": 25504 }, { "epoch": 0.0452242461960728, "grad_norm": 0.3671875, "learning_rate": 0.001720365872186789, "loss": 0.2061, "step": 25506 }, { "epoch": 0.045227792361382614, "grad_norm": 0.34375, "learning_rate": 0.0017203227507368243, "loss": 0.2378, "step": 25508 }, { "epoch": 0.04523133852669243, "grad_norm": 0.455078125, "learning_rate": 0.001720279626573928, "loss": 0.1828, "step": 25510 }, { "epoch": 0.04523488469200224, "grad_norm": 0.64453125, "learning_rate": 0.0017202364996982892, "loss": 0.2122, "step": 25512 }, { "epoch": 0.04523843085731206, "grad_norm": 0.494140625, "learning_rate": 0.001720193370110096, "loss": 0.2354, "step": 25514 }, { "epoch": 0.04524197702262187, "grad_norm": 0.3359375, "learning_rate": 0.0017201502378095374, "loss": 0.1997, "step": 25516 }, { "epoch": 0.04524552318793169, "grad_norm": 0.1787109375, "learning_rate": 0.0017201071027968021, "loss": 0.1815, "step": 25518 }, { "epoch": 0.04524906935324151, "grad_norm": 0.2890625, "learning_rate": 0.0017200639650720782, "loss": 0.2294, "step": 25520 }, { "epoch": 0.04525261551855132, "grad_norm": 0.44140625, "learning_rate": 0.0017200208246355546, "loss": 0.2029, "step": 25522 }, { "epoch": 0.04525616168386114, "grad_norm": 0.25390625, "learning_rate": 0.0017199776814874205, "loss": 0.1412, "step": 25524 }, { "epoch": 0.04525970784917095, "grad_norm": 0.609375, "learning_rate": 0.0017199345356278637, "loss": 0.1894, "step": 25526 }, { "epoch": 0.04526325401448077, "grad_norm": 3.34375, "learning_rate": 0.0017198913870570735, "loss": 0.2968, "step": 25528 }, { "epoch": 0.04526680017979058, "grad_norm": 0.365234375, "learning_rate": 0.0017198482357752385, "loss": 0.4749, "step": 25530 }, { "epoch": 0.045270346345100396, "grad_norm": 0.38671875, "learning_rate": 0.0017198050817825473, "loss": 0.1996, "step": 25532 }, { "epoch": 0.04527389251041021, "grad_norm": 1.234375, "learning_rate": 0.0017197619250791888, "loss": 0.1999, "step": 25534 }, { "epoch": 0.045277438675720025, "grad_norm": 0.6875, "learning_rate": 0.0017197187656653515, "loss": 0.1493, "step": 25536 }, { "epoch": 0.04528098484102984, "grad_norm": 0.40234375, "learning_rate": 0.0017196756035412243, "loss": 0.1705, "step": 25538 }, { "epoch": 0.045284531006339654, "grad_norm": 0.6796875, "learning_rate": 0.001719632438706996, "loss": 0.2438, "step": 25540 }, { "epoch": 0.045288077171649475, "grad_norm": 0.328125, "learning_rate": 0.0017195892711628557, "loss": 0.1797, "step": 25542 }, { "epoch": 0.04529162333695929, "grad_norm": 0.4140625, "learning_rate": 0.0017195461009089912, "loss": 0.2461, "step": 25544 }, { "epoch": 0.045295169502269104, "grad_norm": 0.4609375, "learning_rate": 0.0017195029279455925, "loss": 0.2392, "step": 25546 }, { "epoch": 0.04529871566757892, "grad_norm": 0.228515625, "learning_rate": 0.0017194597522728475, "loss": 0.1433, "step": 25548 }, { "epoch": 0.045302261832888734, "grad_norm": 0.353515625, "learning_rate": 0.0017194165738909455, "loss": 0.1984, "step": 25550 }, { "epoch": 0.04530580799819855, "grad_norm": 0.283203125, "learning_rate": 0.001719373392800075, "loss": 0.2304, "step": 25552 }, { "epoch": 0.04530935416350836, "grad_norm": 0.2578125, "learning_rate": 0.0017193302090004253, "loss": 0.1826, "step": 25554 }, { "epoch": 0.04531290032881818, "grad_norm": 0.31640625, "learning_rate": 0.0017192870224921849, "loss": 0.2292, "step": 25556 }, { "epoch": 0.04531644649412799, "grad_norm": 0.390625, "learning_rate": 0.0017192438332755428, "loss": 0.3133, "step": 25558 }, { "epoch": 0.045319992659437806, "grad_norm": 0.33203125, "learning_rate": 0.001719200641350688, "loss": 0.2361, "step": 25560 }, { "epoch": 0.04532353882474762, "grad_norm": 0.6953125, "learning_rate": 0.001719157446717809, "loss": 0.3206, "step": 25562 }, { "epoch": 0.04532708499005744, "grad_norm": 1.0703125, "learning_rate": 0.001719114249377095, "loss": 0.1806, "step": 25564 }, { "epoch": 0.04533063115536726, "grad_norm": 0.38671875, "learning_rate": 0.001719071049328735, "loss": 0.1894, "step": 25566 }, { "epoch": 0.04533417732067707, "grad_norm": 0.51953125, "learning_rate": 0.001719027846572918, "loss": 0.307, "step": 25568 }, { "epoch": 0.045337723485986886, "grad_norm": 0.7734375, "learning_rate": 0.001718984641109833, "loss": 0.1914, "step": 25570 }, { "epoch": 0.0453412696512967, "grad_norm": 0.609375, "learning_rate": 0.001718941432939668, "loss": 0.4102, "step": 25572 }, { "epoch": 0.045344815816606515, "grad_norm": 0.2109375, "learning_rate": 0.001718898222062613, "loss": 0.1255, "step": 25574 }, { "epoch": 0.04534836198191633, "grad_norm": 0.3203125, "learning_rate": 0.0017188550084788568, "loss": 0.1721, "step": 25576 }, { "epoch": 0.045351908147226144, "grad_norm": 0.84375, "learning_rate": 0.0017188117921885884, "loss": 0.2043, "step": 25578 }, { "epoch": 0.04535545431253596, "grad_norm": 0.416015625, "learning_rate": 0.0017187685731919963, "loss": 0.1694, "step": 25580 }, { "epoch": 0.04535900047784577, "grad_norm": 0.2041015625, "learning_rate": 0.0017187253514892701, "loss": 0.238, "step": 25582 }, { "epoch": 0.04536254664315559, "grad_norm": 0.318359375, "learning_rate": 0.0017186821270805985, "loss": 0.1665, "step": 25584 }, { "epoch": 0.0453660928084654, "grad_norm": 0.259765625, "learning_rate": 0.0017186388999661708, "loss": 0.1894, "step": 25586 }, { "epoch": 0.045369638973775224, "grad_norm": 0.30859375, "learning_rate": 0.0017185956701461758, "loss": 0.2025, "step": 25588 }, { "epoch": 0.04537318513908504, "grad_norm": 1.390625, "learning_rate": 0.0017185524376208028, "loss": 0.2124, "step": 25590 }, { "epoch": 0.04537673130439485, "grad_norm": 0.765625, "learning_rate": 0.0017185092023902404, "loss": 0.2002, "step": 25592 }, { "epoch": 0.04538027746970467, "grad_norm": 0.71875, "learning_rate": 0.0017184659644546782, "loss": 0.1997, "step": 25594 }, { "epoch": 0.04538382363501448, "grad_norm": 0.46875, "learning_rate": 0.001718422723814305, "loss": 0.2049, "step": 25596 }, { "epoch": 0.0453873698003243, "grad_norm": 0.29296875, "learning_rate": 0.0017183794804693105, "loss": 0.1884, "step": 25598 }, { "epoch": 0.04539091596563411, "grad_norm": 0.3515625, "learning_rate": 0.0017183362344198828, "loss": 0.2467, "step": 25600 }, { "epoch": 0.045394462130943926, "grad_norm": 0.6484375, "learning_rate": 0.0017182929856662122, "loss": 0.2209, "step": 25602 }, { "epoch": 0.04539800829625374, "grad_norm": 0.291015625, "learning_rate": 0.0017182497342084866, "loss": 0.1821, "step": 25604 }, { "epoch": 0.045401554461563555, "grad_norm": 0.65625, "learning_rate": 0.001718206480046896, "loss": 0.2772, "step": 25606 }, { "epoch": 0.04540510062687337, "grad_norm": 0.390625, "learning_rate": 0.0017181632231816294, "loss": 0.2159, "step": 25608 }, { "epoch": 0.04540864679218319, "grad_norm": 0.52734375, "learning_rate": 0.0017181199636128758, "loss": 0.2839, "step": 25610 }, { "epoch": 0.045412192957493006, "grad_norm": 0.412109375, "learning_rate": 0.0017180767013408246, "loss": 0.1547, "step": 25612 }, { "epoch": 0.04541573912280282, "grad_norm": 0.55859375, "learning_rate": 0.001718033436365665, "loss": 0.1818, "step": 25614 }, { "epoch": 0.045419285288112635, "grad_norm": 1.5078125, "learning_rate": 0.001717990168687586, "loss": 0.3927, "step": 25616 }, { "epoch": 0.04542283145342245, "grad_norm": 0.234375, "learning_rate": 0.0017179468983067774, "loss": 0.1788, "step": 25618 }, { "epoch": 0.045426377618732264, "grad_norm": 0.26953125, "learning_rate": 0.0017179036252234275, "loss": 0.1303, "step": 25620 }, { "epoch": 0.04542992378404208, "grad_norm": 0.5625, "learning_rate": 0.0017178603494377262, "loss": 0.2304, "step": 25622 }, { "epoch": 0.04543346994935189, "grad_norm": 0.5078125, "learning_rate": 0.001717817070949863, "loss": 0.2297, "step": 25624 }, { "epoch": 0.04543701611466171, "grad_norm": 0.412109375, "learning_rate": 0.0017177737897600264, "loss": 0.2096, "step": 25626 }, { "epoch": 0.04544056227997152, "grad_norm": 0.77734375, "learning_rate": 0.001717730505868406, "loss": 0.1753, "step": 25628 }, { "epoch": 0.045444108445281337, "grad_norm": 0.5234375, "learning_rate": 0.0017176872192751916, "loss": 0.3097, "step": 25630 }, { "epoch": 0.04544765461059116, "grad_norm": 0.66796875, "learning_rate": 0.0017176439299805718, "loss": 0.1519, "step": 25632 }, { "epoch": 0.04545120077590097, "grad_norm": 0.50390625, "learning_rate": 0.0017176006379847363, "loss": 0.2172, "step": 25634 }, { "epoch": 0.04545474694121079, "grad_norm": 0.375, "learning_rate": 0.0017175573432878745, "loss": 0.1886, "step": 25636 }, { "epoch": 0.0454582931065206, "grad_norm": 0.5078125, "learning_rate": 0.0017175140458901752, "loss": 0.1767, "step": 25638 }, { "epoch": 0.045461839271830416, "grad_norm": 0.279296875, "learning_rate": 0.0017174707457918287, "loss": 0.1944, "step": 25640 }, { "epoch": 0.04546538543714023, "grad_norm": 1.0, "learning_rate": 0.0017174274429930232, "loss": 0.1938, "step": 25642 }, { "epoch": 0.045468931602450045, "grad_norm": 0.55078125, "learning_rate": 0.0017173841374939494, "loss": 0.1942, "step": 25644 }, { "epoch": 0.04547247776775986, "grad_norm": 0.61328125, "learning_rate": 0.0017173408292947953, "loss": 0.2176, "step": 25646 }, { "epoch": 0.045476023933069674, "grad_norm": 0.435546875, "learning_rate": 0.0017172975183957515, "loss": 0.2022, "step": 25648 }, { "epoch": 0.04547957009837949, "grad_norm": 0.494140625, "learning_rate": 0.0017172542047970067, "loss": 0.1694, "step": 25650 }, { "epoch": 0.045483116263689304, "grad_norm": 7.84375, "learning_rate": 0.0017172108884987507, "loss": 0.1949, "step": 25652 }, { "epoch": 0.04548666242899912, "grad_norm": 1.109375, "learning_rate": 0.0017171675695011727, "loss": 0.2864, "step": 25654 }, { "epoch": 0.04549020859430894, "grad_norm": 1.125, "learning_rate": 0.0017171242478044625, "loss": 0.2368, "step": 25656 }, { "epoch": 0.045493754759618754, "grad_norm": 0.58984375, "learning_rate": 0.001717080923408809, "loss": 0.2447, "step": 25658 }, { "epoch": 0.04549730092492857, "grad_norm": 1.1328125, "learning_rate": 0.0017170375963144018, "loss": 0.2481, "step": 25660 }, { "epoch": 0.04550084709023838, "grad_norm": 0.349609375, "learning_rate": 0.0017169942665214311, "loss": 0.1743, "step": 25662 }, { "epoch": 0.0455043932555482, "grad_norm": 1.0859375, "learning_rate": 0.0017169509340300856, "loss": 0.1963, "step": 25664 }, { "epoch": 0.04550793942085801, "grad_norm": 0.416015625, "learning_rate": 0.001716907598840555, "loss": 0.1734, "step": 25666 }, { "epoch": 0.04551148558616783, "grad_norm": 2.3125, "learning_rate": 0.001716864260953029, "loss": 0.1648, "step": 25668 }, { "epoch": 0.04551503175147764, "grad_norm": 1.296875, "learning_rate": 0.001716820920367697, "loss": 0.2811, "step": 25670 }, { "epoch": 0.045518577916787456, "grad_norm": 1.046875, "learning_rate": 0.0017167775770847488, "loss": 0.2303, "step": 25672 }, { "epoch": 0.04552212408209727, "grad_norm": 0.41015625, "learning_rate": 0.0017167342311043732, "loss": 0.1969, "step": 25674 }, { "epoch": 0.045525670247407085, "grad_norm": 1.2578125, "learning_rate": 0.0017166908824267609, "loss": 0.2926, "step": 25676 }, { "epoch": 0.04552921641271691, "grad_norm": 0.75390625, "learning_rate": 0.0017166475310521005, "loss": 0.1331, "step": 25678 }, { "epoch": 0.04553276257802672, "grad_norm": 0.466796875, "learning_rate": 0.0017166041769805821, "loss": 0.2177, "step": 25680 }, { "epoch": 0.045536308743336536, "grad_norm": 0.8828125, "learning_rate": 0.0017165608202123952, "loss": 0.2342, "step": 25682 }, { "epoch": 0.04553985490864635, "grad_norm": 0.27734375, "learning_rate": 0.0017165174607477298, "loss": 0.2007, "step": 25684 }, { "epoch": 0.045543401073956165, "grad_norm": 2.078125, "learning_rate": 0.0017164740985867745, "loss": 0.3573, "step": 25686 }, { "epoch": 0.04554694723926598, "grad_norm": 0.6796875, "learning_rate": 0.0017164307337297201, "loss": 0.1687, "step": 25688 }, { "epoch": 0.045550493404575794, "grad_norm": 1.7890625, "learning_rate": 0.0017163873661767556, "loss": 0.2206, "step": 25690 }, { "epoch": 0.04555403956988561, "grad_norm": 0.91796875, "learning_rate": 0.0017163439959280706, "loss": 0.1627, "step": 25692 }, { "epoch": 0.04555758573519542, "grad_norm": 0.5078125, "learning_rate": 0.001716300622983855, "loss": 0.2677, "step": 25694 }, { "epoch": 0.04556113190050524, "grad_norm": 1.0625, "learning_rate": 0.0017162572473442988, "loss": 0.2084, "step": 25696 }, { "epoch": 0.04556467806581505, "grad_norm": 0.5703125, "learning_rate": 0.001716213869009591, "loss": 0.2047, "step": 25698 }, { "epoch": 0.045568224231124874, "grad_norm": 0.875, "learning_rate": 0.0017161704879799218, "loss": 0.1909, "step": 25700 }, { "epoch": 0.04557177039643469, "grad_norm": 0.80078125, "learning_rate": 0.001716127104255481, "loss": 0.2332, "step": 25702 }, { "epoch": 0.0455753165617445, "grad_norm": 0.498046875, "learning_rate": 0.0017160837178364576, "loss": 0.1941, "step": 25704 }, { "epoch": 0.04557886272705432, "grad_norm": 0.5546875, "learning_rate": 0.0017160403287230423, "loss": 0.187, "step": 25706 }, { "epoch": 0.04558240889236413, "grad_norm": 0.515625, "learning_rate": 0.0017159969369154244, "loss": 0.1707, "step": 25708 }, { "epoch": 0.045585955057673946, "grad_norm": 0.5390625, "learning_rate": 0.0017159535424137937, "loss": 0.4781, "step": 25710 }, { "epoch": 0.04558950122298376, "grad_norm": 0.267578125, "learning_rate": 0.00171591014521834, "loss": 0.1972, "step": 25712 }, { "epoch": 0.045593047388293576, "grad_norm": 0.373046875, "learning_rate": 0.001715866745329253, "loss": 0.2256, "step": 25714 }, { "epoch": 0.04559659355360339, "grad_norm": 0.9765625, "learning_rate": 0.0017158233427467227, "loss": 0.2193, "step": 25716 }, { "epoch": 0.045600139718913205, "grad_norm": 0.421875, "learning_rate": 0.0017157799374709392, "loss": 0.1821, "step": 25718 }, { "epoch": 0.04560368588422302, "grad_norm": 0.419921875, "learning_rate": 0.0017157365295020913, "loss": 0.2137, "step": 25720 }, { "epoch": 0.045607232049532834, "grad_norm": 0.37890625, "learning_rate": 0.0017156931188403695, "loss": 0.1978, "step": 25722 }, { "epoch": 0.045610778214842655, "grad_norm": 0.34375, "learning_rate": 0.0017156497054859636, "loss": 0.1728, "step": 25724 }, { "epoch": 0.04561432438015247, "grad_norm": 0.984375, "learning_rate": 0.001715606289439064, "loss": 0.3566, "step": 25726 }, { "epoch": 0.045617870545462284, "grad_norm": 0.90234375, "learning_rate": 0.0017155628706998598, "loss": 0.1888, "step": 25728 }, { "epoch": 0.0456214167107721, "grad_norm": 0.5, "learning_rate": 0.0017155194492685414, "loss": 0.1704, "step": 25730 }, { "epoch": 0.04562496287608191, "grad_norm": 0.6484375, "learning_rate": 0.001715476025145298, "loss": 0.2041, "step": 25732 }, { "epoch": 0.04562850904139173, "grad_norm": 0.5, "learning_rate": 0.0017154325983303202, "loss": 0.1611, "step": 25734 }, { "epoch": 0.04563205520670154, "grad_norm": 2.390625, "learning_rate": 0.0017153891688237977, "loss": 0.2167, "step": 25736 }, { "epoch": 0.04563560137201136, "grad_norm": 10.6875, "learning_rate": 0.0017153457366259207, "loss": 0.3996, "step": 25738 }, { "epoch": 0.04563914753732117, "grad_norm": 0.359375, "learning_rate": 0.0017153023017368785, "loss": 0.2025, "step": 25740 }, { "epoch": 0.045642693702630986, "grad_norm": 1.7265625, "learning_rate": 0.0017152588641568616, "loss": 0.2032, "step": 25742 }, { "epoch": 0.0456462398679408, "grad_norm": 0.337890625, "learning_rate": 0.0017152154238860597, "loss": 0.2278, "step": 25744 }, { "epoch": 0.04564978603325062, "grad_norm": 1.171875, "learning_rate": 0.001715171980924663, "loss": 0.2413, "step": 25746 }, { "epoch": 0.04565333219856044, "grad_norm": 1.1640625, "learning_rate": 0.0017151285352728616, "loss": 0.3813, "step": 25748 }, { "epoch": 0.04565687836387025, "grad_norm": 0.2490234375, "learning_rate": 0.001715085086930845, "loss": 0.1673, "step": 25750 }, { "epoch": 0.045660424529180066, "grad_norm": 0.640625, "learning_rate": 0.0017150416358988033, "loss": 0.2437, "step": 25752 }, { "epoch": 0.04566397069448988, "grad_norm": 0.484375, "learning_rate": 0.0017149981821769273, "loss": 0.1634, "step": 25754 }, { "epoch": 0.045667516859799695, "grad_norm": 1.6484375, "learning_rate": 0.0017149547257654062, "loss": 0.2959, "step": 25756 }, { "epoch": 0.04567106302510951, "grad_norm": 0.6015625, "learning_rate": 0.0017149112666644305, "loss": 0.169, "step": 25758 }, { "epoch": 0.045674609190419324, "grad_norm": 0.287109375, "learning_rate": 0.00171486780487419, "loss": 0.1664, "step": 25760 }, { "epoch": 0.04567815535572914, "grad_norm": 0.455078125, "learning_rate": 0.0017148243403948746, "loss": 0.4258, "step": 25762 }, { "epoch": 0.04568170152103895, "grad_norm": 0.35546875, "learning_rate": 0.001714780873226675, "loss": 0.2398, "step": 25764 }, { "epoch": 0.04568524768634877, "grad_norm": 0.7734375, "learning_rate": 0.001714737403369781, "loss": 0.2238, "step": 25766 }, { "epoch": 0.04568879385165859, "grad_norm": 0.62109375, "learning_rate": 0.0017146939308243824, "loss": 0.1732, "step": 25768 }, { "epoch": 0.045692340016968404, "grad_norm": 1.1640625, "learning_rate": 0.0017146504555906696, "loss": 0.1884, "step": 25770 }, { "epoch": 0.04569588618227822, "grad_norm": 0.255859375, "learning_rate": 0.0017146069776688329, "loss": 0.2043, "step": 25772 }, { "epoch": 0.04569943234758803, "grad_norm": 0.1943359375, "learning_rate": 0.0017145634970590624, "loss": 0.2506, "step": 25774 }, { "epoch": 0.04570297851289785, "grad_norm": 0.6796875, "learning_rate": 0.001714520013761548, "loss": 0.1863, "step": 25776 }, { "epoch": 0.04570652467820766, "grad_norm": 0.443359375, "learning_rate": 0.0017144765277764802, "loss": 0.2049, "step": 25778 }, { "epoch": 0.04571007084351748, "grad_norm": 1.296875, "learning_rate": 0.0017144330391040486, "loss": 0.1928, "step": 25780 }, { "epoch": 0.04571361700882729, "grad_norm": 0.51171875, "learning_rate": 0.001714389547744444, "loss": 0.2556, "step": 25782 }, { "epoch": 0.045717163174137106, "grad_norm": 1.0625, "learning_rate": 0.0017143460536978565, "loss": 0.1861, "step": 25784 }, { "epoch": 0.04572070933944692, "grad_norm": 1.1640625, "learning_rate": 0.0017143025569644759, "loss": 0.2131, "step": 25786 }, { "epoch": 0.045724255504756735, "grad_norm": 0.412109375, "learning_rate": 0.0017142590575444931, "loss": 0.1781, "step": 25788 }, { "epoch": 0.04572780167006655, "grad_norm": 0.4921875, "learning_rate": 0.001714215555438098, "loss": 0.2433, "step": 25790 }, { "epoch": 0.04573134783537637, "grad_norm": 1.0078125, "learning_rate": 0.0017141720506454806, "loss": 0.6866, "step": 25792 }, { "epoch": 0.045734894000686185, "grad_norm": 0.57421875, "learning_rate": 0.0017141285431668317, "loss": 0.1471, "step": 25794 }, { "epoch": 0.045738440165996, "grad_norm": 0.314453125, "learning_rate": 0.0017140850330023413, "loss": 0.2181, "step": 25796 }, { "epoch": 0.045741986331305814, "grad_norm": 0.36328125, "learning_rate": 0.001714041520152199, "loss": 0.2128, "step": 25798 }, { "epoch": 0.04574553249661563, "grad_norm": 0.2392578125, "learning_rate": 0.0017139980046165964, "loss": 0.1923, "step": 25800 }, { "epoch": 0.045749078661925444, "grad_norm": 0.39453125, "learning_rate": 0.0017139544863957231, "loss": 0.2169, "step": 25802 }, { "epoch": 0.04575262482723526, "grad_norm": 0.255859375, "learning_rate": 0.0017139109654897694, "loss": 0.1872, "step": 25804 }, { "epoch": 0.04575617099254507, "grad_norm": 0.435546875, "learning_rate": 0.0017138674418989256, "loss": 0.243, "step": 25806 }, { "epoch": 0.04575971715785489, "grad_norm": 4.53125, "learning_rate": 0.0017138239156233824, "loss": 0.5334, "step": 25808 }, { "epoch": 0.0457632633231647, "grad_norm": 5.46875, "learning_rate": 0.0017137803866633299, "loss": 0.3189, "step": 25810 }, { "epoch": 0.045766809488474516, "grad_norm": 0.236328125, "learning_rate": 0.0017137368550189584, "loss": 0.1711, "step": 25812 }, { "epoch": 0.04577035565378434, "grad_norm": 0.359375, "learning_rate": 0.0017136933206904586, "loss": 0.1747, "step": 25814 }, { "epoch": 0.04577390181909415, "grad_norm": 0.271484375, "learning_rate": 0.0017136497836780207, "loss": 0.1784, "step": 25816 }, { "epoch": 0.04577744798440397, "grad_norm": 3.140625, "learning_rate": 0.0017136062439818346, "loss": 0.3431, "step": 25818 }, { "epoch": 0.04578099414971378, "grad_norm": 0.81640625, "learning_rate": 0.001713562701602092, "loss": 0.1427, "step": 25820 }, { "epoch": 0.045784540315023596, "grad_norm": 1.046875, "learning_rate": 0.0017135191565389818, "loss": 0.4024, "step": 25822 }, { "epoch": 0.04578808648033341, "grad_norm": 0.69921875, "learning_rate": 0.0017134756087926956, "loss": 0.1606, "step": 25824 }, { "epoch": 0.045791632645643225, "grad_norm": 0.3515625, "learning_rate": 0.0017134320583634234, "loss": 0.1576, "step": 25826 }, { "epoch": 0.04579517881095304, "grad_norm": 2.03125, "learning_rate": 0.0017133885052513559, "loss": 0.2849, "step": 25828 }, { "epoch": 0.045798724976262854, "grad_norm": 1.1875, "learning_rate": 0.001713344949456683, "loss": 0.2268, "step": 25830 }, { "epoch": 0.04580227114157267, "grad_norm": 0.99609375, "learning_rate": 0.001713301390979596, "loss": 0.1704, "step": 25832 }, { "epoch": 0.04580581730688248, "grad_norm": 0.498046875, "learning_rate": 0.0017132578298202845, "loss": 0.1936, "step": 25834 }, { "epoch": 0.045809363472192305, "grad_norm": 0.578125, "learning_rate": 0.0017132142659789396, "loss": 0.2522, "step": 25836 }, { "epoch": 0.04581290963750212, "grad_norm": 0.40234375, "learning_rate": 0.0017131706994557518, "loss": 0.1717, "step": 25838 }, { "epoch": 0.045816455802811934, "grad_norm": 0.5390625, "learning_rate": 0.0017131271302509115, "loss": 0.2075, "step": 25840 }, { "epoch": 0.04582000196812175, "grad_norm": 0.98046875, "learning_rate": 0.001713083558364609, "loss": 0.2542, "step": 25842 }, { "epoch": 0.04582354813343156, "grad_norm": 0.5546875, "learning_rate": 0.0017130399837970356, "loss": 0.2296, "step": 25844 }, { "epoch": 0.04582709429874138, "grad_norm": 0.5859375, "learning_rate": 0.001712996406548381, "loss": 0.2175, "step": 25846 }, { "epoch": 0.04583064046405119, "grad_norm": 1.171875, "learning_rate": 0.0017129528266188365, "loss": 0.3051, "step": 25848 }, { "epoch": 0.04583418662936101, "grad_norm": 0.259765625, "learning_rate": 0.0017129092440085923, "loss": 0.2195, "step": 25850 }, { "epoch": 0.04583773279467082, "grad_norm": 0.69921875, "learning_rate": 0.0017128656587178388, "loss": 0.2553, "step": 25852 }, { "epoch": 0.045841278959980636, "grad_norm": 0.85546875, "learning_rate": 0.0017128220707467673, "loss": 0.1638, "step": 25854 }, { "epoch": 0.04584482512529045, "grad_norm": 0.51953125, "learning_rate": 0.001712778480095568, "loss": 0.2151, "step": 25856 }, { "epoch": 0.045848371290600265, "grad_norm": 2.4375, "learning_rate": 0.0017127348867644315, "loss": 0.2617, "step": 25858 }, { "epoch": 0.045851917455910086, "grad_norm": 0.41015625, "learning_rate": 0.0017126912907535483, "loss": 0.2414, "step": 25860 }, { "epoch": 0.0458554636212199, "grad_norm": 0.43359375, "learning_rate": 0.0017126476920631096, "loss": 0.182, "step": 25862 }, { "epoch": 0.045859009786529716, "grad_norm": 0.6953125, "learning_rate": 0.0017126040906933057, "loss": 0.3261, "step": 25864 }, { "epoch": 0.04586255595183953, "grad_norm": 0.515625, "learning_rate": 0.0017125604866443272, "loss": 0.2803, "step": 25866 }, { "epoch": 0.045866102117149345, "grad_norm": 0.44921875, "learning_rate": 0.001712516879916365, "loss": 0.2051, "step": 25868 }, { "epoch": 0.04586964828245916, "grad_norm": 0.443359375, "learning_rate": 0.0017124732705096097, "loss": 0.1855, "step": 25870 }, { "epoch": 0.045873194447768974, "grad_norm": 0.390625, "learning_rate": 0.001712429658424252, "loss": 0.1979, "step": 25872 }, { "epoch": 0.04587674061307879, "grad_norm": 0.609375, "learning_rate": 0.0017123860436604832, "loss": 0.2321, "step": 25874 }, { "epoch": 0.0458802867783886, "grad_norm": 0.1669921875, "learning_rate": 0.0017123424262184932, "loss": 0.1906, "step": 25876 }, { "epoch": 0.04588383294369842, "grad_norm": 0.5, "learning_rate": 0.0017122988060984732, "loss": 0.192, "step": 25878 }, { "epoch": 0.04588737910900823, "grad_norm": 0.3828125, "learning_rate": 0.0017122551833006136, "loss": 0.1685, "step": 25880 }, { "epoch": 0.04589092527431805, "grad_norm": 0.51953125, "learning_rate": 0.0017122115578251061, "loss": 0.2093, "step": 25882 }, { "epoch": 0.04589447143962787, "grad_norm": 0.43359375, "learning_rate": 0.0017121679296721404, "loss": 0.1906, "step": 25884 }, { "epoch": 0.04589801760493768, "grad_norm": 0.32421875, "learning_rate": 0.0017121242988419078, "loss": 0.1523, "step": 25886 }, { "epoch": 0.0459015637702475, "grad_norm": 0.453125, "learning_rate": 0.001712080665334599, "loss": 0.2256, "step": 25888 }, { "epoch": 0.04590510993555731, "grad_norm": 2.0, "learning_rate": 0.0017120370291504052, "loss": 0.3778, "step": 25890 }, { "epoch": 0.045908656100867126, "grad_norm": 0.65234375, "learning_rate": 0.0017119933902895168, "loss": 0.1916, "step": 25892 }, { "epoch": 0.04591220226617694, "grad_norm": 0.5703125, "learning_rate": 0.0017119497487521247, "loss": 0.294, "step": 25894 }, { "epoch": 0.045915748431486755, "grad_norm": 0.7734375, "learning_rate": 0.0017119061045384198, "loss": 0.2407, "step": 25896 }, { "epoch": 0.04591929459679657, "grad_norm": 0.37890625, "learning_rate": 0.0017118624576485935, "loss": 0.1824, "step": 25898 }, { "epoch": 0.045922840762106384, "grad_norm": 0.9140625, "learning_rate": 0.0017118188080828356, "loss": 0.246, "step": 25900 }, { "epoch": 0.0459263869274162, "grad_norm": 0.5234375, "learning_rate": 0.0017117751558413381, "loss": 0.2198, "step": 25902 }, { "epoch": 0.04592993309272602, "grad_norm": 0.8046875, "learning_rate": 0.0017117315009242914, "loss": 0.1699, "step": 25904 }, { "epoch": 0.045933479258035835, "grad_norm": 0.31640625, "learning_rate": 0.0017116878433318862, "loss": 0.2413, "step": 25906 }, { "epoch": 0.04593702542334565, "grad_norm": 1.0234375, "learning_rate": 0.0017116441830643137, "loss": 0.3057, "step": 25908 }, { "epoch": 0.045940571588655464, "grad_norm": 0.34375, "learning_rate": 0.001711600520121765, "loss": 0.2181, "step": 25910 }, { "epoch": 0.04594411775396528, "grad_norm": 0.4921875, "learning_rate": 0.001711556854504431, "loss": 0.159, "step": 25912 }, { "epoch": 0.04594766391927509, "grad_norm": 0.447265625, "learning_rate": 0.0017115131862125022, "loss": 0.2072, "step": 25914 }, { "epoch": 0.04595121008458491, "grad_norm": 0.79296875, "learning_rate": 0.0017114695152461701, "loss": 0.3051, "step": 25916 }, { "epoch": 0.04595475624989472, "grad_norm": 0.859375, "learning_rate": 0.0017114258416056256, "loss": 0.2815, "step": 25918 }, { "epoch": 0.04595830241520454, "grad_norm": 0.46484375, "learning_rate": 0.0017113821652910593, "loss": 0.1734, "step": 25920 }, { "epoch": 0.04596184858051435, "grad_norm": 0.53515625, "learning_rate": 0.0017113384863026628, "loss": 0.1549, "step": 25922 }, { "epoch": 0.045965394745824166, "grad_norm": 0.5546875, "learning_rate": 0.0017112948046406268, "loss": 0.2272, "step": 25924 }, { "epoch": 0.04596894091113398, "grad_norm": 0.76171875, "learning_rate": 0.0017112511203051426, "loss": 0.2298, "step": 25926 }, { "epoch": 0.0459724870764438, "grad_norm": 1.6796875, "learning_rate": 0.001711207433296401, "loss": 0.2439, "step": 25928 }, { "epoch": 0.04597603324175362, "grad_norm": 0.376953125, "learning_rate": 0.001711163743614593, "loss": 0.2151, "step": 25930 }, { "epoch": 0.04597957940706343, "grad_norm": 0.3125, "learning_rate": 0.00171112005125991, "loss": 0.1682, "step": 25932 }, { "epoch": 0.045983125572373246, "grad_norm": 0.5234375, "learning_rate": 0.0017110763562325426, "loss": 0.1884, "step": 25934 }, { "epoch": 0.04598667173768306, "grad_norm": 0.5078125, "learning_rate": 0.0017110326585326823, "loss": 0.2387, "step": 25936 }, { "epoch": 0.045990217902992875, "grad_norm": 1.9921875, "learning_rate": 0.00171098895816052, "loss": 0.1565, "step": 25938 }, { "epoch": 0.04599376406830269, "grad_norm": 0.416015625, "learning_rate": 0.001710945255116247, "loss": 0.1911, "step": 25940 }, { "epoch": 0.045997310233612504, "grad_norm": 0.482421875, "learning_rate": 0.0017109015494000542, "loss": 0.2056, "step": 25942 }, { "epoch": 0.04600085639892232, "grad_norm": 0.25390625, "learning_rate": 0.0017108578410121329, "loss": 0.1775, "step": 25944 }, { "epoch": 0.04600440256423213, "grad_norm": 1.3125, "learning_rate": 0.0017108141299526744, "loss": 0.2414, "step": 25946 }, { "epoch": 0.04600794872954195, "grad_norm": 0.5703125, "learning_rate": 0.0017107704162218696, "loss": 0.2084, "step": 25948 }, { "epoch": 0.04601149489485177, "grad_norm": 0.384765625, "learning_rate": 0.0017107266998199098, "loss": 0.2119, "step": 25950 }, { "epoch": 0.046015041060161584, "grad_norm": 0.58984375, "learning_rate": 0.001710682980746986, "loss": 0.2097, "step": 25952 }, { "epoch": 0.0460185872254714, "grad_norm": 0.96484375, "learning_rate": 0.0017106392590032899, "loss": 0.2913, "step": 25954 }, { "epoch": 0.04602213339078121, "grad_norm": 0.625, "learning_rate": 0.001710595534589012, "loss": 0.3591, "step": 25956 }, { "epoch": 0.04602567955609103, "grad_norm": 0.62109375, "learning_rate": 0.001710551807504344, "loss": 0.2215, "step": 25958 }, { "epoch": 0.04602922572140084, "grad_norm": 1.109375, "learning_rate": 0.0017105080777494773, "loss": 0.1744, "step": 25960 }, { "epoch": 0.046032771886710656, "grad_norm": 1.046875, "learning_rate": 0.0017104643453246028, "loss": 0.2341, "step": 25962 }, { "epoch": 0.04603631805202047, "grad_norm": 0.56640625, "learning_rate": 0.0017104206102299118, "loss": 0.2023, "step": 25964 }, { "epoch": 0.046039864217330286, "grad_norm": 1.234375, "learning_rate": 0.0017103768724655954, "loss": 0.2139, "step": 25966 }, { "epoch": 0.0460434103826401, "grad_norm": 0.76953125, "learning_rate": 0.0017103331320318456, "loss": 0.208, "step": 25968 }, { "epoch": 0.046046956547949915, "grad_norm": 2.171875, "learning_rate": 0.001710289388928853, "loss": 0.4158, "step": 25970 }, { "epoch": 0.046050502713259736, "grad_norm": 0.390625, "learning_rate": 0.001710245643156809, "loss": 0.2111, "step": 25972 }, { "epoch": 0.04605404887856955, "grad_norm": 0.2041015625, "learning_rate": 0.001710201894715905, "loss": 0.1771, "step": 25974 }, { "epoch": 0.046057595043879365, "grad_norm": 0.6953125, "learning_rate": 0.0017101581436063326, "loss": 0.2738, "step": 25976 }, { "epoch": 0.04606114120918918, "grad_norm": 0.55859375, "learning_rate": 0.0017101143898282827, "loss": 0.262, "step": 25978 }, { "epoch": 0.046064687374498994, "grad_norm": 0.431640625, "learning_rate": 0.0017100706333819469, "loss": 0.2294, "step": 25980 }, { "epoch": 0.04606823353980881, "grad_norm": 0.40234375, "learning_rate": 0.0017100268742675164, "loss": 0.1774, "step": 25982 }, { "epoch": 0.04607177970511862, "grad_norm": 0.80859375, "learning_rate": 0.001709983112485183, "loss": 0.1972, "step": 25984 }, { "epoch": 0.04607532587042844, "grad_norm": 0.734375, "learning_rate": 0.0017099393480351373, "loss": 0.281, "step": 25986 }, { "epoch": 0.04607887203573825, "grad_norm": 0.359375, "learning_rate": 0.0017098955809175715, "loss": 0.2037, "step": 25988 }, { "epoch": 0.04608241820104807, "grad_norm": 0.455078125, "learning_rate": 0.0017098518111326767, "loss": 0.2018, "step": 25990 }, { "epoch": 0.04608596436635788, "grad_norm": 1.640625, "learning_rate": 0.0017098080386806446, "loss": 0.3368, "step": 25992 }, { "epoch": 0.046089510531667696, "grad_norm": 0.59765625, "learning_rate": 0.0017097642635616657, "loss": 0.1961, "step": 25994 }, { "epoch": 0.04609305669697752, "grad_norm": 0.94140625, "learning_rate": 0.0017097204857759328, "loss": 0.2178, "step": 25996 }, { "epoch": 0.04609660286228733, "grad_norm": 0.62890625, "learning_rate": 0.001709676705323636, "loss": 0.1715, "step": 25998 }, { "epoch": 0.04610014902759715, "grad_norm": 0.41015625, "learning_rate": 0.0017096329222049678, "loss": 0.1827, "step": 26000 }, { "epoch": 0.04610369519290696, "grad_norm": 0.34375, "learning_rate": 0.0017095891364201192, "loss": 0.157, "step": 26002 }, { "epoch": 0.046107241358216776, "grad_norm": 0.44921875, "learning_rate": 0.001709545347969282, "loss": 0.3541, "step": 26004 }, { "epoch": 0.04611078752352659, "grad_norm": 0.96875, "learning_rate": 0.0017095015568526473, "loss": 0.2669, "step": 26006 }, { "epoch": 0.046114333688836405, "grad_norm": 0.4375, "learning_rate": 0.0017094577630704072, "loss": 0.2086, "step": 26008 }, { "epoch": 0.04611787985414622, "grad_norm": 0.67578125, "learning_rate": 0.0017094139666227524, "loss": 0.153, "step": 26010 }, { "epoch": 0.046121426019456034, "grad_norm": 0.8203125, "learning_rate": 0.0017093701675098753, "loss": 0.1675, "step": 26012 }, { "epoch": 0.04612497218476585, "grad_norm": 0.39453125, "learning_rate": 0.0017093263657319666, "loss": 0.2848, "step": 26014 }, { "epoch": 0.04612851835007566, "grad_norm": 1.0, "learning_rate": 0.0017092825612892186, "loss": 0.2153, "step": 26016 }, { "epoch": 0.046132064515385485, "grad_norm": 0.59765625, "learning_rate": 0.0017092387541818225, "loss": 0.1468, "step": 26018 }, { "epoch": 0.0461356106806953, "grad_norm": 0.283203125, "learning_rate": 0.0017091949444099702, "loss": 0.2116, "step": 26020 }, { "epoch": 0.046139156846005114, "grad_norm": 0.2890625, "learning_rate": 0.0017091511319738525, "loss": 0.1973, "step": 26022 }, { "epoch": 0.04614270301131493, "grad_norm": 0.345703125, "learning_rate": 0.0017091073168736624, "loss": 0.2215, "step": 26024 }, { "epoch": 0.04614624917662474, "grad_norm": 1.1484375, "learning_rate": 0.00170906349910959, "loss": 0.1505, "step": 26026 }, { "epoch": 0.04614979534193456, "grad_norm": 1.4765625, "learning_rate": 0.0017090196786818282, "loss": 0.2872, "step": 26028 }, { "epoch": 0.04615334150724437, "grad_norm": 2.5625, "learning_rate": 0.0017089758555905677, "loss": 0.4636, "step": 26030 }, { "epoch": 0.04615688767255419, "grad_norm": 1.3359375, "learning_rate": 0.0017089320298360007, "loss": 0.4287, "step": 26032 }, { "epoch": 0.046160433837864, "grad_norm": 0.7890625, "learning_rate": 0.0017088882014183186, "loss": 0.1669, "step": 26034 }, { "epoch": 0.046163980003173816, "grad_norm": 0.455078125, "learning_rate": 0.0017088443703377131, "loss": 0.215, "step": 26036 }, { "epoch": 0.04616752616848363, "grad_norm": 1.0625, "learning_rate": 0.0017088005365943767, "loss": 0.2821, "step": 26038 }, { "epoch": 0.04617107233379345, "grad_norm": 0.74609375, "learning_rate": 0.0017087567001884995, "loss": 0.1614, "step": 26040 }, { "epoch": 0.046174618499103266, "grad_norm": 0.337890625, "learning_rate": 0.0017087128611202745, "loss": 0.2507, "step": 26042 }, { "epoch": 0.04617816466441308, "grad_norm": 0.451171875, "learning_rate": 0.0017086690193898928, "loss": 0.2067, "step": 26044 }, { "epoch": 0.046181710829722895, "grad_norm": 0.91015625, "learning_rate": 0.0017086251749975467, "loss": 0.2486, "step": 26046 }, { "epoch": 0.04618525699503271, "grad_norm": 0.75390625, "learning_rate": 0.0017085813279434274, "loss": 0.2421, "step": 26048 }, { "epoch": 0.046188803160342524, "grad_norm": 0.7890625, "learning_rate": 0.001708537478227727, "loss": 0.2234, "step": 26050 }, { "epoch": 0.04619234932565234, "grad_norm": 0.52734375, "learning_rate": 0.0017084936258506369, "loss": 0.2215, "step": 26052 }, { "epoch": 0.046195895490962154, "grad_norm": 0.333984375, "learning_rate": 0.0017084497708123494, "loss": 0.2059, "step": 26054 }, { "epoch": 0.04619944165627197, "grad_norm": 0.400390625, "learning_rate": 0.001708405913113056, "loss": 0.1825, "step": 26056 }, { "epoch": 0.04620298782158178, "grad_norm": 0.326171875, "learning_rate": 0.0017083620527529484, "loss": 0.2069, "step": 26058 }, { "epoch": 0.0462065339868916, "grad_norm": 0.7265625, "learning_rate": 0.0017083181897322188, "loss": 0.1857, "step": 26060 }, { "epoch": 0.04621008015220141, "grad_norm": 0.546875, "learning_rate": 0.0017082743240510586, "loss": 0.2015, "step": 26062 }, { "epoch": 0.04621362631751123, "grad_norm": 0.83203125, "learning_rate": 0.00170823045570966, "loss": 0.176, "step": 26064 }, { "epoch": 0.04621717248282105, "grad_norm": 0.265625, "learning_rate": 0.0017081865847082143, "loss": 0.1459, "step": 26066 }, { "epoch": 0.04622071864813086, "grad_norm": 0.51953125, "learning_rate": 0.001708142711046914, "loss": 0.2153, "step": 26068 }, { "epoch": 0.04622426481344068, "grad_norm": 0.283203125, "learning_rate": 0.001708098834725951, "loss": 0.1432, "step": 26070 }, { "epoch": 0.04622781097875049, "grad_norm": 0.291015625, "learning_rate": 0.0017080549557455167, "loss": 0.178, "step": 26072 }, { "epoch": 0.046231357144060306, "grad_norm": 0.6953125, "learning_rate": 0.001708011074105803, "loss": 0.312, "step": 26074 }, { "epoch": 0.04623490330937012, "grad_norm": 1.8046875, "learning_rate": 0.0017079671898070023, "loss": 0.2437, "step": 26076 }, { "epoch": 0.046238449474679935, "grad_norm": 2.140625, "learning_rate": 0.0017079233028493063, "loss": 0.3157, "step": 26078 }, { "epoch": 0.04624199563998975, "grad_norm": 1.2578125, "learning_rate": 0.0017078794132329067, "loss": 0.3413, "step": 26080 }, { "epoch": 0.046245541805299564, "grad_norm": 0.76171875, "learning_rate": 0.0017078355209579957, "loss": 0.1758, "step": 26082 }, { "epoch": 0.04624908797060938, "grad_norm": 0.41015625, "learning_rate": 0.0017077916260247652, "loss": 0.2042, "step": 26084 }, { "epoch": 0.0462526341359192, "grad_norm": 0.75390625, "learning_rate": 0.0017077477284334073, "loss": 0.1902, "step": 26086 }, { "epoch": 0.046256180301229015, "grad_norm": 0.375, "learning_rate": 0.0017077038281841138, "loss": 0.3049, "step": 26088 }, { "epoch": 0.04625972646653883, "grad_norm": 0.44140625, "learning_rate": 0.0017076599252770766, "loss": 0.1475, "step": 26090 }, { "epoch": 0.046263272631848644, "grad_norm": 0.498046875, "learning_rate": 0.0017076160197124878, "loss": 0.1764, "step": 26092 }, { "epoch": 0.04626681879715846, "grad_norm": 0.90234375, "learning_rate": 0.00170757211149054, "loss": 0.207, "step": 26094 }, { "epoch": 0.04627036496246827, "grad_norm": 0.55078125, "learning_rate": 0.0017075282006114242, "loss": 0.164, "step": 26096 }, { "epoch": 0.04627391112777809, "grad_norm": 5.0625, "learning_rate": 0.0017074842870753333, "loss": 0.4012, "step": 26098 }, { "epoch": 0.0462774572930879, "grad_norm": 0.310546875, "learning_rate": 0.0017074403708824586, "loss": 0.192, "step": 26100 }, { "epoch": 0.04628100345839772, "grad_norm": 0.298828125, "learning_rate": 0.0017073964520329925, "loss": 0.206, "step": 26102 }, { "epoch": 0.04628454962370753, "grad_norm": 0.640625, "learning_rate": 0.0017073525305271275, "loss": 0.2016, "step": 26104 }, { "epoch": 0.046288095789017346, "grad_norm": 0.88671875, "learning_rate": 0.0017073086063650557, "loss": 0.1796, "step": 26106 }, { "epoch": 0.04629164195432717, "grad_norm": 0.439453125, "learning_rate": 0.001707264679546968, "loss": 0.216, "step": 26108 }, { "epoch": 0.04629518811963698, "grad_norm": 0.78515625, "learning_rate": 0.0017072207500730577, "loss": 0.1942, "step": 26110 }, { "epoch": 0.046298734284946796, "grad_norm": 0.263671875, "learning_rate": 0.0017071768179435167, "loss": 0.1946, "step": 26112 }, { "epoch": 0.04630228045025661, "grad_norm": 0.357421875, "learning_rate": 0.0017071328831585366, "loss": 0.205, "step": 26114 }, { "epoch": 0.046305826615566426, "grad_norm": 1.0078125, "learning_rate": 0.0017070889457183104, "loss": 0.3614, "step": 26116 }, { "epoch": 0.04630937278087624, "grad_norm": 0.828125, "learning_rate": 0.0017070450056230293, "loss": 0.2697, "step": 26118 }, { "epoch": 0.046312918946186055, "grad_norm": 0.322265625, "learning_rate": 0.0017070010628728862, "loss": 0.2137, "step": 26120 }, { "epoch": 0.04631646511149587, "grad_norm": 0.41015625, "learning_rate": 0.001706957117468073, "loss": 0.2054, "step": 26122 }, { "epoch": 0.046320011276805684, "grad_norm": 1.6015625, "learning_rate": 0.0017069131694087824, "loss": 0.3104, "step": 26124 }, { "epoch": 0.0463235574421155, "grad_norm": 0.640625, "learning_rate": 0.0017068692186952054, "loss": 0.1769, "step": 26126 }, { "epoch": 0.04632710360742531, "grad_norm": 0.79296875, "learning_rate": 0.0017068252653275353, "loss": 0.1966, "step": 26128 }, { "epoch": 0.04633064977273513, "grad_norm": 1.875, "learning_rate": 0.0017067813093059642, "loss": 0.2292, "step": 26130 }, { "epoch": 0.04633419593804495, "grad_norm": 0.232421875, "learning_rate": 0.0017067373506306836, "loss": 0.1376, "step": 26132 }, { "epoch": 0.04633774210335476, "grad_norm": 0.35546875, "learning_rate": 0.0017066933893018865, "loss": 0.3009, "step": 26134 }, { "epoch": 0.04634128826866458, "grad_norm": 0.328125, "learning_rate": 0.0017066494253197648, "loss": 0.1524, "step": 26136 }, { "epoch": 0.04634483443397439, "grad_norm": 0.494140625, "learning_rate": 0.0017066054586845113, "loss": 0.2016, "step": 26138 }, { "epoch": 0.04634838059928421, "grad_norm": 1.15625, "learning_rate": 0.0017065614893963174, "loss": 0.2322, "step": 26140 }, { "epoch": 0.04635192676459402, "grad_norm": 0.369140625, "learning_rate": 0.001706517517455376, "loss": 0.2868, "step": 26142 }, { "epoch": 0.046355472929903836, "grad_norm": 0.37109375, "learning_rate": 0.0017064735428618794, "loss": 0.1756, "step": 26144 }, { "epoch": 0.04635901909521365, "grad_norm": 0.859375, "learning_rate": 0.0017064295656160197, "loss": 0.2021, "step": 26146 }, { "epoch": 0.046362565260523465, "grad_norm": 0.61328125, "learning_rate": 0.0017063855857179897, "loss": 0.1954, "step": 26148 }, { "epoch": 0.04636611142583328, "grad_norm": 0.251953125, "learning_rate": 0.001706341603167981, "loss": 0.1248, "step": 26150 }, { "epoch": 0.046369657591143094, "grad_norm": 0.423828125, "learning_rate": 0.0017062976179661868, "loss": 0.1505, "step": 26152 }, { "epoch": 0.046373203756452916, "grad_norm": 0.6953125, "learning_rate": 0.0017062536301127981, "loss": 0.1791, "step": 26154 }, { "epoch": 0.04637674992176273, "grad_norm": 0.275390625, "learning_rate": 0.0017062096396080087, "loss": 0.1923, "step": 26156 }, { "epoch": 0.046380296087072545, "grad_norm": 0.72265625, "learning_rate": 0.0017061656464520106, "loss": 0.185, "step": 26158 }, { "epoch": 0.04638384225238236, "grad_norm": 1.2265625, "learning_rate": 0.0017061216506449961, "loss": 0.2128, "step": 26160 }, { "epoch": 0.046387388417692174, "grad_norm": 2.234375, "learning_rate": 0.0017060776521871572, "loss": 0.2501, "step": 26162 }, { "epoch": 0.04639093458300199, "grad_norm": 0.41015625, "learning_rate": 0.0017060336510786869, "loss": 0.2348, "step": 26164 }, { "epoch": 0.0463944807483118, "grad_norm": 0.263671875, "learning_rate": 0.0017059896473197778, "loss": 0.1794, "step": 26166 }, { "epoch": 0.04639802691362162, "grad_norm": 0.578125, "learning_rate": 0.0017059456409106216, "loss": 0.2392, "step": 26168 }, { "epoch": 0.04640157307893143, "grad_norm": 0.298828125, "learning_rate": 0.001705901631851411, "loss": 0.6879, "step": 26170 }, { "epoch": 0.04640511924424125, "grad_norm": 1.8203125, "learning_rate": 0.001705857620142339, "loss": 0.3341, "step": 26172 }, { "epoch": 0.04640866540955106, "grad_norm": 0.5546875, "learning_rate": 0.0017058136057835974, "loss": 0.1771, "step": 26174 }, { "epoch": 0.04641221157486088, "grad_norm": 0.404296875, "learning_rate": 0.001705769588775379, "loss": 0.2007, "step": 26176 }, { "epoch": 0.0464157577401707, "grad_norm": 0.52734375, "learning_rate": 0.0017057255691178763, "loss": 0.1908, "step": 26178 }, { "epoch": 0.04641930390548051, "grad_norm": 0.55078125, "learning_rate": 0.001705681546811282, "loss": 0.2116, "step": 26180 }, { "epoch": 0.04642285007079033, "grad_norm": 1.1953125, "learning_rate": 0.0017056375218557883, "loss": 0.1989, "step": 26182 }, { "epoch": 0.04642639623610014, "grad_norm": 0.357421875, "learning_rate": 0.0017055934942515878, "loss": 0.2417, "step": 26184 }, { "epoch": 0.046429942401409956, "grad_norm": 0.41796875, "learning_rate": 0.0017055494639988732, "loss": 0.2349, "step": 26186 }, { "epoch": 0.04643348856671977, "grad_norm": 0.6171875, "learning_rate": 0.001705505431097837, "loss": 0.2326, "step": 26188 }, { "epoch": 0.046437034732029585, "grad_norm": 0.2451171875, "learning_rate": 0.0017054613955486716, "loss": 0.2044, "step": 26190 }, { "epoch": 0.0464405808973394, "grad_norm": 2.40625, "learning_rate": 0.00170541735735157, "loss": 0.3135, "step": 26192 }, { "epoch": 0.046444127062649214, "grad_norm": 0.4453125, "learning_rate": 0.0017053733165067245, "loss": 0.2392, "step": 26194 }, { "epoch": 0.04644767322795903, "grad_norm": 0.3359375, "learning_rate": 0.0017053292730143277, "loss": 0.1449, "step": 26196 }, { "epoch": 0.04645121939326884, "grad_norm": 0.3203125, "learning_rate": 0.0017052852268745723, "loss": 0.1934, "step": 26198 }, { "epoch": 0.046454765558578665, "grad_norm": 0.64453125, "learning_rate": 0.0017052411780876509, "loss": 0.2074, "step": 26200 }, { "epoch": 0.04645831172388848, "grad_norm": 0.46875, "learning_rate": 0.0017051971266537562, "loss": 0.1877, "step": 26202 }, { "epoch": 0.046461857889198294, "grad_norm": 1.296875, "learning_rate": 0.0017051530725730808, "loss": 0.2244, "step": 26204 }, { "epoch": 0.04646540405450811, "grad_norm": 1.40625, "learning_rate": 0.0017051090158458173, "loss": 0.1829, "step": 26206 }, { "epoch": 0.04646895021981792, "grad_norm": 0.275390625, "learning_rate": 0.0017050649564721583, "loss": 0.1775, "step": 26208 }, { "epoch": 0.04647249638512774, "grad_norm": 2.4375, "learning_rate": 0.0017050208944522972, "loss": 0.4318, "step": 26210 }, { "epoch": 0.04647604255043755, "grad_norm": 0.443359375, "learning_rate": 0.0017049768297864257, "loss": 0.202, "step": 26212 }, { "epoch": 0.046479588715747366, "grad_norm": 0.416015625, "learning_rate": 0.0017049327624747371, "loss": 0.2132, "step": 26214 }, { "epoch": 0.04648313488105718, "grad_norm": 0.431640625, "learning_rate": 0.001704888692517424, "loss": 0.1796, "step": 26216 }, { "epoch": 0.046486681046366995, "grad_norm": 0.625, "learning_rate": 0.0017048446199146787, "loss": 0.1897, "step": 26218 }, { "epoch": 0.04649022721167681, "grad_norm": 0.40625, "learning_rate": 0.0017048005446666948, "loss": 0.2152, "step": 26220 }, { "epoch": 0.04649377337698663, "grad_norm": 0.72265625, "learning_rate": 0.0017047564667736644, "loss": 0.183, "step": 26222 }, { "epoch": 0.046497319542296446, "grad_norm": 0.384765625, "learning_rate": 0.0017047123862357807, "loss": 0.1673, "step": 26224 }, { "epoch": 0.04650086570760626, "grad_norm": 0.263671875, "learning_rate": 0.0017046683030532363, "loss": 0.2806, "step": 26226 }, { "epoch": 0.046504411872916075, "grad_norm": 1.0859375, "learning_rate": 0.0017046242172262238, "loss": 0.274, "step": 26228 }, { "epoch": 0.04650795803822589, "grad_norm": 0.6328125, "learning_rate": 0.0017045801287549362, "loss": 0.2151, "step": 26230 }, { "epoch": 0.046511504203535704, "grad_norm": 0.388671875, "learning_rate": 0.0017045360376395664, "loss": 0.2637, "step": 26232 }, { "epoch": 0.04651505036884552, "grad_norm": 1.1875, "learning_rate": 0.0017044919438803073, "loss": 0.1963, "step": 26234 }, { "epoch": 0.04651859653415533, "grad_norm": 0.470703125, "learning_rate": 0.0017044478474773513, "loss": 0.2368, "step": 26236 }, { "epoch": 0.04652214269946515, "grad_norm": 0.60546875, "learning_rate": 0.0017044037484308916, "loss": 0.2424, "step": 26238 }, { "epoch": 0.04652568886477496, "grad_norm": 1.515625, "learning_rate": 0.0017043596467411209, "loss": 0.2824, "step": 26240 }, { "epoch": 0.04652923503008478, "grad_norm": 0.33984375, "learning_rate": 0.0017043155424082323, "loss": 0.1975, "step": 26242 }, { "epoch": 0.0465327811953946, "grad_norm": 0.25390625, "learning_rate": 0.0017042714354324185, "loss": 0.1724, "step": 26244 }, { "epoch": 0.04653632736070441, "grad_norm": 0.369140625, "learning_rate": 0.0017042273258138722, "loss": 0.2137, "step": 26246 }, { "epoch": 0.04653987352601423, "grad_norm": 0.34765625, "learning_rate": 0.0017041832135527867, "loss": 0.2472, "step": 26248 }, { "epoch": 0.04654341969132404, "grad_norm": 1.78125, "learning_rate": 0.001704139098649355, "loss": 0.4322, "step": 26250 }, { "epoch": 0.04654696585663386, "grad_norm": 0.75, "learning_rate": 0.0017040949811037698, "loss": 0.1899, "step": 26252 }, { "epoch": 0.04655051202194367, "grad_norm": 0.427734375, "learning_rate": 0.0017040508609162239, "loss": 0.2124, "step": 26254 }, { "epoch": 0.046554058187253486, "grad_norm": 0.5546875, "learning_rate": 0.0017040067380869105, "loss": 0.1779, "step": 26256 }, { "epoch": 0.0465576043525633, "grad_norm": 1.59375, "learning_rate": 0.0017039626126160224, "loss": 0.2418, "step": 26258 }, { "epoch": 0.046561150517873115, "grad_norm": 0.72265625, "learning_rate": 0.0017039184845037527, "loss": 0.3143, "step": 26260 }, { "epoch": 0.04656469668318293, "grad_norm": 0.8359375, "learning_rate": 0.0017038743537502942, "loss": 0.4441, "step": 26262 }, { "epoch": 0.046568242848492744, "grad_norm": 0.3046875, "learning_rate": 0.0017038302203558403, "loss": 0.1865, "step": 26264 }, { "epoch": 0.04657178901380256, "grad_norm": 0.28515625, "learning_rate": 0.0017037860843205834, "loss": 0.2082, "step": 26266 }, { "epoch": 0.04657533517911238, "grad_norm": 0.38671875, "learning_rate": 0.001703741945644717, "loss": 0.2036, "step": 26268 }, { "epoch": 0.046578881344422195, "grad_norm": 0.70703125, "learning_rate": 0.0017036978043284343, "loss": 0.2463, "step": 26270 }, { "epoch": 0.04658242750973201, "grad_norm": 0.462890625, "learning_rate": 0.0017036536603719278, "loss": 0.3107, "step": 26272 }, { "epoch": 0.046585973675041824, "grad_norm": 0.376953125, "learning_rate": 0.0017036095137753912, "loss": 0.1705, "step": 26274 }, { "epoch": 0.04658951984035164, "grad_norm": 0.267578125, "learning_rate": 0.0017035653645390166, "loss": 0.19, "step": 26276 }, { "epoch": 0.04659306600566145, "grad_norm": 0.79296875, "learning_rate": 0.0017035212126629983, "loss": 0.2222, "step": 26278 }, { "epoch": 0.04659661217097127, "grad_norm": 4.71875, "learning_rate": 0.0017034770581475284, "loss": 0.3348, "step": 26280 }, { "epoch": 0.04660015833628108, "grad_norm": 4.59375, "learning_rate": 0.0017034329009928005, "loss": 0.2646, "step": 26282 }, { "epoch": 0.0466037045015909, "grad_norm": 1.9453125, "learning_rate": 0.0017033887411990075, "loss": 0.359, "step": 26284 }, { "epoch": 0.04660725066690071, "grad_norm": 0.96484375, "learning_rate": 0.0017033445787663427, "loss": 0.2766, "step": 26286 }, { "epoch": 0.046610796832210526, "grad_norm": 0.361328125, "learning_rate": 0.0017033004136949995, "loss": 0.2161, "step": 26288 }, { "epoch": 0.04661434299752035, "grad_norm": 0.6328125, "learning_rate": 0.0017032562459851704, "loss": 0.1906, "step": 26290 }, { "epoch": 0.04661788916283016, "grad_norm": 0.30859375, "learning_rate": 0.0017032120756370489, "loss": 0.1883, "step": 26292 }, { "epoch": 0.046621435328139976, "grad_norm": 0.52734375, "learning_rate": 0.001703167902650828, "loss": 0.1549, "step": 26294 }, { "epoch": 0.04662498149344979, "grad_norm": 0.73828125, "learning_rate": 0.0017031237270267012, "loss": 0.2314, "step": 26296 }, { "epoch": 0.046628527658759605, "grad_norm": 0.388671875, "learning_rate": 0.0017030795487648616, "loss": 0.2373, "step": 26298 }, { "epoch": 0.04663207382406942, "grad_norm": 0.8046875, "learning_rate": 0.0017030353678655027, "loss": 0.1619, "step": 26300 }, { "epoch": 0.046635619989379234, "grad_norm": 1.328125, "learning_rate": 0.0017029911843288168, "loss": 0.3279, "step": 26302 }, { "epoch": 0.04663916615468905, "grad_norm": 1.625, "learning_rate": 0.001702946998154998, "loss": 0.2421, "step": 26304 }, { "epoch": 0.046642712319998864, "grad_norm": 0.30078125, "learning_rate": 0.0017029028093442393, "loss": 0.1816, "step": 26306 }, { "epoch": 0.04664625848530868, "grad_norm": 1.28125, "learning_rate": 0.001702858617896734, "loss": 0.2181, "step": 26308 }, { "epoch": 0.04664980465061849, "grad_norm": 0.68359375, "learning_rate": 0.0017028144238126748, "loss": 0.1898, "step": 26310 }, { "epoch": 0.046653350815928314, "grad_norm": 1.4140625, "learning_rate": 0.001702770227092256, "loss": 0.2021, "step": 26312 }, { "epoch": 0.04665689698123813, "grad_norm": 0.359375, "learning_rate": 0.0017027260277356702, "loss": 0.1304, "step": 26314 }, { "epoch": 0.04666044314654794, "grad_norm": 0.64453125, "learning_rate": 0.001702681825743111, "loss": 0.2546, "step": 26316 }, { "epoch": 0.04666398931185776, "grad_norm": 1.6015625, "learning_rate": 0.001702637621114771, "loss": 0.1731, "step": 26318 }, { "epoch": 0.04666753547716757, "grad_norm": 0.515625, "learning_rate": 0.001702593413850845, "loss": 0.2259, "step": 26320 }, { "epoch": 0.04667108164247739, "grad_norm": 0.59375, "learning_rate": 0.0017025492039515248, "loss": 0.1596, "step": 26322 }, { "epoch": 0.0466746278077872, "grad_norm": 0.25, "learning_rate": 0.0017025049914170043, "loss": 0.2008, "step": 26324 }, { "epoch": 0.046678173973097016, "grad_norm": 0.43359375, "learning_rate": 0.0017024607762474774, "loss": 0.2482, "step": 26326 }, { "epoch": 0.04668172013840683, "grad_norm": 0.78125, "learning_rate": 0.0017024165584431366, "loss": 0.2442, "step": 26328 }, { "epoch": 0.046685266303716645, "grad_norm": 0.5234375, "learning_rate": 0.001702372338004176, "loss": 0.2235, "step": 26330 }, { "epoch": 0.04668881246902646, "grad_norm": 0.43359375, "learning_rate": 0.0017023281149307885, "loss": 0.1694, "step": 26332 }, { "epoch": 0.046692358634336274, "grad_norm": 0.51171875, "learning_rate": 0.001702283889223168, "loss": 0.397, "step": 26334 }, { "epoch": 0.046695904799646096, "grad_norm": 0.6953125, "learning_rate": 0.0017022396608815074, "loss": 0.1493, "step": 26336 }, { "epoch": 0.04669945096495591, "grad_norm": 0.333984375, "learning_rate": 0.0017021954299060002, "loss": 0.1927, "step": 26338 }, { "epoch": 0.046702997130265725, "grad_norm": 0.279296875, "learning_rate": 0.0017021511962968402, "loss": 0.2215, "step": 26340 }, { "epoch": 0.04670654329557554, "grad_norm": 0.27734375, "learning_rate": 0.0017021069600542205, "loss": 0.2289, "step": 26342 }, { "epoch": 0.046710089460885354, "grad_norm": 0.6953125, "learning_rate": 0.0017020627211783348, "loss": 0.22, "step": 26344 }, { "epoch": 0.04671363562619517, "grad_norm": 0.3984375, "learning_rate": 0.0017020184796693765, "loss": 0.1477, "step": 26346 }, { "epoch": 0.04671718179150498, "grad_norm": 0.48828125, "learning_rate": 0.001701974235527539, "loss": 0.1967, "step": 26348 }, { "epoch": 0.0467207279568148, "grad_norm": 0.37890625, "learning_rate": 0.0017019299887530155, "loss": 0.1668, "step": 26350 }, { "epoch": 0.04672427412212461, "grad_norm": 0.38671875, "learning_rate": 0.0017018857393460009, "loss": 0.2344, "step": 26352 }, { "epoch": 0.04672782028743443, "grad_norm": 1.0390625, "learning_rate": 0.0017018414873066865, "loss": 0.2144, "step": 26354 }, { "epoch": 0.04673136645274424, "grad_norm": 0.68359375, "learning_rate": 0.001701797232635268, "loss": 0.2501, "step": 26356 }, { "epoch": 0.04673491261805406, "grad_norm": 0.380859375, "learning_rate": 0.0017017529753319371, "loss": 0.2134, "step": 26358 }, { "epoch": 0.04673845878336388, "grad_norm": 0.93359375, "learning_rate": 0.0017017087153968888, "loss": 0.289, "step": 26360 }, { "epoch": 0.04674200494867369, "grad_norm": 0.359375, "learning_rate": 0.001701664452830316, "loss": 0.1221, "step": 26362 }, { "epoch": 0.046745551113983506, "grad_norm": 0.75390625, "learning_rate": 0.0017016201876324123, "loss": 0.2445, "step": 26364 }, { "epoch": 0.04674909727929332, "grad_norm": 0.51953125, "learning_rate": 0.0017015759198033714, "loss": 0.27, "step": 26366 }, { "epoch": 0.046752643444603136, "grad_norm": 0.298828125, "learning_rate": 0.0017015316493433866, "loss": 0.177, "step": 26368 }, { "epoch": 0.04675618960991295, "grad_norm": 0.3671875, "learning_rate": 0.0017014873762526523, "loss": 0.1573, "step": 26370 }, { "epoch": 0.046759735775222765, "grad_norm": 0.384765625, "learning_rate": 0.001701443100531361, "loss": 0.3564, "step": 26372 }, { "epoch": 0.04676328194053258, "grad_norm": 0.498046875, "learning_rate": 0.0017013988221797076, "loss": 0.1993, "step": 26374 }, { "epoch": 0.046766828105842394, "grad_norm": 0.375, "learning_rate": 0.0017013545411978847, "loss": 0.1907, "step": 26376 }, { "epoch": 0.04677037427115221, "grad_norm": 0.2041015625, "learning_rate": 0.0017013102575860863, "loss": 0.2344, "step": 26378 }, { "epoch": 0.04677392043646203, "grad_norm": 0.71875, "learning_rate": 0.0017012659713445063, "loss": 0.2775, "step": 26380 }, { "epoch": 0.046777466601771844, "grad_norm": 0.78515625, "learning_rate": 0.0017012216824733382, "loss": 0.2069, "step": 26382 }, { "epoch": 0.04678101276708166, "grad_norm": 0.55859375, "learning_rate": 0.0017011773909727755, "loss": 0.1913, "step": 26384 }, { "epoch": 0.04678455893239147, "grad_norm": 0.62890625, "learning_rate": 0.0017011330968430122, "loss": 0.2029, "step": 26386 }, { "epoch": 0.04678810509770129, "grad_norm": 0.318359375, "learning_rate": 0.001701088800084242, "loss": 0.2483, "step": 26388 }, { "epoch": 0.0467916512630111, "grad_norm": 0.921875, "learning_rate": 0.0017010445006966583, "loss": 0.2236, "step": 26390 }, { "epoch": 0.04679519742832092, "grad_norm": 0.765625, "learning_rate": 0.0017010001986804554, "loss": 0.2329, "step": 26392 }, { "epoch": 0.04679874359363073, "grad_norm": 0.1669921875, "learning_rate": 0.0017009558940358264, "loss": 0.1477, "step": 26394 }, { "epoch": 0.046802289758940546, "grad_norm": 1.09375, "learning_rate": 0.0017009115867629657, "loss": 0.2482, "step": 26396 }, { "epoch": 0.04680583592425036, "grad_norm": 1.0546875, "learning_rate": 0.0017008672768620665, "loss": 0.1572, "step": 26398 }, { "epoch": 0.046809382089560175, "grad_norm": 0.26171875, "learning_rate": 0.0017008229643333229, "loss": 0.2865, "step": 26400 }, { "epoch": 0.04681292825486999, "grad_norm": 0.3515625, "learning_rate": 0.0017007786491769288, "loss": 0.2051, "step": 26402 }, { "epoch": 0.04681647442017981, "grad_norm": 0.515625, "learning_rate": 0.0017007343313930779, "loss": 0.1688, "step": 26404 }, { "epoch": 0.046820020585489626, "grad_norm": 0.8671875, "learning_rate": 0.0017006900109819637, "loss": 0.1314, "step": 26406 }, { "epoch": 0.04682356675079944, "grad_norm": 0.30078125, "learning_rate": 0.0017006456879437807, "loss": 0.2086, "step": 26408 }, { "epoch": 0.046827112916109255, "grad_norm": 3.71875, "learning_rate": 0.001700601362278722, "loss": 0.2599, "step": 26410 }, { "epoch": 0.04683065908141907, "grad_norm": 0.99609375, "learning_rate": 0.001700557033986982, "loss": 0.2239, "step": 26412 }, { "epoch": 0.046834205246728884, "grad_norm": 1.8125, "learning_rate": 0.0017005127030687544, "loss": 0.3041, "step": 26414 }, { "epoch": 0.0468377514120387, "grad_norm": 0.5234375, "learning_rate": 0.001700468369524233, "loss": 0.2533, "step": 26416 }, { "epoch": 0.04684129757734851, "grad_norm": 0.89453125, "learning_rate": 0.0017004240333536118, "loss": 0.3332, "step": 26418 }, { "epoch": 0.04684484374265833, "grad_norm": 0.578125, "learning_rate": 0.0017003796945570847, "loss": 0.1965, "step": 26420 }, { "epoch": 0.04684838990796814, "grad_norm": 0.69921875, "learning_rate": 0.0017003353531348455, "loss": 0.2099, "step": 26422 }, { "epoch": 0.04685193607327796, "grad_norm": 0.8359375, "learning_rate": 0.001700291009087088, "loss": 0.2719, "step": 26424 }, { "epoch": 0.04685548223858778, "grad_norm": 3.046875, "learning_rate": 0.0017002466624140066, "loss": 0.2137, "step": 26426 }, { "epoch": 0.04685902840389759, "grad_norm": 0.466796875, "learning_rate": 0.0017002023131157946, "loss": 0.1751, "step": 26428 }, { "epoch": 0.04686257456920741, "grad_norm": 2.796875, "learning_rate": 0.0017001579611926468, "loss": 0.2077, "step": 26430 }, { "epoch": 0.04686612073451722, "grad_norm": 0.427734375, "learning_rate": 0.0017001136066447562, "loss": 0.2164, "step": 26432 }, { "epoch": 0.04686966689982704, "grad_norm": 0.302734375, "learning_rate": 0.0017000692494723176, "loss": 0.2455, "step": 26434 }, { "epoch": 0.04687321306513685, "grad_norm": 0.2373046875, "learning_rate": 0.0017000248896755245, "loss": 0.1795, "step": 26436 }, { "epoch": 0.046876759230446666, "grad_norm": 0.7265625, "learning_rate": 0.0016999805272545712, "loss": 0.213, "step": 26438 }, { "epoch": 0.04688030539575648, "grad_norm": 2.875, "learning_rate": 0.0016999361622096517, "loss": 0.3079, "step": 26440 }, { "epoch": 0.046883851561066295, "grad_norm": 1.421875, "learning_rate": 0.0016998917945409595, "loss": 0.2155, "step": 26442 }, { "epoch": 0.04688739772637611, "grad_norm": 0.271484375, "learning_rate": 0.0016998474242486891, "loss": 0.2075, "step": 26444 }, { "epoch": 0.046890943891685924, "grad_norm": 0.671875, "learning_rate": 0.001699803051333035, "loss": 0.2399, "step": 26446 }, { "epoch": 0.046894490056995745, "grad_norm": 1.453125, "learning_rate": 0.0016997586757941904, "loss": 0.3139, "step": 26448 }, { "epoch": 0.04689803622230556, "grad_norm": 0.353515625, "learning_rate": 0.00169971429763235, "loss": 0.2062, "step": 26450 }, { "epoch": 0.046901582387615375, "grad_norm": 0.66015625, "learning_rate": 0.001699669916847707, "loss": 0.2301, "step": 26452 }, { "epoch": 0.04690512855292519, "grad_norm": 3.171875, "learning_rate": 0.0016996255334404565, "loss": 0.3623, "step": 26454 }, { "epoch": 0.046908674718235004, "grad_norm": 0.443359375, "learning_rate": 0.0016995811474107923, "loss": 0.3399, "step": 26456 }, { "epoch": 0.04691222088354482, "grad_norm": 0.5625, "learning_rate": 0.0016995367587589083, "loss": 0.1929, "step": 26458 }, { "epoch": 0.04691576704885463, "grad_norm": 0.208984375, "learning_rate": 0.001699492367484999, "loss": 0.2058, "step": 26460 }, { "epoch": 0.04691931321416445, "grad_norm": 0.28515625, "learning_rate": 0.0016994479735892578, "loss": 0.188, "step": 26462 }, { "epoch": 0.04692285937947426, "grad_norm": 0.478515625, "learning_rate": 0.0016994035770718798, "loss": 0.2374, "step": 26464 }, { "epoch": 0.046926405544784076, "grad_norm": 1.375, "learning_rate": 0.0016993591779330588, "loss": 0.1652, "step": 26466 }, { "epoch": 0.04692995171009389, "grad_norm": 0.396484375, "learning_rate": 0.0016993147761729889, "loss": 0.2014, "step": 26468 }, { "epoch": 0.046933497875403705, "grad_norm": 0.55859375, "learning_rate": 0.0016992703717918641, "loss": 0.1951, "step": 26470 }, { "epoch": 0.04693704404071353, "grad_norm": 0.291015625, "learning_rate": 0.0016992259647898788, "loss": 0.3431, "step": 26472 }, { "epoch": 0.04694059020602334, "grad_norm": 0.48828125, "learning_rate": 0.0016991815551672272, "loss": 0.2235, "step": 26474 }, { "epoch": 0.046944136371333156, "grad_norm": 0.6953125, "learning_rate": 0.0016991371429241036, "loss": 0.2247, "step": 26476 }, { "epoch": 0.04694768253664297, "grad_norm": 0.91796875, "learning_rate": 0.001699092728060702, "loss": 0.225, "step": 26478 }, { "epoch": 0.046951228701952785, "grad_norm": 0.6015625, "learning_rate": 0.001699048310577217, "loss": 0.2535, "step": 26480 }, { "epoch": 0.0469547748672626, "grad_norm": 0.33984375, "learning_rate": 0.0016990038904738426, "loss": 0.1822, "step": 26482 }, { "epoch": 0.046958321032572414, "grad_norm": 3.15625, "learning_rate": 0.001698959467750773, "loss": 0.3388, "step": 26484 }, { "epoch": 0.04696186719788223, "grad_norm": 0.2333984375, "learning_rate": 0.0016989150424082027, "loss": 0.3176, "step": 26486 }, { "epoch": 0.04696541336319204, "grad_norm": 0.283203125, "learning_rate": 0.001698870614446326, "loss": 0.2681, "step": 26488 }, { "epoch": 0.04696895952850186, "grad_norm": 0.51953125, "learning_rate": 0.0016988261838653367, "loss": 0.1832, "step": 26490 }, { "epoch": 0.04697250569381167, "grad_norm": 0.431640625, "learning_rate": 0.0016987817506654301, "loss": 0.186, "step": 26492 }, { "epoch": 0.046976051859121494, "grad_norm": 0.74609375, "learning_rate": 0.0016987373148467995, "loss": 0.2559, "step": 26494 }, { "epoch": 0.04697959802443131, "grad_norm": 0.408203125, "learning_rate": 0.0016986928764096399, "loss": 0.2334, "step": 26496 }, { "epoch": 0.04698314418974112, "grad_norm": 0.3515625, "learning_rate": 0.0016986484353541454, "loss": 0.1615, "step": 26498 }, { "epoch": 0.04698669035505094, "grad_norm": 1.7421875, "learning_rate": 0.0016986039916805102, "loss": 0.2688, "step": 26500 }, { "epoch": 0.04699023652036075, "grad_norm": 0.255859375, "learning_rate": 0.0016985595453889288, "loss": 0.1914, "step": 26502 }, { "epoch": 0.04699378268567057, "grad_norm": 0.310546875, "learning_rate": 0.001698515096479596, "loss": 0.2446, "step": 26504 }, { "epoch": 0.04699732885098038, "grad_norm": 0.279296875, "learning_rate": 0.0016984706449527055, "loss": 0.2026, "step": 26506 }, { "epoch": 0.047000875016290196, "grad_norm": 0.33984375, "learning_rate": 0.0016984261908084522, "loss": 0.1927, "step": 26508 }, { "epoch": 0.04700442118160001, "grad_norm": 0.3046875, "learning_rate": 0.0016983817340470305, "loss": 0.2186, "step": 26510 }, { "epoch": 0.047007967346909825, "grad_norm": 0.5546875, "learning_rate": 0.0016983372746686345, "loss": 0.4702, "step": 26512 }, { "epoch": 0.04701151351221964, "grad_norm": 2.046875, "learning_rate": 0.001698292812673459, "loss": 0.385, "step": 26514 }, { "epoch": 0.047015059677529454, "grad_norm": 0.96484375, "learning_rate": 0.001698248348061698, "loss": 0.1972, "step": 26516 }, { "epoch": 0.047018605842839276, "grad_norm": 0.478515625, "learning_rate": 0.001698203880833546, "loss": 0.2332, "step": 26518 }, { "epoch": 0.04702215200814909, "grad_norm": 0.5390625, "learning_rate": 0.001698159410989198, "loss": 0.2654, "step": 26520 }, { "epoch": 0.047025698173458905, "grad_norm": 1.7578125, "learning_rate": 0.001698114938528848, "loss": 0.223, "step": 26522 }, { "epoch": 0.04702924433876872, "grad_norm": 0.369140625, "learning_rate": 0.001698070463452691, "loss": 0.2001, "step": 26524 }, { "epoch": 0.047032790504078534, "grad_norm": 0.291015625, "learning_rate": 0.0016980259857609212, "loss": 0.3718, "step": 26526 }, { "epoch": 0.04703633666938835, "grad_norm": 0.54296875, "learning_rate": 0.001697981505453733, "loss": 0.2237, "step": 26528 }, { "epoch": 0.04703988283469816, "grad_norm": 0.3125, "learning_rate": 0.0016979370225313208, "loss": 0.2062, "step": 26530 }, { "epoch": 0.04704342900000798, "grad_norm": 0.5, "learning_rate": 0.0016978925369938798, "loss": 0.2603, "step": 26532 }, { "epoch": 0.04704697516531779, "grad_norm": 1.28125, "learning_rate": 0.001697848048841604, "loss": 0.2309, "step": 26534 }, { "epoch": 0.04705052133062761, "grad_norm": 0.78515625, "learning_rate": 0.001697803558074688, "loss": 0.1878, "step": 26536 }, { "epoch": 0.04705406749593742, "grad_norm": 0.3125, "learning_rate": 0.0016977590646933267, "loss": 0.2087, "step": 26538 }, { "epoch": 0.04705761366124724, "grad_norm": 0.9375, "learning_rate": 0.0016977145686977143, "loss": 0.2314, "step": 26540 }, { "epoch": 0.04706115982655706, "grad_norm": 0.27734375, "learning_rate": 0.0016976700700880457, "loss": 0.1449, "step": 26542 }, { "epoch": 0.04706470599186687, "grad_norm": 0.4765625, "learning_rate": 0.001697625568864515, "loss": 0.1747, "step": 26544 }, { "epoch": 0.047068252157176686, "grad_norm": 0.50390625, "learning_rate": 0.0016975810650273176, "loss": 0.2192, "step": 26546 }, { "epoch": 0.0470717983224865, "grad_norm": 0.2451171875, "learning_rate": 0.0016975365585766475, "loss": 0.4713, "step": 26548 }, { "epoch": 0.047075344487796315, "grad_norm": 0.369140625, "learning_rate": 0.0016974920495126998, "loss": 0.1806, "step": 26550 }, { "epoch": 0.04707889065310613, "grad_norm": 2.84375, "learning_rate": 0.001697447537835669, "loss": 0.182, "step": 26552 }, { "epoch": 0.047082436818415944, "grad_norm": 5.1875, "learning_rate": 0.0016974030235457494, "loss": 0.2765, "step": 26554 }, { "epoch": 0.04708598298372576, "grad_norm": 0.80859375, "learning_rate": 0.001697358506643136, "loss": 0.2096, "step": 26556 }, { "epoch": 0.047089529149035574, "grad_norm": 0.265625, "learning_rate": 0.0016973139871280236, "loss": 0.1767, "step": 26558 }, { "epoch": 0.04709307531434539, "grad_norm": 0.68359375, "learning_rate": 0.0016972694650006068, "loss": 0.1866, "step": 26560 }, { "epoch": 0.04709662147965521, "grad_norm": 0.796875, "learning_rate": 0.0016972249402610801, "loss": 0.2523, "step": 26562 }, { "epoch": 0.047100167644965024, "grad_norm": 0.326171875, "learning_rate": 0.0016971804129096387, "loss": 0.2213, "step": 26564 }, { "epoch": 0.04710371381027484, "grad_norm": 0.310546875, "learning_rate": 0.0016971358829464772, "loss": 0.134, "step": 26566 }, { "epoch": 0.04710725997558465, "grad_norm": 1.7421875, "learning_rate": 0.0016970913503717897, "loss": 0.155, "step": 26568 }, { "epoch": 0.04711080614089447, "grad_norm": 4.9375, "learning_rate": 0.0016970468151857718, "loss": 0.3679, "step": 26570 }, { "epoch": 0.04711435230620428, "grad_norm": 0.73046875, "learning_rate": 0.001697002277388618, "loss": 0.2125, "step": 26572 }, { "epoch": 0.0471178984715141, "grad_norm": 0.369140625, "learning_rate": 0.0016969577369805229, "loss": 0.1798, "step": 26574 }, { "epoch": 0.04712144463682391, "grad_norm": 0.59375, "learning_rate": 0.0016969131939616811, "loss": 0.186, "step": 26576 }, { "epoch": 0.047124990802133726, "grad_norm": 0.50390625, "learning_rate": 0.001696868648332288, "loss": 0.2249, "step": 26578 }, { "epoch": 0.04712853696744354, "grad_norm": 1.2109375, "learning_rate": 0.0016968241000925384, "loss": 0.1821, "step": 26580 }, { "epoch": 0.047132083132753355, "grad_norm": 0.78515625, "learning_rate": 0.0016967795492426265, "loss": 0.1938, "step": 26582 }, { "epoch": 0.04713562929806317, "grad_norm": 0.64453125, "learning_rate": 0.0016967349957827476, "loss": 0.1888, "step": 26584 }, { "epoch": 0.04713917546337299, "grad_norm": 0.25390625, "learning_rate": 0.0016966904397130965, "loss": 0.1776, "step": 26586 }, { "epoch": 0.047142721628682806, "grad_norm": 0.392578125, "learning_rate": 0.0016966458810338681, "loss": 0.221, "step": 26588 }, { "epoch": 0.04714626779399262, "grad_norm": 0.34375, "learning_rate": 0.001696601319745257, "loss": 0.1848, "step": 26590 }, { "epoch": 0.047149813959302435, "grad_norm": 0.87109375, "learning_rate": 0.0016965567558474583, "loss": 0.1882, "step": 26592 }, { "epoch": 0.04715336012461225, "grad_norm": 0.80078125, "learning_rate": 0.001696512189340667, "loss": 0.2284, "step": 26594 }, { "epoch": 0.047156906289922064, "grad_norm": 0.671875, "learning_rate": 0.0016964676202250778, "loss": 0.2095, "step": 26596 }, { "epoch": 0.04716045245523188, "grad_norm": 0.1865234375, "learning_rate": 0.0016964230485008859, "loss": 0.145, "step": 26598 }, { "epoch": 0.04716399862054169, "grad_norm": 0.671875, "learning_rate": 0.0016963784741682855, "loss": 0.2673, "step": 26600 }, { "epoch": 0.04716754478585151, "grad_norm": 0.5546875, "learning_rate": 0.0016963338972274726, "loss": 0.2122, "step": 26602 }, { "epoch": 0.04717109095116132, "grad_norm": 16.375, "learning_rate": 0.0016962893176786413, "loss": 0.4163, "step": 26604 }, { "epoch": 0.04717463711647114, "grad_norm": 0.83203125, "learning_rate": 0.001696244735521987, "loss": 0.274, "step": 26606 }, { "epoch": 0.04717818328178096, "grad_norm": 0.39453125, "learning_rate": 0.0016962001507577046, "loss": 0.1843, "step": 26608 }, { "epoch": 0.04718172944709077, "grad_norm": 1.078125, "learning_rate": 0.0016961555633859893, "loss": 0.2262, "step": 26610 }, { "epoch": 0.04718527561240059, "grad_norm": 0.490234375, "learning_rate": 0.0016961109734070356, "loss": 0.1802, "step": 26612 }, { "epoch": 0.0471888217777104, "grad_norm": 0.828125, "learning_rate": 0.0016960663808210385, "loss": 0.1749, "step": 26614 }, { "epoch": 0.047192367943020216, "grad_norm": 0.515625, "learning_rate": 0.0016960217856281936, "loss": 0.1896, "step": 26616 }, { "epoch": 0.04719591410833003, "grad_norm": 0.326171875, "learning_rate": 0.0016959771878286957, "loss": 0.1714, "step": 26618 }, { "epoch": 0.047199460273639846, "grad_norm": 2.046875, "learning_rate": 0.0016959325874227396, "loss": 0.4339, "step": 26620 }, { "epoch": 0.04720300643894966, "grad_norm": 0.6328125, "learning_rate": 0.0016958879844105204, "loss": 0.2082, "step": 26622 }, { "epoch": 0.047206552604259475, "grad_norm": 0.78125, "learning_rate": 0.0016958433787922333, "loss": 0.2411, "step": 26624 }, { "epoch": 0.04721009876956929, "grad_norm": 0.7265625, "learning_rate": 0.0016957987705680736, "loss": 0.264, "step": 26626 }, { "epoch": 0.047213644934879104, "grad_norm": 0.5546875, "learning_rate": 0.0016957541597382361, "loss": 0.1873, "step": 26628 }, { "epoch": 0.047217191100188925, "grad_norm": 0.37109375, "learning_rate": 0.001695709546302916, "loss": 0.2267, "step": 26630 }, { "epoch": 0.04722073726549874, "grad_norm": 0.609375, "learning_rate": 0.001695664930262308, "loss": 0.2653, "step": 26632 }, { "epoch": 0.047224283430808554, "grad_norm": 0.4296875, "learning_rate": 0.001695620311616608, "loss": 0.2177, "step": 26634 }, { "epoch": 0.04722782959611837, "grad_norm": 1.4140625, "learning_rate": 0.0016955756903660108, "loss": 0.3076, "step": 26636 }, { "epoch": 0.04723137576142818, "grad_norm": 0.3671875, "learning_rate": 0.0016955310665107113, "loss": 0.24, "step": 26638 }, { "epoch": 0.047234921926738, "grad_norm": 0.87890625, "learning_rate": 0.001695486440050905, "loss": 0.1752, "step": 26640 }, { "epoch": 0.04723846809204781, "grad_norm": 0.451171875, "learning_rate": 0.0016954418109867866, "loss": 0.2261, "step": 26642 }, { "epoch": 0.04724201425735763, "grad_norm": 0.671875, "learning_rate": 0.001695397179318552, "loss": 0.1952, "step": 26644 }, { "epoch": 0.04724556042266744, "grad_norm": 1.5390625, "learning_rate": 0.0016953525450463956, "loss": 0.3127, "step": 26646 }, { "epoch": 0.047249106587977256, "grad_norm": 0.2578125, "learning_rate": 0.0016953079081705132, "loss": 0.1597, "step": 26648 }, { "epoch": 0.04725265275328707, "grad_norm": 0.40625, "learning_rate": 0.0016952632686910996, "loss": 0.233, "step": 26650 }, { "epoch": 0.047256198918596885, "grad_norm": 0.48828125, "learning_rate": 0.0016952186266083506, "loss": 0.1523, "step": 26652 }, { "epoch": 0.04725974508390671, "grad_norm": 1.421875, "learning_rate": 0.0016951739819224608, "loss": 0.2706, "step": 26654 }, { "epoch": 0.04726329124921652, "grad_norm": 0.416015625, "learning_rate": 0.0016951293346336258, "loss": 0.2314, "step": 26656 }, { "epoch": 0.047266837414526336, "grad_norm": 0.388671875, "learning_rate": 0.0016950846847420408, "loss": 0.1628, "step": 26658 }, { "epoch": 0.04727038357983615, "grad_norm": 0.5703125, "learning_rate": 0.0016950400322479008, "loss": 0.2043, "step": 26660 }, { "epoch": 0.047273929745145965, "grad_norm": 0.72265625, "learning_rate": 0.0016949953771514016, "loss": 0.2141, "step": 26662 }, { "epoch": 0.04727747591045578, "grad_norm": 0.796875, "learning_rate": 0.0016949507194527382, "loss": 0.2153, "step": 26664 }, { "epoch": 0.047281022075765594, "grad_norm": 0.25, "learning_rate": 0.0016949060591521059, "loss": 0.3093, "step": 26666 }, { "epoch": 0.04728456824107541, "grad_norm": 0.55859375, "learning_rate": 0.0016948613962497001, "loss": 0.1671, "step": 26668 }, { "epoch": 0.04728811440638522, "grad_norm": 0.89453125, "learning_rate": 0.0016948167307457161, "loss": 0.1981, "step": 26670 }, { "epoch": 0.04729166057169504, "grad_norm": 0.7890625, "learning_rate": 0.001694772062640349, "loss": 0.1879, "step": 26672 }, { "epoch": 0.04729520673700485, "grad_norm": 0.40625, "learning_rate": 0.0016947273919337947, "loss": 0.1701, "step": 26674 }, { "epoch": 0.047298752902314674, "grad_norm": 2.25, "learning_rate": 0.0016946827186262484, "loss": 0.2584, "step": 26676 }, { "epoch": 0.04730229906762449, "grad_norm": 0.35546875, "learning_rate": 0.0016946380427179046, "loss": 0.1787, "step": 26678 }, { "epoch": 0.0473058452329343, "grad_norm": 0.5078125, "learning_rate": 0.0016945933642089602, "loss": 0.3514, "step": 26680 }, { "epoch": 0.04730939139824412, "grad_norm": 0.41015625, "learning_rate": 0.0016945486830996096, "loss": 0.2255, "step": 26682 }, { "epoch": 0.04731293756355393, "grad_norm": 0.66796875, "learning_rate": 0.0016945039993900481, "loss": 0.1752, "step": 26684 }, { "epoch": 0.04731648372886375, "grad_norm": 0.60546875, "learning_rate": 0.0016944593130804714, "loss": 0.2008, "step": 26686 }, { "epoch": 0.04732002989417356, "grad_norm": 0.66015625, "learning_rate": 0.0016944146241710755, "loss": 0.2074, "step": 26688 }, { "epoch": 0.047323576059483376, "grad_norm": 0.4765625, "learning_rate": 0.001694369932662055, "loss": 0.2019, "step": 26690 }, { "epoch": 0.04732712222479319, "grad_norm": 3.046875, "learning_rate": 0.0016943252385536057, "loss": 0.2159, "step": 26692 }, { "epoch": 0.047330668390103005, "grad_norm": 0.3203125, "learning_rate": 0.0016942805418459228, "loss": 0.2373, "step": 26694 }, { "epoch": 0.04733421455541282, "grad_norm": 1.796875, "learning_rate": 0.0016942358425392024, "loss": 0.259, "step": 26696 }, { "epoch": 0.04733776072072264, "grad_norm": 0.609375, "learning_rate": 0.0016941911406336391, "loss": 0.202, "step": 26698 }, { "epoch": 0.047341306886032455, "grad_norm": 0.765625, "learning_rate": 0.0016941464361294291, "loss": 0.1538, "step": 26700 }, { "epoch": 0.04734485305134227, "grad_norm": 0.494140625, "learning_rate": 0.0016941017290267681, "loss": 0.2765, "step": 26702 }, { "epoch": 0.047348399216652085, "grad_norm": 0.306640625, "learning_rate": 0.0016940570193258507, "loss": 0.2019, "step": 26704 }, { "epoch": 0.0473519453819619, "grad_norm": 1.203125, "learning_rate": 0.0016940123070268733, "loss": 0.2224, "step": 26706 }, { "epoch": 0.047355491547271714, "grad_norm": 0.27734375, "learning_rate": 0.001693967592130031, "loss": 0.1978, "step": 26708 }, { "epoch": 0.04735903771258153, "grad_norm": 0.9140625, "learning_rate": 0.0016939228746355197, "loss": 0.2975, "step": 26710 }, { "epoch": 0.04736258387789134, "grad_norm": 0.388671875, "learning_rate": 0.0016938781545435342, "loss": 0.2641, "step": 26712 }, { "epoch": 0.04736613004320116, "grad_norm": 0.5, "learning_rate": 0.001693833431854271, "loss": 0.1644, "step": 26714 }, { "epoch": 0.04736967620851097, "grad_norm": 0.671875, "learning_rate": 0.0016937887065679254, "loss": 0.3008, "step": 26716 }, { "epoch": 0.047373222373820786, "grad_norm": 0.470703125, "learning_rate": 0.0016937439786846927, "loss": 0.202, "step": 26718 }, { "epoch": 0.0473767685391306, "grad_norm": 0.439453125, "learning_rate": 0.001693699248204769, "loss": 0.1363, "step": 26720 }, { "epoch": 0.04738031470444042, "grad_norm": 0.55859375, "learning_rate": 0.0016936545151283494, "loss": 0.1663, "step": 26722 }, { "epoch": 0.04738386086975024, "grad_norm": 3.796875, "learning_rate": 0.0016936097794556296, "loss": 0.4902, "step": 26724 }, { "epoch": 0.04738740703506005, "grad_norm": 0.255859375, "learning_rate": 0.0016935650411868057, "loss": 0.2973, "step": 26726 }, { "epoch": 0.047390953200369866, "grad_norm": 1.3046875, "learning_rate": 0.001693520300322073, "loss": 0.2227, "step": 26728 }, { "epoch": 0.04739449936567968, "grad_norm": 0.57421875, "learning_rate": 0.0016934755568616276, "loss": 0.2098, "step": 26730 }, { "epoch": 0.047398045530989495, "grad_norm": 0.380859375, "learning_rate": 0.0016934308108056643, "loss": 0.1398, "step": 26732 }, { "epoch": 0.04740159169629931, "grad_norm": 0.35546875, "learning_rate": 0.0016933860621543798, "loss": 0.202, "step": 26734 }, { "epoch": 0.047405137861609124, "grad_norm": 0.78125, "learning_rate": 0.001693341310907969, "loss": 0.2498, "step": 26736 }, { "epoch": 0.04740868402691894, "grad_norm": 0.640625, "learning_rate": 0.001693296557066628, "loss": 0.1964, "step": 26738 }, { "epoch": 0.04741223019222875, "grad_norm": 0.55859375, "learning_rate": 0.0016932518006305525, "loss": 0.2096, "step": 26740 }, { "epoch": 0.04741577635753857, "grad_norm": 3.828125, "learning_rate": 0.0016932070415999385, "loss": 0.5608, "step": 26742 }, { "epoch": 0.04741932252284839, "grad_norm": 0.35546875, "learning_rate": 0.0016931622799749814, "loss": 0.2021, "step": 26744 }, { "epoch": 0.047422868688158204, "grad_norm": 0.703125, "learning_rate": 0.0016931175157558767, "loss": 0.3564, "step": 26746 }, { "epoch": 0.04742641485346802, "grad_norm": 0.287109375, "learning_rate": 0.001693072748942821, "loss": 0.2096, "step": 26748 }, { "epoch": 0.04742996101877783, "grad_norm": 0.244140625, "learning_rate": 0.0016930279795360093, "loss": 0.1717, "step": 26750 }, { "epoch": 0.04743350718408765, "grad_norm": 1.1640625, "learning_rate": 0.0016929832075356377, "loss": 0.2168, "step": 26752 }, { "epoch": 0.04743705334939746, "grad_norm": 0.2236328125, "learning_rate": 0.001692938432941902, "loss": 0.1648, "step": 26754 }, { "epoch": 0.04744059951470728, "grad_norm": 0.439453125, "learning_rate": 0.001692893655754998, "loss": 0.2113, "step": 26756 }, { "epoch": 0.04744414568001709, "grad_norm": 0.37109375, "learning_rate": 0.0016928488759751217, "loss": 0.199, "step": 26758 }, { "epoch": 0.047447691845326906, "grad_norm": 0.349609375, "learning_rate": 0.0016928040936024685, "loss": 0.2144, "step": 26760 }, { "epoch": 0.04745123801063672, "grad_norm": 0.328125, "learning_rate": 0.0016927593086372348, "loss": 0.2307, "step": 26762 }, { "epoch": 0.047454784175946535, "grad_norm": 0.921875, "learning_rate": 0.001692714521079616, "loss": 0.2631, "step": 26764 }, { "epoch": 0.047458330341256356, "grad_norm": 0.294921875, "learning_rate": 0.0016926697309298084, "loss": 0.1919, "step": 26766 }, { "epoch": 0.04746187650656617, "grad_norm": 0.482421875, "learning_rate": 0.0016926249381880077, "loss": 0.1862, "step": 26768 }, { "epoch": 0.047465422671875986, "grad_norm": 0.490234375, "learning_rate": 0.0016925801428544097, "loss": 0.2482, "step": 26770 }, { "epoch": 0.0474689688371858, "grad_norm": 0.546875, "learning_rate": 0.0016925353449292098, "loss": 0.182, "step": 26772 }, { "epoch": 0.047472515002495615, "grad_norm": 1.46875, "learning_rate": 0.0016924905444126051, "loss": 0.2364, "step": 26774 }, { "epoch": 0.04747606116780543, "grad_norm": 0.84765625, "learning_rate": 0.0016924457413047907, "loss": 0.2101, "step": 26776 }, { "epoch": 0.047479607333115244, "grad_norm": 0.58203125, "learning_rate": 0.0016924009356059629, "loss": 0.1731, "step": 26778 }, { "epoch": 0.04748315349842506, "grad_norm": 0.46875, "learning_rate": 0.0016923561273163173, "loss": 0.2054, "step": 26780 }, { "epoch": 0.04748669966373487, "grad_norm": 0.26171875, "learning_rate": 0.0016923113164360505, "loss": 0.1965, "step": 26782 }, { "epoch": 0.04749024582904469, "grad_norm": 0.66796875, "learning_rate": 0.0016922665029653577, "loss": 0.2095, "step": 26784 }, { "epoch": 0.0474937919943545, "grad_norm": 1.359375, "learning_rate": 0.0016922216869044353, "loss": 0.2737, "step": 26786 }, { "epoch": 0.04749733815966432, "grad_norm": 2.734375, "learning_rate": 0.0016921768682534792, "loss": 0.3536, "step": 26788 }, { "epoch": 0.04750088432497414, "grad_norm": 0.474609375, "learning_rate": 0.0016921320470126852, "loss": 0.2142, "step": 26790 }, { "epoch": 0.04750443049028395, "grad_norm": 1.046875, "learning_rate": 0.0016920872231822503, "loss": 0.2686, "step": 26792 }, { "epoch": 0.04750797665559377, "grad_norm": 0.55859375, "learning_rate": 0.0016920423967623692, "loss": 0.3635, "step": 26794 }, { "epoch": 0.04751152282090358, "grad_norm": 0.72265625, "learning_rate": 0.001691997567753239, "loss": 0.2676, "step": 26796 }, { "epoch": 0.047515068986213396, "grad_norm": 0.46875, "learning_rate": 0.001691952736155055, "loss": 0.1452, "step": 26798 }, { "epoch": 0.04751861515152321, "grad_norm": 0.298828125, "learning_rate": 0.0016919079019680134, "loss": 0.4631, "step": 26800 }, { "epoch": 0.047522161316833025, "grad_norm": 0.361328125, "learning_rate": 0.0016918630651923108, "loss": 0.2792, "step": 26802 }, { "epoch": 0.04752570748214284, "grad_norm": 0.271484375, "learning_rate": 0.001691818225828143, "loss": 0.1678, "step": 26804 }, { "epoch": 0.047529253647452654, "grad_norm": 0.3828125, "learning_rate": 0.0016917733838757057, "loss": 0.1863, "step": 26806 }, { "epoch": 0.04753279981276247, "grad_norm": 0.86328125, "learning_rate": 0.0016917285393351958, "loss": 0.2667, "step": 26808 }, { "epoch": 0.047536345978072284, "grad_norm": 1.2421875, "learning_rate": 0.0016916836922068088, "loss": 0.1615, "step": 26810 }, { "epoch": 0.047539892143382105, "grad_norm": 0.396484375, "learning_rate": 0.0016916388424907409, "loss": 0.1935, "step": 26812 }, { "epoch": 0.04754343830869192, "grad_norm": 0.33203125, "learning_rate": 0.0016915939901871885, "loss": 0.1828, "step": 26814 }, { "epoch": 0.047546984474001734, "grad_norm": 0.55078125, "learning_rate": 0.0016915491352963476, "loss": 0.1822, "step": 26816 }, { "epoch": 0.04755053063931155, "grad_norm": 1.078125, "learning_rate": 0.001691504277818414, "loss": 0.2499, "step": 26818 }, { "epoch": 0.04755407680462136, "grad_norm": 0.267578125, "learning_rate": 0.001691459417753585, "loss": 0.1889, "step": 26820 }, { "epoch": 0.04755762296993118, "grad_norm": 0.91015625, "learning_rate": 0.0016914145551020555, "loss": 0.1931, "step": 26822 }, { "epoch": 0.04756116913524099, "grad_norm": 0.400390625, "learning_rate": 0.0016913696898640225, "loss": 0.2128, "step": 26824 }, { "epoch": 0.04756471530055081, "grad_norm": 0.34375, "learning_rate": 0.0016913248220396821, "loss": 0.1635, "step": 26826 }, { "epoch": 0.04756826146586062, "grad_norm": 0.375, "learning_rate": 0.00169127995162923, "loss": 0.203, "step": 26828 }, { "epoch": 0.047571807631170436, "grad_norm": 0.7109375, "learning_rate": 0.0016912350786328634, "loss": 0.2795, "step": 26830 }, { "epoch": 0.04757535379648025, "grad_norm": 1.3828125, "learning_rate": 0.0016911902030507778, "loss": 0.2004, "step": 26832 }, { "epoch": 0.04757889996179007, "grad_norm": 0.462890625, "learning_rate": 0.0016911453248831694, "loss": 0.2872, "step": 26834 }, { "epoch": 0.04758244612709989, "grad_norm": 0.609375, "learning_rate": 0.0016911004441302348, "loss": 0.2363, "step": 26836 }, { "epoch": 0.0475859922924097, "grad_norm": 0.2353515625, "learning_rate": 0.0016910555607921702, "loss": 0.1554, "step": 26838 }, { "epoch": 0.047589538457719516, "grad_norm": 0.328125, "learning_rate": 0.0016910106748691722, "loss": 0.1638, "step": 26840 }, { "epoch": 0.04759308462302933, "grad_norm": 2.28125, "learning_rate": 0.0016909657863614367, "loss": 0.265, "step": 26842 }, { "epoch": 0.047596630788339145, "grad_norm": 0.92578125, "learning_rate": 0.00169092089526916, "loss": 0.2391, "step": 26844 }, { "epoch": 0.04760017695364896, "grad_norm": 0.8203125, "learning_rate": 0.0016908760015925386, "loss": 0.1643, "step": 26846 }, { "epoch": 0.047603723118958774, "grad_norm": 0.416015625, "learning_rate": 0.001690831105331769, "loss": 0.2094, "step": 26848 }, { "epoch": 0.04760726928426859, "grad_norm": 0.8046875, "learning_rate": 0.001690786206487047, "loss": 0.2242, "step": 26850 }, { "epoch": 0.0476108154495784, "grad_norm": 0.58984375, "learning_rate": 0.0016907413050585695, "loss": 0.1878, "step": 26852 }, { "epoch": 0.04761436161488822, "grad_norm": 0.74609375, "learning_rate": 0.0016906964010465324, "loss": 0.2738, "step": 26854 }, { "epoch": 0.04761790778019803, "grad_norm": 0.671875, "learning_rate": 0.0016906514944511326, "loss": 0.2113, "step": 26856 }, { "epoch": 0.047621453945507854, "grad_norm": 0.80078125, "learning_rate": 0.0016906065852725662, "loss": 0.2342, "step": 26858 }, { "epoch": 0.04762500011081767, "grad_norm": 0.90625, "learning_rate": 0.0016905616735110299, "loss": 0.1734, "step": 26860 }, { "epoch": 0.04762854627612748, "grad_norm": 0.201171875, "learning_rate": 0.0016905167591667197, "loss": 0.1547, "step": 26862 }, { "epoch": 0.0476320924414373, "grad_norm": 0.9609375, "learning_rate": 0.0016904718422398322, "loss": 0.1941, "step": 26864 }, { "epoch": 0.04763563860674711, "grad_norm": 0.4140625, "learning_rate": 0.0016904269227305635, "loss": 0.2407, "step": 26866 }, { "epoch": 0.047639184772056926, "grad_norm": 0.484375, "learning_rate": 0.0016903820006391108, "loss": 0.2149, "step": 26868 }, { "epoch": 0.04764273093736674, "grad_norm": 0.796875, "learning_rate": 0.0016903370759656703, "loss": 0.256, "step": 26870 }, { "epoch": 0.047646277102676556, "grad_norm": 1.25, "learning_rate": 0.001690292148710438, "loss": 0.2674, "step": 26872 }, { "epoch": 0.04764982326798637, "grad_norm": 0.50390625, "learning_rate": 0.0016902472188736108, "loss": 0.2204, "step": 26874 }, { "epoch": 0.047653369433296185, "grad_norm": 1.2109375, "learning_rate": 0.001690202286455385, "loss": 0.5059, "step": 26876 }, { "epoch": 0.047656915598606, "grad_norm": 0.91015625, "learning_rate": 0.0016901573514559575, "loss": 0.1789, "step": 26878 }, { "epoch": 0.04766046176391582, "grad_norm": 10.9375, "learning_rate": 0.0016901124138755247, "loss": 0.3592, "step": 26880 }, { "epoch": 0.047664007929225635, "grad_norm": 0.62890625, "learning_rate": 0.0016900674737142824, "loss": 0.2536, "step": 26882 }, { "epoch": 0.04766755409453545, "grad_norm": 2.0, "learning_rate": 0.0016900225309724281, "loss": 0.1749, "step": 26884 }, { "epoch": 0.047671100259845264, "grad_norm": 0.255859375, "learning_rate": 0.0016899775856501582, "loss": 0.2355, "step": 26886 }, { "epoch": 0.04767464642515508, "grad_norm": 0.61328125, "learning_rate": 0.0016899326377476684, "loss": 0.1652, "step": 26888 }, { "epoch": 0.04767819259046489, "grad_norm": 0.76171875, "learning_rate": 0.0016898876872651562, "loss": 0.1886, "step": 26890 }, { "epoch": 0.04768173875577471, "grad_norm": 3.046875, "learning_rate": 0.0016898427342028181, "loss": 0.2788, "step": 26892 }, { "epoch": 0.04768528492108452, "grad_norm": 0.279296875, "learning_rate": 0.0016897977785608504, "loss": 0.1484, "step": 26894 }, { "epoch": 0.04768883108639434, "grad_norm": 0.427734375, "learning_rate": 0.00168975282033945, "loss": 0.1474, "step": 26896 }, { "epoch": 0.04769237725170415, "grad_norm": 1.0390625, "learning_rate": 0.001689707859538813, "loss": 0.3669, "step": 26898 }, { "epoch": 0.047695923417013966, "grad_norm": 0.85546875, "learning_rate": 0.0016896628961591367, "loss": 0.2442, "step": 26900 }, { "epoch": 0.04769946958232379, "grad_norm": 0.8515625, "learning_rate": 0.001689617930200617, "loss": 0.548, "step": 26902 }, { "epoch": 0.0477030157476336, "grad_norm": 0.423828125, "learning_rate": 0.0016895729616634512, "loss": 0.2445, "step": 26904 }, { "epoch": 0.04770656191294342, "grad_norm": 2.40625, "learning_rate": 0.0016895279905478357, "loss": 0.2967, "step": 26906 }, { "epoch": 0.04771010807825323, "grad_norm": 0.6015625, "learning_rate": 0.0016894830168539673, "loss": 0.1548, "step": 26908 }, { "epoch": 0.047713654243563046, "grad_norm": 0.6875, "learning_rate": 0.0016894380405820424, "loss": 0.1795, "step": 26910 }, { "epoch": 0.04771720040887286, "grad_norm": 1.234375, "learning_rate": 0.0016893930617322582, "loss": 0.1617, "step": 26912 }, { "epoch": 0.047720746574182675, "grad_norm": 0.47265625, "learning_rate": 0.001689348080304811, "loss": 0.256, "step": 26914 }, { "epoch": 0.04772429273949249, "grad_norm": 1.921875, "learning_rate": 0.0016893030962998974, "loss": 0.2681, "step": 26916 }, { "epoch": 0.047727838904802304, "grad_norm": 0.62890625, "learning_rate": 0.0016892581097177144, "loss": 0.1833, "step": 26918 }, { "epoch": 0.04773138507011212, "grad_norm": 1.1015625, "learning_rate": 0.0016892131205584588, "loss": 0.3758, "step": 26920 }, { "epoch": 0.04773493123542193, "grad_norm": 0.2294921875, "learning_rate": 0.0016891681288223274, "loss": 0.2344, "step": 26922 }, { "epoch": 0.04773847740073175, "grad_norm": 0.279296875, "learning_rate": 0.0016891231345095166, "loss": 0.2185, "step": 26924 }, { "epoch": 0.04774202356604157, "grad_norm": 0.453125, "learning_rate": 0.0016890781376202235, "loss": 0.2265, "step": 26926 }, { "epoch": 0.047745569731351384, "grad_norm": 0.5859375, "learning_rate": 0.0016890331381546448, "loss": 0.2437, "step": 26928 }, { "epoch": 0.0477491158966612, "grad_norm": 3.15625, "learning_rate": 0.0016889881361129772, "loss": 0.2029, "step": 26930 }, { "epoch": 0.04775266206197101, "grad_norm": 0.98046875, "learning_rate": 0.0016889431314954181, "loss": 0.2015, "step": 26932 }, { "epoch": 0.04775620822728083, "grad_norm": 0.453125, "learning_rate": 0.0016888981243021633, "loss": 0.1718, "step": 26934 }, { "epoch": 0.04775975439259064, "grad_norm": 1.4140625, "learning_rate": 0.0016888531145334102, "loss": 0.241, "step": 26936 }, { "epoch": 0.04776330055790046, "grad_norm": 0.71875, "learning_rate": 0.001688808102189356, "loss": 0.2415, "step": 26938 }, { "epoch": 0.04776684672321027, "grad_norm": 1.3125, "learning_rate": 0.001688763087270197, "loss": 0.2881, "step": 26940 }, { "epoch": 0.047770392888520086, "grad_norm": 1.046875, "learning_rate": 0.0016887180697761303, "loss": 0.2255, "step": 26942 }, { "epoch": 0.0477739390538299, "grad_norm": 1.328125, "learning_rate": 0.0016886730497073525, "loss": 0.2826, "step": 26944 }, { "epoch": 0.047777485219139715, "grad_norm": 0.66796875, "learning_rate": 0.001688628027064061, "loss": 0.2586, "step": 26946 }, { "epoch": 0.047781031384449536, "grad_norm": 0.439453125, "learning_rate": 0.0016885830018464523, "loss": 0.2123, "step": 26948 }, { "epoch": 0.04778457754975935, "grad_norm": 2.4375, "learning_rate": 0.0016885379740547233, "loss": 0.4881, "step": 26950 }, { "epoch": 0.047788123715069165, "grad_norm": 0.73046875, "learning_rate": 0.0016884929436890713, "loss": 0.2228, "step": 26952 }, { "epoch": 0.04779166988037898, "grad_norm": 1.078125, "learning_rate": 0.0016884479107496931, "loss": 0.2062, "step": 26954 }, { "epoch": 0.047795216045688795, "grad_norm": 0.75, "learning_rate": 0.0016884028752367853, "loss": 0.3562, "step": 26956 }, { "epoch": 0.04779876221099861, "grad_norm": 0.65234375, "learning_rate": 0.0016883578371505448, "loss": 0.1781, "step": 26958 }, { "epoch": 0.047802308376308424, "grad_norm": 0.37109375, "learning_rate": 0.0016883127964911694, "loss": 0.2409, "step": 26960 }, { "epoch": 0.04780585454161824, "grad_norm": 1.2109375, "learning_rate": 0.0016882677532588555, "loss": 0.2075, "step": 26962 }, { "epoch": 0.04780940070692805, "grad_norm": 1.4921875, "learning_rate": 0.0016882227074538, "loss": 0.314, "step": 26964 }, { "epoch": 0.04781294687223787, "grad_norm": 0.61328125, "learning_rate": 0.0016881776590762, "loss": 0.1763, "step": 26966 }, { "epoch": 0.04781649303754768, "grad_norm": 0.59765625, "learning_rate": 0.001688132608126253, "loss": 0.1953, "step": 26968 }, { "epoch": 0.0478200392028575, "grad_norm": 1.09375, "learning_rate": 0.0016880875546041551, "loss": 0.1731, "step": 26970 }, { "epoch": 0.04782358536816732, "grad_norm": 0.91796875, "learning_rate": 0.0016880424985101043, "loss": 0.2987, "step": 26972 }, { "epoch": 0.04782713153347713, "grad_norm": 0.640625, "learning_rate": 0.0016879974398442965, "loss": 0.2192, "step": 26974 }, { "epoch": 0.04783067769878695, "grad_norm": 0.404296875, "learning_rate": 0.0016879523786069303, "loss": 0.2007, "step": 26976 }, { "epoch": 0.04783422386409676, "grad_norm": 0.66796875, "learning_rate": 0.0016879073147982014, "loss": 0.2356, "step": 26978 }, { "epoch": 0.047837770029406576, "grad_norm": 0.2734375, "learning_rate": 0.0016878622484183075, "loss": 0.1508, "step": 26980 }, { "epoch": 0.04784131619471639, "grad_norm": 0.68359375, "learning_rate": 0.0016878171794674453, "loss": 0.1392, "step": 26982 }, { "epoch": 0.047844862360026205, "grad_norm": 0.369140625, "learning_rate": 0.0016877721079458128, "loss": 0.1602, "step": 26984 }, { "epoch": 0.04784840852533602, "grad_norm": 1.875, "learning_rate": 0.001687727033853606, "loss": 0.4699, "step": 26986 }, { "epoch": 0.047851954690645834, "grad_norm": 1.828125, "learning_rate": 0.0016876819571910227, "loss": 0.2239, "step": 26988 }, { "epoch": 0.04785550085595565, "grad_norm": 0.328125, "learning_rate": 0.00168763687795826, "loss": 0.2329, "step": 26990 }, { "epoch": 0.04785904702126546, "grad_norm": 0.34375, "learning_rate": 0.001687591796155515, "loss": 0.1856, "step": 26992 }, { "epoch": 0.047862593186575285, "grad_norm": 0.26171875, "learning_rate": 0.0016875467117829846, "loss": 0.1636, "step": 26994 }, { "epoch": 0.0478661393518851, "grad_norm": 0.71875, "learning_rate": 0.0016875016248408662, "loss": 0.3355, "step": 26996 }, { "epoch": 0.047869685517194914, "grad_norm": 2.671875, "learning_rate": 0.0016874565353293572, "loss": 0.332, "step": 26998 }, { "epoch": 0.04787323168250473, "grad_norm": 0.46484375, "learning_rate": 0.0016874114432486543, "loss": 0.2015, "step": 27000 }, { "epoch": 0.04787677784781454, "grad_norm": 0.5546875, "learning_rate": 0.0016873663485989549, "loss": 0.1933, "step": 27002 }, { "epoch": 0.04788032401312436, "grad_norm": 0.7421875, "learning_rate": 0.0016873212513804563, "loss": 0.2273, "step": 27004 }, { "epoch": 0.04788387017843417, "grad_norm": 0.294921875, "learning_rate": 0.0016872761515933558, "loss": 0.3624, "step": 27006 }, { "epoch": 0.04788741634374399, "grad_norm": 1.2109375, "learning_rate": 0.0016872310492378507, "loss": 0.2118, "step": 27008 }, { "epoch": 0.0478909625090538, "grad_norm": 1.6015625, "learning_rate": 0.001687185944314138, "loss": 0.1949, "step": 27010 }, { "epoch": 0.047894508674363616, "grad_norm": 0.55859375, "learning_rate": 0.0016871408368224151, "loss": 0.3822, "step": 27012 }, { "epoch": 0.04789805483967343, "grad_norm": 1.0078125, "learning_rate": 0.0016870957267628788, "loss": 0.2411, "step": 27014 }, { "epoch": 0.04790160100498325, "grad_norm": 0.423828125, "learning_rate": 0.0016870506141357273, "loss": 0.1851, "step": 27016 }, { "epoch": 0.047905147170293066, "grad_norm": 0.82421875, "learning_rate": 0.0016870054989411572, "loss": 0.1851, "step": 27018 }, { "epoch": 0.04790869333560288, "grad_norm": 0.40625, "learning_rate": 0.0016869603811793662, "loss": 0.1984, "step": 27020 }, { "epoch": 0.047912239500912696, "grad_norm": 0.765625, "learning_rate": 0.0016869152608505512, "loss": 0.1396, "step": 27022 }, { "epoch": 0.04791578566622251, "grad_norm": 0.61328125, "learning_rate": 0.0016868701379549097, "loss": 0.1765, "step": 27024 }, { "epoch": 0.047919331831532325, "grad_norm": 0.482421875, "learning_rate": 0.0016868250124926392, "loss": 0.2057, "step": 27026 }, { "epoch": 0.04792287799684214, "grad_norm": 0.478515625, "learning_rate": 0.0016867798844639372, "loss": 0.2135, "step": 27028 }, { "epoch": 0.047926424162151954, "grad_norm": 2.984375, "learning_rate": 0.0016867347538690005, "loss": 0.2056, "step": 27030 }, { "epoch": 0.04792997032746177, "grad_norm": 0.439453125, "learning_rate": 0.0016866896207080272, "loss": 0.196, "step": 27032 }, { "epoch": 0.04793351649277158, "grad_norm": 0.70703125, "learning_rate": 0.001686644484981214, "loss": 0.2462, "step": 27034 }, { "epoch": 0.0479370626580814, "grad_norm": 0.447265625, "learning_rate": 0.0016865993466887586, "loss": 0.2099, "step": 27036 }, { "epoch": 0.04794060882339122, "grad_norm": 0.484375, "learning_rate": 0.0016865542058308585, "loss": 0.3989, "step": 27038 }, { "epoch": 0.047944154988701033, "grad_norm": 0.69921875, "learning_rate": 0.0016865090624077108, "loss": 0.1884, "step": 27040 }, { "epoch": 0.04794770115401085, "grad_norm": 0.439453125, "learning_rate": 0.0016864639164195134, "loss": 0.1829, "step": 27042 }, { "epoch": 0.04795124731932066, "grad_norm": 1.25, "learning_rate": 0.0016864187678664634, "loss": 0.2385, "step": 27044 }, { "epoch": 0.04795479348463048, "grad_norm": 0.333984375, "learning_rate": 0.0016863736167487585, "loss": 0.1863, "step": 27046 }, { "epoch": 0.04795833964994029, "grad_norm": 0.5078125, "learning_rate": 0.0016863284630665959, "loss": 0.1735, "step": 27048 }, { "epoch": 0.047961885815250106, "grad_norm": 1.015625, "learning_rate": 0.0016862833068201728, "loss": 0.1854, "step": 27050 }, { "epoch": 0.04796543198055992, "grad_norm": 0.5859375, "learning_rate": 0.0016862381480096872, "loss": 0.1997, "step": 27052 }, { "epoch": 0.047968978145869735, "grad_norm": 0.28515625, "learning_rate": 0.001686192986635337, "loss": 0.182, "step": 27054 }, { "epoch": 0.04797252431117955, "grad_norm": 0.263671875, "learning_rate": 0.001686147822697319, "loss": 0.1874, "step": 27056 }, { "epoch": 0.047976070476489364, "grad_norm": 0.326171875, "learning_rate": 0.0016861026561958306, "loss": 0.2006, "step": 27058 }, { "epoch": 0.04797961664179918, "grad_norm": 0.25, "learning_rate": 0.0016860574871310696, "loss": 0.2099, "step": 27060 }, { "epoch": 0.047983162807109, "grad_norm": 0.5, "learning_rate": 0.001686012315503234, "loss": 0.1624, "step": 27062 }, { "epoch": 0.047986708972418815, "grad_norm": 0.412109375, "learning_rate": 0.0016859671413125205, "loss": 0.2451, "step": 27064 }, { "epoch": 0.04799025513772863, "grad_norm": 0.369140625, "learning_rate": 0.0016859219645591272, "loss": 0.2396, "step": 27066 }, { "epoch": 0.047993801303038444, "grad_norm": 0.271484375, "learning_rate": 0.0016858767852432517, "loss": 0.203, "step": 27068 }, { "epoch": 0.04799734746834826, "grad_norm": 1.9765625, "learning_rate": 0.0016858316033650917, "loss": 0.2157, "step": 27070 }, { "epoch": 0.04800089363365807, "grad_norm": 0.69140625, "learning_rate": 0.001685786418924844, "loss": 0.1947, "step": 27072 }, { "epoch": 0.04800443979896789, "grad_norm": 0.76953125, "learning_rate": 0.001685741231922707, "loss": 0.2488, "step": 27074 }, { "epoch": 0.0480079859642777, "grad_norm": 2.109375, "learning_rate": 0.001685696042358878, "loss": 0.3698, "step": 27076 }, { "epoch": 0.04801153212958752, "grad_norm": 0.44921875, "learning_rate": 0.0016856508502335551, "loss": 0.1618, "step": 27078 }, { "epoch": 0.04801507829489733, "grad_norm": 0.53515625, "learning_rate": 0.001685605655546935, "loss": 0.2105, "step": 27080 }, { "epoch": 0.048018624460207146, "grad_norm": 1.578125, "learning_rate": 0.0016855604582992165, "loss": 0.2491, "step": 27082 }, { "epoch": 0.04802217062551697, "grad_norm": 1.0625, "learning_rate": 0.0016855152584905962, "loss": 0.4457, "step": 27084 }, { "epoch": 0.04802571679082678, "grad_norm": 0.64453125, "learning_rate": 0.0016854700561212727, "loss": 0.278, "step": 27086 }, { "epoch": 0.0480292629561366, "grad_norm": 0.357421875, "learning_rate": 0.0016854248511914429, "loss": 0.2004, "step": 27088 }, { "epoch": 0.04803280912144641, "grad_norm": 1.421875, "learning_rate": 0.0016853796437013052, "loss": 0.26, "step": 27090 }, { "epoch": 0.048036355286756226, "grad_norm": 1.796875, "learning_rate": 0.0016853344336510568, "loss": 0.3279, "step": 27092 }, { "epoch": 0.04803990145206604, "grad_norm": 0.41015625, "learning_rate": 0.0016852892210408954, "loss": 0.2015, "step": 27094 }, { "epoch": 0.048043447617375855, "grad_norm": 0.6015625, "learning_rate": 0.0016852440058710192, "loss": 0.1767, "step": 27096 }, { "epoch": 0.04804699378268567, "grad_norm": 0.408203125, "learning_rate": 0.0016851987881416255, "loss": 0.2141, "step": 27098 }, { "epoch": 0.048050539947995484, "grad_norm": 0.2255859375, "learning_rate": 0.001685153567852912, "loss": 0.2427, "step": 27100 }, { "epoch": 0.0480540861133053, "grad_norm": 0.53515625, "learning_rate": 0.0016851083450050773, "loss": 0.2414, "step": 27102 }, { "epoch": 0.04805763227861511, "grad_norm": 0.271484375, "learning_rate": 0.001685063119598318, "loss": 0.2359, "step": 27104 }, { "epoch": 0.048061178443924935, "grad_norm": 0.65234375, "learning_rate": 0.001685017891632833, "loss": 0.176, "step": 27106 }, { "epoch": 0.04806472460923475, "grad_norm": 0.609375, "learning_rate": 0.001684972661108819, "loss": 0.1763, "step": 27108 }, { "epoch": 0.048068270774544564, "grad_norm": 0.40625, "learning_rate": 0.0016849274280264748, "loss": 0.2518, "step": 27110 }, { "epoch": 0.04807181693985438, "grad_norm": 1.375, "learning_rate": 0.0016848821923859975, "loss": 0.2934, "step": 27112 }, { "epoch": 0.04807536310516419, "grad_norm": 0.4140625, "learning_rate": 0.0016848369541875854, "loss": 0.189, "step": 27114 }, { "epoch": 0.04807890927047401, "grad_norm": 0.419921875, "learning_rate": 0.001684791713431436, "loss": 0.2169, "step": 27116 }, { "epoch": 0.04808245543578382, "grad_norm": 0.2314453125, "learning_rate": 0.0016847464701177475, "loss": 0.3293, "step": 27118 }, { "epoch": 0.048086001601093636, "grad_norm": 0.51953125, "learning_rate": 0.0016847012242467176, "loss": 0.1872, "step": 27120 }, { "epoch": 0.04808954776640345, "grad_norm": 0.494140625, "learning_rate": 0.0016846559758185442, "loss": 0.1716, "step": 27122 }, { "epoch": 0.048093093931713266, "grad_norm": 0.32421875, "learning_rate": 0.001684610724833425, "loss": 0.1923, "step": 27124 }, { "epoch": 0.04809664009702308, "grad_norm": 0.3515625, "learning_rate": 0.0016845654712915581, "loss": 0.1978, "step": 27126 }, { "epoch": 0.048100186262332895, "grad_norm": 0.609375, "learning_rate": 0.0016845202151931418, "loss": 0.1957, "step": 27128 }, { "epoch": 0.048103732427642716, "grad_norm": 1.875, "learning_rate": 0.001684474956538373, "loss": 0.2163, "step": 27130 }, { "epoch": 0.04810727859295253, "grad_norm": 0.50390625, "learning_rate": 0.0016844296953274507, "loss": 0.17, "step": 27132 }, { "epoch": 0.048110824758262345, "grad_norm": 0.416015625, "learning_rate": 0.0016843844315605721, "loss": 0.2131, "step": 27134 }, { "epoch": 0.04811437092357216, "grad_norm": 1.171875, "learning_rate": 0.001684339165237936, "loss": 0.2205, "step": 27136 }, { "epoch": 0.048117917088881974, "grad_norm": 0.255859375, "learning_rate": 0.001684293896359739, "loss": 0.2053, "step": 27138 }, { "epoch": 0.04812146325419179, "grad_norm": 0.18359375, "learning_rate": 0.0016842486249261802, "loss": 0.1666, "step": 27140 }, { "epoch": 0.0481250094195016, "grad_norm": 0.259765625, "learning_rate": 0.0016842033509374575, "loss": 0.1818, "step": 27142 }, { "epoch": 0.04812855558481142, "grad_norm": 3.890625, "learning_rate": 0.0016841580743937685, "loss": 0.3159, "step": 27144 }, { "epoch": 0.04813210175012123, "grad_norm": 0.5546875, "learning_rate": 0.0016841127952953114, "loss": 0.3616, "step": 27146 }, { "epoch": 0.04813564791543105, "grad_norm": 0.51171875, "learning_rate": 0.0016840675136422843, "loss": 0.1813, "step": 27148 }, { "epoch": 0.04813919408074086, "grad_norm": 0.515625, "learning_rate": 0.001684022229434885, "loss": 0.1584, "step": 27150 }, { "epoch": 0.04814274024605068, "grad_norm": 0.265625, "learning_rate": 0.001683976942673312, "loss": 0.1954, "step": 27152 }, { "epoch": 0.0481462864113605, "grad_norm": 0.4140625, "learning_rate": 0.001683931653357763, "loss": 0.1794, "step": 27154 }, { "epoch": 0.04814983257667031, "grad_norm": 0.14453125, "learning_rate": 0.001683886361488436, "loss": 0.1702, "step": 27156 }, { "epoch": 0.04815337874198013, "grad_norm": 0.36328125, "learning_rate": 0.0016838410670655293, "loss": 0.2468, "step": 27158 }, { "epoch": 0.04815692490728994, "grad_norm": 0.53515625, "learning_rate": 0.0016837957700892407, "loss": 0.1892, "step": 27160 }, { "epoch": 0.048160471072599756, "grad_norm": 0.25390625, "learning_rate": 0.0016837504705597688, "loss": 0.1769, "step": 27162 }, { "epoch": 0.04816401723790957, "grad_norm": 0.53125, "learning_rate": 0.0016837051684773113, "loss": 0.2333, "step": 27164 }, { "epoch": 0.048167563403219385, "grad_norm": 0.224609375, "learning_rate": 0.0016836598638420666, "loss": 0.1821, "step": 27166 }, { "epoch": 0.0481711095685292, "grad_norm": 0.66015625, "learning_rate": 0.0016836145566542324, "loss": 0.177, "step": 27168 }, { "epoch": 0.048174655733839014, "grad_norm": 0.37890625, "learning_rate": 0.0016835692469140075, "loss": 0.2508, "step": 27170 }, { "epoch": 0.04817820189914883, "grad_norm": 0.69921875, "learning_rate": 0.0016835239346215892, "loss": 0.1475, "step": 27172 }, { "epoch": 0.04818174806445865, "grad_norm": 0.2431640625, "learning_rate": 0.0016834786197771764, "loss": 0.14, "step": 27174 }, { "epoch": 0.048185294229768465, "grad_norm": 0.6875, "learning_rate": 0.001683433302380967, "loss": 0.2758, "step": 27176 }, { "epoch": 0.04818884039507828, "grad_norm": 0.84375, "learning_rate": 0.0016833879824331595, "loss": 0.3031, "step": 27178 }, { "epoch": 0.048192386560388094, "grad_norm": 1.09375, "learning_rate": 0.0016833426599339519, "loss": 0.2239, "step": 27180 }, { "epoch": 0.04819593272569791, "grad_norm": 0.244140625, "learning_rate": 0.0016832973348835417, "loss": 0.2178, "step": 27182 }, { "epoch": 0.04819947889100772, "grad_norm": 0.5234375, "learning_rate": 0.0016832520072821283, "loss": 0.1862, "step": 27184 }, { "epoch": 0.04820302505631754, "grad_norm": 0.494140625, "learning_rate": 0.0016832066771299092, "loss": 0.2372, "step": 27186 }, { "epoch": 0.04820657122162735, "grad_norm": 0.55078125, "learning_rate": 0.0016831613444270833, "loss": 0.169, "step": 27188 }, { "epoch": 0.04821011738693717, "grad_norm": 0.5703125, "learning_rate": 0.0016831160091738478, "loss": 0.1817, "step": 27190 }, { "epoch": 0.04821366355224698, "grad_norm": 0.609375, "learning_rate": 0.0016830706713704018, "loss": 0.1706, "step": 27192 }, { "epoch": 0.048217209717556796, "grad_norm": 0.55859375, "learning_rate": 0.0016830253310169433, "loss": 0.1905, "step": 27194 }, { "epoch": 0.04822075588286661, "grad_norm": 0.40234375, "learning_rate": 0.001682979988113671, "loss": 0.1929, "step": 27196 }, { "epoch": 0.04822430204817643, "grad_norm": 1.5625, "learning_rate": 0.0016829346426607825, "loss": 0.2519, "step": 27198 }, { "epoch": 0.048227848213486246, "grad_norm": 1.625, "learning_rate": 0.0016828892946584768, "loss": 0.2396, "step": 27200 }, { "epoch": 0.04823139437879606, "grad_norm": 0.341796875, "learning_rate": 0.0016828439441069515, "loss": 0.2009, "step": 27202 }, { "epoch": 0.048234940544105875, "grad_norm": 0.1796875, "learning_rate": 0.0016827985910064056, "loss": 0.188, "step": 27204 }, { "epoch": 0.04823848670941569, "grad_norm": 2.515625, "learning_rate": 0.0016827532353570374, "loss": 0.3611, "step": 27206 }, { "epoch": 0.048242032874725504, "grad_norm": 0.474609375, "learning_rate": 0.0016827078771590447, "loss": 0.3856, "step": 27208 }, { "epoch": 0.04824557904003532, "grad_norm": 0.490234375, "learning_rate": 0.0016826625164126266, "loss": 0.3042, "step": 27210 }, { "epoch": 0.048249125205345134, "grad_norm": 1.1796875, "learning_rate": 0.0016826171531179805, "loss": 0.1793, "step": 27212 }, { "epoch": 0.04825267137065495, "grad_norm": 0.66015625, "learning_rate": 0.0016825717872753058, "loss": 0.1771, "step": 27214 }, { "epoch": 0.04825621753596476, "grad_norm": 0.384765625, "learning_rate": 0.0016825264188848007, "loss": 0.2396, "step": 27216 }, { "epoch": 0.04825976370127458, "grad_norm": 0.515625, "learning_rate": 0.001682481047946663, "loss": 0.2228, "step": 27218 }, { "epoch": 0.0482633098665844, "grad_norm": 0.41796875, "learning_rate": 0.0016824356744610917, "loss": 0.2015, "step": 27220 }, { "epoch": 0.04826685603189421, "grad_norm": 0.333984375, "learning_rate": 0.0016823902984282853, "loss": 0.2322, "step": 27222 }, { "epoch": 0.04827040219720403, "grad_norm": 0.82421875, "learning_rate": 0.0016823449198484417, "loss": 0.1882, "step": 27224 }, { "epoch": 0.04827394836251384, "grad_norm": 0.41015625, "learning_rate": 0.0016822995387217599, "loss": 0.2796, "step": 27226 }, { "epoch": 0.04827749452782366, "grad_norm": 0.9453125, "learning_rate": 0.001682254155048438, "loss": 0.1583, "step": 27228 }, { "epoch": 0.04828104069313347, "grad_norm": 0.4921875, "learning_rate": 0.0016822087688286747, "loss": 0.1893, "step": 27230 }, { "epoch": 0.048284586858443286, "grad_norm": 0.9921875, "learning_rate": 0.0016821633800626686, "loss": 0.2557, "step": 27232 }, { "epoch": 0.0482881330237531, "grad_norm": 0.3359375, "learning_rate": 0.0016821179887506177, "loss": 0.1586, "step": 27234 }, { "epoch": 0.048291679189062915, "grad_norm": 1.828125, "learning_rate": 0.0016820725948927212, "loss": 0.2888, "step": 27236 }, { "epoch": 0.04829522535437273, "grad_norm": 0.4921875, "learning_rate": 0.001682027198489177, "loss": 0.247, "step": 27238 }, { "epoch": 0.048298771519682544, "grad_norm": 0.94140625, "learning_rate": 0.0016819817995401838, "loss": 0.2127, "step": 27240 }, { "epoch": 0.048302317684992366, "grad_norm": 1.0546875, "learning_rate": 0.0016819363980459406, "loss": 0.2308, "step": 27242 }, { "epoch": 0.04830586385030218, "grad_norm": 0.2109375, "learning_rate": 0.0016818909940066456, "loss": 0.2207, "step": 27244 }, { "epoch": 0.048309410015611995, "grad_norm": 0.33984375, "learning_rate": 0.0016818455874224973, "loss": 0.2246, "step": 27246 }, { "epoch": 0.04831295618092181, "grad_norm": 0.4921875, "learning_rate": 0.0016818001782936945, "loss": 0.2301, "step": 27248 }, { "epoch": 0.048316502346231624, "grad_norm": 0.76171875, "learning_rate": 0.0016817547666204355, "loss": 0.258, "step": 27250 }, { "epoch": 0.04832004851154144, "grad_norm": 1.484375, "learning_rate": 0.001681709352402919, "loss": 0.232, "step": 27252 }, { "epoch": 0.04832359467685125, "grad_norm": 0.298828125, "learning_rate": 0.0016816639356413438, "loss": 0.2077, "step": 27254 }, { "epoch": 0.04832714084216107, "grad_norm": 0.5, "learning_rate": 0.0016816185163359084, "loss": 0.2552, "step": 27256 }, { "epoch": 0.04833068700747088, "grad_norm": 0.3515625, "learning_rate": 0.0016815730944868116, "loss": 0.1645, "step": 27258 }, { "epoch": 0.0483342331727807, "grad_norm": 0.5703125, "learning_rate": 0.0016815276700942517, "loss": 0.261, "step": 27260 }, { "epoch": 0.04833777933809051, "grad_norm": 0.330078125, "learning_rate": 0.0016814822431584276, "loss": 0.1779, "step": 27262 }, { "epoch": 0.048341325503400326, "grad_norm": 1.046875, "learning_rate": 0.001681436813679538, "loss": 0.1821, "step": 27264 }, { "epoch": 0.04834487166871015, "grad_norm": 0.427734375, "learning_rate": 0.0016813913816577817, "loss": 0.1995, "step": 27266 }, { "epoch": 0.04834841783401996, "grad_norm": 0.435546875, "learning_rate": 0.0016813459470933568, "loss": 0.2083, "step": 27268 }, { "epoch": 0.048351963999329776, "grad_norm": 1.140625, "learning_rate": 0.0016813005099864627, "loss": 0.3625, "step": 27270 }, { "epoch": 0.04835551016463959, "grad_norm": 0.412109375, "learning_rate": 0.0016812550703372978, "loss": 0.1887, "step": 27272 }, { "epoch": 0.048359056329949406, "grad_norm": 0.96484375, "learning_rate": 0.0016812096281460607, "loss": 0.1821, "step": 27274 }, { "epoch": 0.04836260249525922, "grad_norm": 0.37890625, "learning_rate": 0.0016811641834129506, "loss": 0.1943, "step": 27276 }, { "epoch": 0.048366148660569035, "grad_norm": 1.125, "learning_rate": 0.0016811187361381655, "loss": 0.272, "step": 27278 }, { "epoch": 0.04836969482587885, "grad_norm": 1.6796875, "learning_rate": 0.0016810732863219047, "loss": 0.3667, "step": 27280 }, { "epoch": 0.048373240991188664, "grad_norm": 0.404296875, "learning_rate": 0.0016810278339643673, "loss": 0.203, "step": 27282 }, { "epoch": 0.04837678715649848, "grad_norm": 0.78125, "learning_rate": 0.0016809823790657511, "loss": 0.5751, "step": 27284 }, { "epoch": 0.04838033332180829, "grad_norm": 0.37890625, "learning_rate": 0.0016809369216262558, "loss": 0.2276, "step": 27286 }, { "epoch": 0.048383879487118114, "grad_norm": 1.34375, "learning_rate": 0.0016808914616460798, "loss": 0.2584, "step": 27288 }, { "epoch": 0.04838742565242793, "grad_norm": 0.353515625, "learning_rate": 0.001680845999125422, "loss": 0.3475, "step": 27290 }, { "epoch": 0.048390971817737743, "grad_norm": 0.203125, "learning_rate": 0.0016808005340644809, "loss": 0.1675, "step": 27292 }, { "epoch": 0.04839451798304756, "grad_norm": 0.455078125, "learning_rate": 0.0016807550664634562, "loss": 0.187, "step": 27294 }, { "epoch": 0.04839806414835737, "grad_norm": 0.390625, "learning_rate": 0.0016807095963225457, "loss": 0.1954, "step": 27296 }, { "epoch": 0.04840161031366719, "grad_norm": 10.5, "learning_rate": 0.001680664123641949, "loss": 0.2485, "step": 27298 }, { "epoch": 0.048405156478977, "grad_norm": 0.890625, "learning_rate": 0.0016806186484218646, "loss": 0.2187, "step": 27300 }, { "epoch": 0.048408702644286816, "grad_norm": 0.31640625, "learning_rate": 0.0016805731706624914, "loss": 0.1903, "step": 27302 }, { "epoch": 0.04841224880959663, "grad_norm": 0.298828125, "learning_rate": 0.0016805276903640285, "loss": 0.2631, "step": 27304 }, { "epoch": 0.048415794974906445, "grad_norm": 0.494140625, "learning_rate": 0.0016804822075266745, "loss": 0.178, "step": 27306 }, { "epoch": 0.04841934114021626, "grad_norm": 0.50390625, "learning_rate": 0.001680436722150629, "loss": 0.2145, "step": 27308 }, { "epoch": 0.04842288730552608, "grad_norm": 1.390625, "learning_rate": 0.0016803912342360898, "loss": 0.2795, "step": 27310 }, { "epoch": 0.048426433470835896, "grad_norm": 2.34375, "learning_rate": 0.001680345743783257, "loss": 0.453, "step": 27312 }, { "epoch": 0.04842997963614571, "grad_norm": 0.6875, "learning_rate": 0.0016803002507923288, "loss": 0.213, "step": 27314 }, { "epoch": 0.048433525801455525, "grad_norm": 0.357421875, "learning_rate": 0.0016802547552635042, "loss": 0.3996, "step": 27316 }, { "epoch": 0.04843707196676534, "grad_norm": 2.09375, "learning_rate": 0.0016802092571969827, "loss": 0.2752, "step": 27318 }, { "epoch": 0.048440618132075154, "grad_norm": 0.7578125, "learning_rate": 0.001680163756592963, "loss": 0.2484, "step": 27320 }, { "epoch": 0.04844416429738497, "grad_norm": 0.875, "learning_rate": 0.0016801182534516438, "loss": 0.1838, "step": 27322 }, { "epoch": 0.04844771046269478, "grad_norm": 0.40234375, "learning_rate": 0.0016800727477732243, "loss": 0.4011, "step": 27324 }, { "epoch": 0.0484512566280046, "grad_norm": 0.28515625, "learning_rate": 0.0016800272395579034, "loss": 0.2467, "step": 27326 }, { "epoch": 0.04845480279331441, "grad_norm": 0.357421875, "learning_rate": 0.0016799817288058808, "loss": 0.2006, "step": 27328 }, { "epoch": 0.04845834895862423, "grad_norm": 4.90625, "learning_rate": 0.0016799362155173543, "loss": 0.3769, "step": 27330 }, { "epoch": 0.04846189512393404, "grad_norm": 0.408203125, "learning_rate": 0.001679890699692524, "loss": 0.1401, "step": 27332 }, { "epoch": 0.04846544128924386, "grad_norm": 14.875, "learning_rate": 0.0016798451813315888, "loss": 0.3457, "step": 27334 }, { "epoch": 0.04846898745455368, "grad_norm": 0.70703125, "learning_rate": 0.0016797996604347477, "loss": 0.2404, "step": 27336 }, { "epoch": 0.04847253361986349, "grad_norm": 0.7734375, "learning_rate": 0.0016797541370021992, "loss": 0.1658, "step": 27338 }, { "epoch": 0.04847607978517331, "grad_norm": 3.375, "learning_rate": 0.001679708611034143, "loss": 0.3007, "step": 27340 }, { "epoch": 0.04847962595048312, "grad_norm": 0.3671875, "learning_rate": 0.001679663082530778, "loss": 0.1615, "step": 27342 }, { "epoch": 0.048483172115792936, "grad_norm": 0.375, "learning_rate": 0.0016796175514923039, "loss": 0.2092, "step": 27344 }, { "epoch": 0.04848671828110275, "grad_norm": 0.77734375, "learning_rate": 0.0016795720179189186, "loss": 0.1919, "step": 27346 }, { "epoch": 0.048490264446412565, "grad_norm": 0.32421875, "learning_rate": 0.0016795264818108226, "loss": 0.1425, "step": 27348 }, { "epoch": 0.04849381061172238, "grad_norm": 0.3515625, "learning_rate": 0.001679480943168214, "loss": 0.5534, "step": 27350 }, { "epoch": 0.048497356777032194, "grad_norm": 0.88671875, "learning_rate": 0.0016794354019912924, "loss": 0.2148, "step": 27352 }, { "epoch": 0.04850090294234201, "grad_norm": 3.078125, "learning_rate": 0.001679389858280257, "loss": 0.2641, "step": 27354 }, { "epoch": 0.04850444910765183, "grad_norm": 0.65625, "learning_rate": 0.001679344312035307, "loss": 0.182, "step": 27356 }, { "epoch": 0.048507995272961645, "grad_norm": 0.5078125, "learning_rate": 0.0016792987632566411, "loss": 0.2607, "step": 27358 }, { "epoch": 0.04851154143827146, "grad_norm": 0.408203125, "learning_rate": 0.0016792532119444595, "loss": 0.1941, "step": 27360 }, { "epoch": 0.048515087603581274, "grad_norm": 0.2470703125, "learning_rate": 0.0016792076580989606, "loss": 0.1526, "step": 27362 }, { "epoch": 0.04851863376889109, "grad_norm": 0.7109375, "learning_rate": 0.0016791621017203438, "loss": 0.2434, "step": 27364 }, { "epoch": 0.0485221799342009, "grad_norm": 0.447265625, "learning_rate": 0.0016791165428088081, "loss": 0.2025, "step": 27366 }, { "epoch": 0.04852572609951072, "grad_norm": 0.37890625, "learning_rate": 0.0016790709813645536, "loss": 0.2147, "step": 27368 }, { "epoch": 0.04852927226482053, "grad_norm": 0.53515625, "learning_rate": 0.0016790254173877786, "loss": 0.2113, "step": 27370 }, { "epoch": 0.048532818430130346, "grad_norm": 0.51171875, "learning_rate": 0.001678979850878683, "loss": 0.225, "step": 27372 }, { "epoch": 0.04853636459544016, "grad_norm": 0.376953125, "learning_rate": 0.0016789342818374655, "loss": 0.2878, "step": 27374 }, { "epoch": 0.048539910760749976, "grad_norm": 0.5078125, "learning_rate": 0.0016788887102643258, "loss": 0.1826, "step": 27376 }, { "epoch": 0.0485434569260598, "grad_norm": 0.490234375, "learning_rate": 0.0016788431361594634, "loss": 0.2061, "step": 27378 }, { "epoch": 0.04854700309136961, "grad_norm": 0.470703125, "learning_rate": 0.001678797559523077, "loss": 0.1844, "step": 27380 }, { "epoch": 0.048550549256679426, "grad_norm": 1.0703125, "learning_rate": 0.0016787519803553666, "loss": 0.2227, "step": 27382 }, { "epoch": 0.04855409542198924, "grad_norm": 4.8125, "learning_rate": 0.0016787063986565313, "loss": 0.194, "step": 27384 }, { "epoch": 0.048557641587299055, "grad_norm": 0.375, "learning_rate": 0.0016786608144267702, "loss": 0.2409, "step": 27386 }, { "epoch": 0.04856118775260887, "grad_norm": 0.37890625, "learning_rate": 0.0016786152276662828, "loss": 0.2078, "step": 27388 }, { "epoch": 0.048564733917918684, "grad_norm": 0.59765625, "learning_rate": 0.0016785696383752684, "loss": 0.2104, "step": 27390 }, { "epoch": 0.0485682800832285, "grad_norm": 0.23046875, "learning_rate": 0.0016785240465539268, "loss": 0.1736, "step": 27392 }, { "epoch": 0.04857182624853831, "grad_norm": 0.26953125, "learning_rate": 0.0016784784522024569, "loss": 0.1716, "step": 27394 }, { "epoch": 0.04857537241384813, "grad_norm": 0.953125, "learning_rate": 0.001678432855321058, "loss": 0.1756, "step": 27396 }, { "epoch": 0.04857891857915794, "grad_norm": 2.5625, "learning_rate": 0.00167838725590993, "loss": 0.2079, "step": 27398 }, { "epoch": 0.04858246474446776, "grad_norm": 0.298828125, "learning_rate": 0.001678341653969272, "loss": 0.1601, "step": 27400 }, { "epoch": 0.04858601090977758, "grad_norm": 0.96484375, "learning_rate": 0.001678296049499284, "loss": 0.1651, "step": 27402 }, { "epoch": 0.04858955707508739, "grad_norm": 0.546875, "learning_rate": 0.0016782504425001642, "loss": 0.207, "step": 27404 }, { "epoch": 0.04859310324039721, "grad_norm": 0.76953125, "learning_rate": 0.0016782048329721136, "loss": 0.1949, "step": 27406 }, { "epoch": 0.04859664940570702, "grad_norm": 0.337890625, "learning_rate": 0.0016781592209153303, "loss": 0.4266, "step": 27408 }, { "epoch": 0.04860019557101684, "grad_norm": 2.109375, "learning_rate": 0.001678113606330015, "loss": 0.2862, "step": 27410 }, { "epoch": 0.04860374173632665, "grad_norm": 2.921875, "learning_rate": 0.0016780679892163663, "loss": 0.2805, "step": 27412 }, { "epoch": 0.048607287901636466, "grad_norm": 1.4375, "learning_rate": 0.001678022369574584, "loss": 0.2626, "step": 27414 }, { "epoch": 0.04861083406694628, "grad_norm": 0.4453125, "learning_rate": 0.0016779767474048675, "loss": 0.2185, "step": 27416 }, { "epoch": 0.048614380232256095, "grad_norm": 0.75, "learning_rate": 0.0016779311227074166, "loss": 0.2095, "step": 27418 }, { "epoch": 0.04861792639756591, "grad_norm": 0.5390625, "learning_rate": 0.0016778854954824307, "loss": 0.2161, "step": 27420 }, { "epoch": 0.048621472562875724, "grad_norm": 0.59375, "learning_rate": 0.0016778398657301095, "loss": 0.2607, "step": 27422 }, { "epoch": 0.048625018728185546, "grad_norm": 0.51171875, "learning_rate": 0.001677794233450652, "loss": 0.1964, "step": 27424 }, { "epoch": 0.04862856489349536, "grad_norm": 0.376953125, "learning_rate": 0.0016777485986442583, "loss": 0.1694, "step": 27426 }, { "epoch": 0.048632111058805175, "grad_norm": 0.26171875, "learning_rate": 0.0016777029613111276, "loss": 0.1694, "step": 27428 }, { "epoch": 0.04863565722411499, "grad_norm": 0.3984375, "learning_rate": 0.00167765732145146, "loss": 0.1756, "step": 27430 }, { "epoch": 0.048639203389424804, "grad_norm": 1.6640625, "learning_rate": 0.0016776116790654546, "loss": 0.2613, "step": 27432 }, { "epoch": 0.04864274955473462, "grad_norm": 0.27734375, "learning_rate": 0.0016775660341533114, "loss": 0.1737, "step": 27434 }, { "epoch": 0.04864629572004443, "grad_norm": 0.275390625, "learning_rate": 0.00167752038671523, "loss": 0.201, "step": 27436 }, { "epoch": 0.04864984188535425, "grad_norm": 0.349609375, "learning_rate": 0.0016774747367514096, "loss": 0.1553, "step": 27438 }, { "epoch": 0.04865338805066406, "grad_norm": 1.15625, "learning_rate": 0.0016774290842620503, "loss": 0.304, "step": 27440 }, { "epoch": 0.04865693421597388, "grad_norm": 0.5859375, "learning_rate": 0.0016773834292473518, "loss": 0.2129, "step": 27442 }, { "epoch": 0.04866048038128369, "grad_norm": 5.84375, "learning_rate": 0.0016773377717075129, "loss": 0.2513, "step": 27444 }, { "epoch": 0.04866402654659351, "grad_norm": 0.302734375, "learning_rate": 0.0016772921116427345, "loss": 0.3658, "step": 27446 }, { "epoch": 0.04866757271190333, "grad_norm": 0.244140625, "learning_rate": 0.0016772464490532157, "loss": 0.1777, "step": 27448 }, { "epoch": 0.04867111887721314, "grad_norm": 0.59765625, "learning_rate": 0.001677200783939156, "loss": 0.216, "step": 27450 }, { "epoch": 0.048674665042522956, "grad_norm": 0.59765625, "learning_rate": 0.0016771551163007555, "loss": 0.1837, "step": 27452 }, { "epoch": 0.04867821120783277, "grad_norm": 1.2109375, "learning_rate": 0.0016771094461382138, "loss": 0.262, "step": 27454 }, { "epoch": 0.048681757373142585, "grad_norm": 0.314453125, "learning_rate": 0.0016770637734517307, "loss": 0.1647, "step": 27456 }, { "epoch": 0.0486853035384524, "grad_norm": 1.0859375, "learning_rate": 0.0016770180982415056, "loss": 0.199, "step": 27458 }, { "epoch": 0.048688849703762214, "grad_norm": 0.63671875, "learning_rate": 0.0016769724205077388, "loss": 0.2328, "step": 27460 }, { "epoch": 0.04869239586907203, "grad_norm": 0.400390625, "learning_rate": 0.0016769267402506295, "loss": 0.2053, "step": 27462 }, { "epoch": 0.048695942034381844, "grad_norm": 0.7578125, "learning_rate": 0.0016768810574703777, "loss": 0.2549, "step": 27464 }, { "epoch": 0.04869948819969166, "grad_norm": 0.80078125, "learning_rate": 0.0016768353721671834, "loss": 0.2234, "step": 27466 }, { "epoch": 0.04870303436500147, "grad_norm": 0.5234375, "learning_rate": 0.0016767896843412464, "loss": 0.146, "step": 27468 }, { "epoch": 0.048706580530311294, "grad_norm": 0.953125, "learning_rate": 0.001676743993992766, "loss": 0.1832, "step": 27470 }, { "epoch": 0.04871012669562111, "grad_norm": 0.44921875, "learning_rate": 0.001676698301121943, "loss": 0.1689, "step": 27472 }, { "epoch": 0.04871367286093092, "grad_norm": 0.375, "learning_rate": 0.001676652605728976, "loss": 0.1697, "step": 27474 }, { "epoch": 0.04871721902624074, "grad_norm": 1.171875, "learning_rate": 0.0016766069078140656, "loss": 0.2778, "step": 27476 }, { "epoch": 0.04872076519155055, "grad_norm": 0.474609375, "learning_rate": 0.0016765612073774115, "loss": 0.1741, "step": 27478 }, { "epoch": 0.04872431135686037, "grad_norm": 0.47265625, "learning_rate": 0.001676515504419214, "loss": 0.362, "step": 27480 }, { "epoch": 0.04872785752217018, "grad_norm": 2.296875, "learning_rate": 0.0016764697989396719, "loss": 0.3004, "step": 27482 }, { "epoch": 0.048731403687479996, "grad_norm": 0.423828125, "learning_rate": 0.0016764240909389863, "loss": 0.2278, "step": 27484 }, { "epoch": 0.04873494985278981, "grad_norm": 0.62109375, "learning_rate": 0.0016763783804173563, "loss": 0.1944, "step": 27486 }, { "epoch": 0.048738496018099625, "grad_norm": 0.9296875, "learning_rate": 0.001676332667374982, "loss": 0.2052, "step": 27488 }, { "epoch": 0.04874204218340944, "grad_norm": 0.59375, "learning_rate": 0.0016762869518120635, "loss": 0.2697, "step": 27490 }, { "epoch": 0.04874558834871926, "grad_norm": 1.0390625, "learning_rate": 0.0016762412337288008, "loss": 0.1912, "step": 27492 }, { "epoch": 0.048749134514029076, "grad_norm": 1.7109375, "learning_rate": 0.0016761955131253936, "loss": 0.217, "step": 27494 }, { "epoch": 0.04875268067933889, "grad_norm": 0.6640625, "learning_rate": 0.0016761497900020417, "loss": 0.2027, "step": 27496 }, { "epoch": 0.048756226844648705, "grad_norm": 0.279296875, "learning_rate": 0.0016761040643589453, "loss": 0.1727, "step": 27498 }, { "epoch": 0.04875977300995852, "grad_norm": 0.3828125, "learning_rate": 0.0016760583361963045, "loss": 0.18, "step": 27500 }, { "epoch": 0.048763319175268334, "grad_norm": 0.59375, "learning_rate": 0.001676012605514319, "loss": 0.2111, "step": 27502 }, { "epoch": 0.04876686534057815, "grad_norm": 0.416015625, "learning_rate": 0.0016759668723131892, "loss": 0.1468, "step": 27504 }, { "epoch": 0.04877041150588796, "grad_norm": 0.380859375, "learning_rate": 0.0016759211365931148, "loss": 0.2135, "step": 27506 }, { "epoch": 0.04877395767119778, "grad_norm": 0.8046875, "learning_rate": 0.0016758753983542956, "loss": 0.1693, "step": 27508 }, { "epoch": 0.04877750383650759, "grad_norm": 1.1171875, "learning_rate": 0.0016758296575969322, "loss": 0.1903, "step": 27510 }, { "epoch": 0.04878105000181741, "grad_norm": 0.263671875, "learning_rate": 0.0016757839143212243, "loss": 0.1966, "step": 27512 }, { "epoch": 0.04878459616712723, "grad_norm": 0.478515625, "learning_rate": 0.0016757381685273721, "loss": 0.2254, "step": 27514 }, { "epoch": 0.04878814233243704, "grad_norm": 0.271484375, "learning_rate": 0.0016756924202155753, "loss": 0.1927, "step": 27516 }, { "epoch": 0.04879168849774686, "grad_norm": 0.66796875, "learning_rate": 0.0016756466693860345, "loss": 0.3193, "step": 27518 }, { "epoch": 0.04879523466305667, "grad_norm": 1.515625, "learning_rate": 0.0016756009160389495, "loss": 0.3272, "step": 27520 }, { "epoch": 0.048798780828366486, "grad_norm": 3.5625, "learning_rate": 0.0016755551601745206, "loss": 0.5131, "step": 27522 }, { "epoch": 0.0488023269936763, "grad_norm": 0.76953125, "learning_rate": 0.0016755094017929476, "loss": 0.2043, "step": 27524 }, { "epoch": 0.048805873158986116, "grad_norm": 1.03125, "learning_rate": 0.0016754636408944305, "loss": 0.2843, "step": 27526 }, { "epoch": 0.04880941932429593, "grad_norm": 0.421875, "learning_rate": 0.00167541787747917, "loss": 0.2117, "step": 27528 }, { "epoch": 0.048812965489605745, "grad_norm": 0.81640625, "learning_rate": 0.0016753721115473656, "loss": 0.2117, "step": 27530 }, { "epoch": 0.04881651165491556, "grad_norm": 0.79296875, "learning_rate": 0.0016753263430992183, "loss": 0.2181, "step": 27532 }, { "epoch": 0.048820057820225374, "grad_norm": 0.328125, "learning_rate": 0.0016752805721349277, "loss": 0.1528, "step": 27534 }, { "epoch": 0.04882360398553519, "grad_norm": 0.421875, "learning_rate": 0.0016752347986546939, "loss": 0.2262, "step": 27536 }, { "epoch": 0.04882715015084501, "grad_norm": 0.216796875, "learning_rate": 0.0016751890226587172, "loss": 0.2413, "step": 27538 }, { "epoch": 0.048830696316154824, "grad_norm": 0.703125, "learning_rate": 0.001675143244147198, "loss": 0.1929, "step": 27540 }, { "epoch": 0.04883424248146464, "grad_norm": 1.1875, "learning_rate": 0.0016750974631203359, "loss": 0.1618, "step": 27542 }, { "epoch": 0.048837788646774453, "grad_norm": 0.28125, "learning_rate": 0.0016750516795783319, "loss": 0.2506, "step": 27544 }, { "epoch": 0.04884133481208427, "grad_norm": 0.5703125, "learning_rate": 0.0016750058935213858, "loss": 0.2427, "step": 27546 }, { "epoch": 0.04884488097739408, "grad_norm": 0.54296875, "learning_rate": 0.0016749601049496979, "loss": 0.1424, "step": 27548 }, { "epoch": 0.0488484271427039, "grad_norm": 0.474609375, "learning_rate": 0.0016749143138634686, "loss": 0.2198, "step": 27550 }, { "epoch": 0.04885197330801371, "grad_norm": 0.27734375, "learning_rate": 0.001674868520262898, "loss": 0.2631, "step": 27552 }, { "epoch": 0.048855519473323526, "grad_norm": 0.466796875, "learning_rate": 0.0016748227241481861, "loss": 0.2152, "step": 27554 }, { "epoch": 0.04885906563863334, "grad_norm": 1.8671875, "learning_rate": 0.001674776925519534, "loss": 0.2257, "step": 27556 }, { "epoch": 0.048862611803943155, "grad_norm": 0.46875, "learning_rate": 0.0016747311243771412, "loss": 0.2459, "step": 27558 }, { "epoch": 0.04886615796925298, "grad_norm": 0.51953125, "learning_rate": 0.0016746853207212085, "loss": 0.341, "step": 27560 }, { "epoch": 0.04886970413456279, "grad_norm": 0.7578125, "learning_rate": 0.001674639514551936, "loss": 0.2167, "step": 27562 }, { "epoch": 0.048873250299872606, "grad_norm": 0.451171875, "learning_rate": 0.0016745937058695237, "loss": 0.2854, "step": 27564 }, { "epoch": 0.04887679646518242, "grad_norm": 0.55078125, "learning_rate": 0.0016745478946741728, "loss": 0.3759, "step": 27566 }, { "epoch": 0.048880342630492235, "grad_norm": 0.859375, "learning_rate": 0.0016745020809660826, "loss": 0.3439, "step": 27568 }, { "epoch": 0.04888388879580205, "grad_norm": 0.298828125, "learning_rate": 0.0016744562647454547, "loss": 0.177, "step": 27570 }, { "epoch": 0.048887434961111864, "grad_norm": 0.67578125, "learning_rate": 0.0016744104460124883, "loss": 0.1629, "step": 27572 }, { "epoch": 0.04889098112642168, "grad_norm": 0.376953125, "learning_rate": 0.0016743646247673843, "loss": 0.2112, "step": 27574 }, { "epoch": 0.04889452729173149, "grad_norm": 0.89453125, "learning_rate": 0.0016743188010103429, "loss": 0.1197, "step": 27576 }, { "epoch": 0.04889807345704131, "grad_norm": 0.90234375, "learning_rate": 0.0016742729747415651, "loss": 0.2486, "step": 27578 }, { "epoch": 0.04890161962235112, "grad_norm": 0.4375, "learning_rate": 0.0016742271459612506, "loss": 0.2216, "step": 27580 }, { "epoch": 0.048905165787660944, "grad_norm": 0.6328125, "learning_rate": 0.0016741813146696001, "loss": 0.2427, "step": 27582 }, { "epoch": 0.04890871195297076, "grad_norm": 0.38671875, "learning_rate": 0.0016741354808668142, "loss": 0.1873, "step": 27584 }, { "epoch": 0.04891225811828057, "grad_norm": 1.8203125, "learning_rate": 0.0016740896445530933, "loss": 0.196, "step": 27586 }, { "epoch": 0.04891580428359039, "grad_norm": 1.125, "learning_rate": 0.0016740438057286375, "loss": 0.2936, "step": 27588 }, { "epoch": 0.0489193504489002, "grad_norm": 0.6328125, "learning_rate": 0.0016739979643936478, "loss": 0.327, "step": 27590 }, { "epoch": 0.04892289661421002, "grad_norm": 0.58984375, "learning_rate": 0.0016739521205483242, "loss": 0.1819, "step": 27592 }, { "epoch": 0.04892644277951983, "grad_norm": 0.5625, "learning_rate": 0.0016739062741928674, "loss": 0.2103, "step": 27594 }, { "epoch": 0.048929988944829646, "grad_norm": 2.1875, "learning_rate": 0.0016738604253274778, "loss": 0.2365, "step": 27596 }, { "epoch": 0.04893353511013946, "grad_norm": 0.52734375, "learning_rate": 0.0016738145739523564, "loss": 0.1964, "step": 27598 }, { "epoch": 0.048937081275449275, "grad_norm": 0.359375, "learning_rate": 0.0016737687200677033, "loss": 0.1642, "step": 27600 }, { "epoch": 0.04894062744075909, "grad_norm": 0.478515625, "learning_rate": 0.001673722863673719, "loss": 0.2654, "step": 27602 }, { "epoch": 0.048944173606068904, "grad_norm": 0.2265625, "learning_rate": 0.001673677004770604, "loss": 0.2291, "step": 27604 }, { "epoch": 0.048947719771378725, "grad_norm": 4.28125, "learning_rate": 0.001673631143358559, "loss": 0.3125, "step": 27606 }, { "epoch": 0.04895126593668854, "grad_norm": 0.75, "learning_rate": 0.0016735852794377852, "loss": 0.216, "step": 27608 }, { "epoch": 0.048954812101998355, "grad_norm": 0.90625, "learning_rate": 0.0016735394130084822, "loss": 0.2221, "step": 27610 }, { "epoch": 0.04895835826730817, "grad_norm": 4.46875, "learning_rate": 0.0016734935440708507, "loss": 0.2369, "step": 27612 }, { "epoch": 0.048961904432617984, "grad_norm": 0.69140625, "learning_rate": 0.0016734476726250917, "loss": 0.1766, "step": 27614 }, { "epoch": 0.0489654505979278, "grad_norm": 0.70703125, "learning_rate": 0.0016734017986714056, "loss": 0.2263, "step": 27616 }, { "epoch": 0.04896899676323761, "grad_norm": 0.369140625, "learning_rate": 0.0016733559222099938, "loss": 0.1922, "step": 27618 }, { "epoch": 0.04897254292854743, "grad_norm": 0.94921875, "learning_rate": 0.0016733100432410554, "loss": 0.159, "step": 27620 }, { "epoch": 0.04897608909385724, "grad_norm": 0.6875, "learning_rate": 0.0016732641617647921, "loss": 0.2057, "step": 27622 }, { "epoch": 0.048979635259167056, "grad_norm": 0.478515625, "learning_rate": 0.0016732182777814042, "loss": 0.1901, "step": 27624 }, { "epoch": 0.04898318142447687, "grad_norm": 0.375, "learning_rate": 0.0016731723912910927, "loss": 0.1933, "step": 27626 }, { "epoch": 0.04898672758978669, "grad_norm": 0.54296875, "learning_rate": 0.0016731265022940586, "loss": 0.253, "step": 27628 }, { "epoch": 0.04899027375509651, "grad_norm": 0.9609375, "learning_rate": 0.0016730806107905013, "loss": 0.2822, "step": 27630 }, { "epoch": 0.04899381992040632, "grad_norm": 0.7890625, "learning_rate": 0.0016730347167806226, "loss": 0.2566, "step": 27632 }, { "epoch": 0.048997366085716136, "grad_norm": 0.478515625, "learning_rate": 0.001672988820264623, "loss": 0.1671, "step": 27634 }, { "epoch": 0.04900091225102595, "grad_norm": 0.18359375, "learning_rate": 0.001672942921242703, "loss": 0.1419, "step": 27636 }, { "epoch": 0.049004458416335765, "grad_norm": 0.515625, "learning_rate": 0.0016728970197150636, "loss": 0.3397, "step": 27638 }, { "epoch": 0.04900800458164558, "grad_norm": 0.48046875, "learning_rate": 0.001672851115681905, "loss": 0.1907, "step": 27640 }, { "epoch": 0.049011550746955394, "grad_norm": 0.7734375, "learning_rate": 0.0016728052091434288, "loss": 0.3122, "step": 27642 }, { "epoch": 0.04901509691226521, "grad_norm": 0.259765625, "learning_rate": 0.0016727593000998354, "loss": 0.148, "step": 27644 }, { "epoch": 0.04901864307757502, "grad_norm": 0.73046875, "learning_rate": 0.0016727133885513252, "loss": 0.1999, "step": 27646 }, { "epoch": 0.04902218924288484, "grad_norm": 0.87890625, "learning_rate": 0.0016726674744980993, "loss": 0.2652, "step": 27648 }, { "epoch": 0.04902573540819466, "grad_norm": 1.03125, "learning_rate": 0.001672621557940359, "loss": 0.2193, "step": 27650 }, { "epoch": 0.049029281573504474, "grad_norm": 0.400390625, "learning_rate": 0.0016725756388783044, "loss": 0.2048, "step": 27652 }, { "epoch": 0.04903282773881429, "grad_norm": 0.240234375, "learning_rate": 0.0016725297173121364, "loss": 0.1569, "step": 27654 }, { "epoch": 0.0490363739041241, "grad_norm": 0.74609375, "learning_rate": 0.001672483793242056, "loss": 0.2729, "step": 27656 }, { "epoch": 0.04903992006943392, "grad_norm": 0.24609375, "learning_rate": 0.001672437866668264, "loss": 0.1925, "step": 27658 }, { "epoch": 0.04904346623474373, "grad_norm": 2.75, "learning_rate": 0.0016723919375909613, "loss": 0.271, "step": 27660 }, { "epoch": 0.04904701240005355, "grad_norm": 0.58984375, "learning_rate": 0.0016723460060103488, "loss": 0.2411, "step": 27662 }, { "epoch": 0.04905055856536336, "grad_norm": 0.39453125, "learning_rate": 0.0016723000719266272, "loss": 0.2002, "step": 27664 }, { "epoch": 0.049054104730673176, "grad_norm": 0.33984375, "learning_rate": 0.0016722541353399976, "loss": 0.2032, "step": 27666 }, { "epoch": 0.04905765089598299, "grad_norm": 0.373046875, "learning_rate": 0.001672208196250661, "loss": 0.1659, "step": 27668 }, { "epoch": 0.049061197061292805, "grad_norm": 0.69921875, "learning_rate": 0.0016721622546588177, "loss": 0.4634, "step": 27670 }, { "epoch": 0.04906474322660262, "grad_norm": 0.57421875, "learning_rate": 0.0016721163105646697, "loss": 0.2322, "step": 27672 }, { "epoch": 0.04906828939191244, "grad_norm": 0.765625, "learning_rate": 0.0016720703639684167, "loss": 0.1721, "step": 27674 }, { "epoch": 0.049071835557222256, "grad_norm": 0.95703125, "learning_rate": 0.0016720244148702606, "loss": 0.2633, "step": 27676 }, { "epoch": 0.04907538172253207, "grad_norm": 0.4375, "learning_rate": 0.0016719784632704018, "loss": 0.1912, "step": 27678 }, { "epoch": 0.049078927887841885, "grad_norm": 0.78125, "learning_rate": 0.0016719325091690417, "loss": 0.1853, "step": 27680 }, { "epoch": 0.0490824740531517, "grad_norm": 0.6953125, "learning_rate": 0.0016718865525663807, "loss": 0.1913, "step": 27682 }, { "epoch": 0.049086020218461514, "grad_norm": 0.65625, "learning_rate": 0.0016718405934626202, "loss": 0.1892, "step": 27684 }, { "epoch": 0.04908956638377133, "grad_norm": 0.34375, "learning_rate": 0.0016717946318579608, "loss": 0.2402, "step": 27686 }, { "epoch": 0.04909311254908114, "grad_norm": 0.6875, "learning_rate": 0.0016717486677526043, "loss": 0.1605, "step": 27688 }, { "epoch": 0.04909665871439096, "grad_norm": 0.376953125, "learning_rate": 0.0016717027011467509, "loss": 0.1727, "step": 27690 }, { "epoch": 0.04910020487970077, "grad_norm": 0.1875, "learning_rate": 0.0016716567320406023, "loss": 0.1964, "step": 27692 }, { "epoch": 0.04910375104501059, "grad_norm": 0.59375, "learning_rate": 0.0016716107604343592, "loss": 0.1557, "step": 27694 }, { "epoch": 0.04910729721032041, "grad_norm": 0.703125, "learning_rate": 0.0016715647863282223, "loss": 0.1917, "step": 27696 }, { "epoch": 0.04911084337563022, "grad_norm": 0.9140625, "learning_rate": 0.001671518809722393, "loss": 0.1907, "step": 27698 }, { "epoch": 0.04911438954094004, "grad_norm": 0.54296875, "learning_rate": 0.001671472830617073, "loss": 0.2001, "step": 27700 }, { "epoch": 0.04911793570624985, "grad_norm": 0.3125, "learning_rate": 0.0016714268490124625, "loss": 0.2129, "step": 27702 }, { "epoch": 0.049121481871559666, "grad_norm": 0.435546875, "learning_rate": 0.001671380864908763, "loss": 0.2355, "step": 27704 }, { "epoch": 0.04912502803686948, "grad_norm": 1.734375, "learning_rate": 0.001671334878306175, "loss": 0.2582, "step": 27706 }, { "epoch": 0.049128574202179295, "grad_norm": 0.453125, "learning_rate": 0.0016712888892049006, "loss": 0.1878, "step": 27708 }, { "epoch": 0.04913212036748911, "grad_norm": 0.470703125, "learning_rate": 0.0016712428976051398, "loss": 0.202, "step": 27710 }, { "epoch": 0.049135666532798924, "grad_norm": 1.890625, "learning_rate": 0.001671196903507095, "loss": 0.3378, "step": 27712 }, { "epoch": 0.04913921269810874, "grad_norm": 0.65234375, "learning_rate": 0.0016711509069109666, "loss": 0.2083, "step": 27714 }, { "epoch": 0.049142758863418554, "grad_norm": 0.291015625, "learning_rate": 0.0016711049078169558, "loss": 0.2795, "step": 27716 }, { "epoch": 0.049146305028728375, "grad_norm": 0.79296875, "learning_rate": 0.001671058906225264, "loss": 0.2019, "step": 27718 }, { "epoch": 0.04914985119403819, "grad_norm": 0.9609375, "learning_rate": 0.0016710129021360923, "loss": 0.2016, "step": 27720 }, { "epoch": 0.049153397359348004, "grad_norm": 0.7734375, "learning_rate": 0.0016709668955496417, "loss": 0.1704, "step": 27722 }, { "epoch": 0.04915694352465782, "grad_norm": 0.23046875, "learning_rate": 0.0016709208864661138, "loss": 0.1586, "step": 27724 }, { "epoch": 0.04916048968996763, "grad_norm": 0.65625, "learning_rate": 0.0016708748748857092, "loss": 0.2848, "step": 27726 }, { "epoch": 0.04916403585527745, "grad_norm": 0.3984375, "learning_rate": 0.0016708288608086297, "loss": 0.3441, "step": 27728 }, { "epoch": 0.04916758202058726, "grad_norm": 0.78515625, "learning_rate": 0.0016707828442350766, "loss": 0.201, "step": 27730 }, { "epoch": 0.04917112818589708, "grad_norm": 0.427734375, "learning_rate": 0.0016707368251652503, "loss": 0.2629, "step": 27732 }, { "epoch": 0.04917467435120689, "grad_norm": 0.58203125, "learning_rate": 0.0016706908035993531, "loss": 0.1957, "step": 27734 }, { "epoch": 0.049178220516516706, "grad_norm": 0.35546875, "learning_rate": 0.0016706447795375857, "loss": 0.1834, "step": 27736 }, { "epoch": 0.04918176668182652, "grad_norm": 0.439453125, "learning_rate": 0.0016705987529801497, "loss": 0.1378, "step": 27738 }, { "epoch": 0.049185312847136335, "grad_norm": 0.984375, "learning_rate": 0.0016705527239272457, "loss": 0.2028, "step": 27740 }, { "epoch": 0.04918885901244616, "grad_norm": 0.32421875, "learning_rate": 0.0016705066923790757, "loss": 0.1986, "step": 27742 }, { "epoch": 0.04919240517775597, "grad_norm": 0.4921875, "learning_rate": 0.001670460658335841, "loss": 0.1796, "step": 27744 }, { "epoch": 0.049195951343065786, "grad_norm": 0.40234375, "learning_rate": 0.0016704146217977427, "loss": 0.2651, "step": 27746 }, { "epoch": 0.0491994975083756, "grad_norm": 2.375, "learning_rate": 0.0016703685827649823, "loss": 0.3195, "step": 27748 }, { "epoch": 0.049203043673685415, "grad_norm": 1.421875, "learning_rate": 0.0016703225412377607, "loss": 0.2236, "step": 27750 }, { "epoch": 0.04920658983899523, "grad_norm": 0.6328125, "learning_rate": 0.00167027649721628, "loss": 0.2245, "step": 27752 }, { "epoch": 0.049210136004305044, "grad_norm": 0.63671875, "learning_rate": 0.0016702304507007406, "loss": 0.2257, "step": 27754 }, { "epoch": 0.04921368216961486, "grad_norm": 0.318359375, "learning_rate": 0.0016701844016913448, "loss": 0.2242, "step": 27756 }, { "epoch": 0.04921722833492467, "grad_norm": 0.3671875, "learning_rate": 0.0016701383501882936, "loss": 0.2182, "step": 27758 }, { "epoch": 0.04922077450023449, "grad_norm": 0.482421875, "learning_rate": 0.0016700922961917885, "loss": 0.2102, "step": 27760 }, { "epoch": 0.0492243206655443, "grad_norm": 0.91796875, "learning_rate": 0.0016700462397020307, "loss": 0.2379, "step": 27762 }, { "epoch": 0.049227866830854124, "grad_norm": 0.26171875, "learning_rate": 0.0016700001807192215, "loss": 0.1694, "step": 27764 }, { "epoch": 0.04923141299616394, "grad_norm": 0.390625, "learning_rate": 0.0016699541192435631, "loss": 0.2549, "step": 27766 }, { "epoch": 0.04923495916147375, "grad_norm": 0.314453125, "learning_rate": 0.0016699080552752563, "loss": 0.1718, "step": 27768 }, { "epoch": 0.04923850532678357, "grad_norm": 0.6015625, "learning_rate": 0.001669861988814503, "loss": 0.2045, "step": 27770 }, { "epoch": 0.04924205149209338, "grad_norm": 0.59765625, "learning_rate": 0.0016698159198615036, "loss": 0.2026, "step": 27772 }, { "epoch": 0.049245597657403196, "grad_norm": 0.92578125, "learning_rate": 0.0016697698484164612, "loss": 0.244, "step": 27774 }, { "epoch": 0.04924914382271301, "grad_norm": 0.90625, "learning_rate": 0.0016697237744795759, "loss": 0.1724, "step": 27776 }, { "epoch": 0.049252689988022826, "grad_norm": 0.76171875, "learning_rate": 0.00166967769805105, "loss": 0.2235, "step": 27778 }, { "epoch": 0.04925623615333264, "grad_norm": 0.6171875, "learning_rate": 0.0016696316191310845, "loss": 0.2293, "step": 27780 }, { "epoch": 0.049259782318642455, "grad_norm": 0.337890625, "learning_rate": 0.0016695855377198813, "loss": 0.1815, "step": 27782 }, { "epoch": 0.04926332848395227, "grad_norm": 7.9375, "learning_rate": 0.0016695394538176422, "loss": 0.2171, "step": 27784 }, { "epoch": 0.04926687464926209, "grad_norm": 0.376953125, "learning_rate": 0.0016694933674245677, "loss": 0.1785, "step": 27786 }, { "epoch": 0.049270420814571905, "grad_norm": 0.482421875, "learning_rate": 0.0016694472785408604, "loss": 0.1918, "step": 27788 }, { "epoch": 0.04927396697988172, "grad_norm": 0.5859375, "learning_rate": 0.0016694011871667214, "loss": 0.2075, "step": 27790 }, { "epoch": 0.049277513145191534, "grad_norm": 0.302734375, "learning_rate": 0.0016693550933023524, "loss": 0.1964, "step": 27792 }, { "epoch": 0.04928105931050135, "grad_norm": 2.046875, "learning_rate": 0.001669308996947955, "loss": 0.3022, "step": 27794 }, { "epoch": 0.04928460547581116, "grad_norm": 0.369140625, "learning_rate": 0.0016692628981037306, "loss": 0.2381, "step": 27796 }, { "epoch": 0.04928815164112098, "grad_norm": 0.44921875, "learning_rate": 0.001669216796769881, "loss": 0.2227, "step": 27798 }, { "epoch": 0.04929169780643079, "grad_norm": 0.609375, "learning_rate": 0.0016691706929466076, "loss": 0.2152, "step": 27800 }, { "epoch": 0.04929524397174061, "grad_norm": 0.5546875, "learning_rate": 0.0016691245866341123, "loss": 0.192, "step": 27802 }, { "epoch": 0.04929879013705042, "grad_norm": 0.67578125, "learning_rate": 0.001669078477832597, "loss": 0.1694, "step": 27804 }, { "epoch": 0.049302336302360236, "grad_norm": 0.5, "learning_rate": 0.0016690323665422627, "loss": 0.1698, "step": 27806 }, { "epoch": 0.04930588246767005, "grad_norm": 0.390625, "learning_rate": 0.0016689862527633113, "loss": 0.1649, "step": 27808 }, { "epoch": 0.04930942863297987, "grad_norm": 0.4140625, "learning_rate": 0.0016689401364959444, "loss": 0.2896, "step": 27810 }, { "epoch": 0.04931297479828969, "grad_norm": 1.40625, "learning_rate": 0.0016688940177403643, "loss": 0.4941, "step": 27812 }, { "epoch": 0.0493165209635995, "grad_norm": 1.0078125, "learning_rate": 0.0016688478964967718, "loss": 0.3078, "step": 27814 }, { "epoch": 0.049320067128909316, "grad_norm": 0.625, "learning_rate": 0.0016688017727653692, "loss": 0.1915, "step": 27816 }, { "epoch": 0.04932361329421913, "grad_norm": 0.83984375, "learning_rate": 0.0016687556465463577, "loss": 0.1865, "step": 27818 }, { "epoch": 0.049327159459528945, "grad_norm": 0.232421875, "learning_rate": 0.00166870951783994, "loss": 0.1926, "step": 27820 }, { "epoch": 0.04933070562483876, "grad_norm": 0.6015625, "learning_rate": 0.001668663386646317, "loss": 0.2602, "step": 27822 }, { "epoch": 0.049334251790148574, "grad_norm": 0.455078125, "learning_rate": 0.0016686172529656903, "loss": 0.2284, "step": 27824 }, { "epoch": 0.04933779795545839, "grad_norm": 1.0078125, "learning_rate": 0.0016685711167982625, "loss": 0.3065, "step": 27826 }, { "epoch": 0.0493413441207682, "grad_norm": 0.3984375, "learning_rate": 0.0016685249781442347, "loss": 0.1869, "step": 27828 }, { "epoch": 0.04934489028607802, "grad_norm": 2.03125, "learning_rate": 0.0016684788370038089, "loss": 0.2437, "step": 27830 }, { "epoch": 0.04934843645138784, "grad_norm": 0.8984375, "learning_rate": 0.001668432693377187, "loss": 0.1359, "step": 27832 }, { "epoch": 0.049351982616697654, "grad_norm": 0.6484375, "learning_rate": 0.0016683865472645703, "loss": 0.1849, "step": 27834 }, { "epoch": 0.04935552878200747, "grad_norm": 0.337890625, "learning_rate": 0.0016683403986661614, "loss": 0.1791, "step": 27836 }, { "epoch": 0.04935907494731728, "grad_norm": 1.0078125, "learning_rate": 0.0016682942475821617, "loss": 0.2013, "step": 27838 }, { "epoch": 0.0493626211126271, "grad_norm": 1.234375, "learning_rate": 0.0016682480940127728, "loss": 0.1919, "step": 27840 }, { "epoch": 0.04936616727793691, "grad_norm": 0.35546875, "learning_rate": 0.001668201937958197, "loss": 0.1576, "step": 27842 }, { "epoch": 0.04936971344324673, "grad_norm": 0.6640625, "learning_rate": 0.001668155779418636, "loss": 0.2228, "step": 27844 }, { "epoch": 0.04937325960855654, "grad_norm": 1.0859375, "learning_rate": 0.0016681096183942914, "loss": 0.2717, "step": 27846 }, { "epoch": 0.049376805773866356, "grad_norm": 0.22265625, "learning_rate": 0.0016680634548853657, "loss": 0.1955, "step": 27848 }, { "epoch": 0.04938035193917617, "grad_norm": 0.31640625, "learning_rate": 0.0016680172888920602, "loss": 0.2688, "step": 27850 }, { "epoch": 0.049383898104485985, "grad_norm": 0.353515625, "learning_rate": 0.001667971120414577, "loss": 0.1823, "step": 27852 }, { "epoch": 0.049387444269795806, "grad_norm": 2.484375, "learning_rate": 0.0016679249494531182, "loss": 0.1953, "step": 27854 }, { "epoch": 0.04939099043510562, "grad_norm": 0.93359375, "learning_rate": 0.0016678787760078854, "loss": 0.2108, "step": 27856 }, { "epoch": 0.049394536600415435, "grad_norm": 0.57421875, "learning_rate": 0.0016678326000790806, "loss": 0.1638, "step": 27858 }, { "epoch": 0.04939808276572525, "grad_norm": 0.3046875, "learning_rate": 0.0016677864216669061, "loss": 0.1907, "step": 27860 }, { "epoch": 0.049401628931035065, "grad_norm": 0.80078125, "learning_rate": 0.0016677402407715633, "loss": 0.1934, "step": 27862 }, { "epoch": 0.04940517509634488, "grad_norm": 0.61328125, "learning_rate": 0.0016676940573932548, "loss": 0.271, "step": 27864 }, { "epoch": 0.049408721261654694, "grad_norm": 0.828125, "learning_rate": 0.0016676478715321821, "loss": 0.2908, "step": 27866 }, { "epoch": 0.04941226742696451, "grad_norm": 0.3203125, "learning_rate": 0.0016676016831885475, "loss": 0.1757, "step": 27868 }, { "epoch": 0.04941581359227432, "grad_norm": 0.5390625, "learning_rate": 0.0016675554923625527, "loss": 0.2011, "step": 27870 }, { "epoch": 0.04941935975758414, "grad_norm": 5.5625, "learning_rate": 0.0016675092990543997, "loss": 0.2947, "step": 27872 }, { "epoch": 0.04942290592289395, "grad_norm": 0.81640625, "learning_rate": 0.0016674631032642909, "loss": 0.1589, "step": 27874 }, { "epoch": 0.049426452088203766, "grad_norm": 0.56640625, "learning_rate": 0.0016674169049924281, "loss": 0.2102, "step": 27876 }, { "epoch": 0.04942999825351359, "grad_norm": 0.67578125, "learning_rate": 0.001667370704239013, "loss": 0.229, "step": 27878 }, { "epoch": 0.0494335444188234, "grad_norm": 0.53515625, "learning_rate": 0.0016673245010042485, "loss": 0.2138, "step": 27880 }, { "epoch": 0.04943709058413322, "grad_norm": 0.59765625, "learning_rate": 0.0016672782952883358, "loss": 0.2541, "step": 27882 }, { "epoch": 0.04944063674944303, "grad_norm": 1.953125, "learning_rate": 0.0016672320870914775, "loss": 0.2812, "step": 27884 }, { "epoch": 0.049444182914752846, "grad_norm": 0.73046875, "learning_rate": 0.0016671858764138753, "loss": 0.2342, "step": 27886 }, { "epoch": 0.04944772908006266, "grad_norm": 1.5, "learning_rate": 0.001667139663255732, "loss": 0.1575, "step": 27888 }, { "epoch": 0.049451275245372475, "grad_norm": 1.140625, "learning_rate": 0.001667093447617249, "loss": 0.2015, "step": 27890 }, { "epoch": 0.04945482141068229, "grad_norm": 0.7734375, "learning_rate": 0.0016670472294986287, "loss": 0.2173, "step": 27892 }, { "epoch": 0.049458367575992104, "grad_norm": 0.388671875, "learning_rate": 0.0016670010089000732, "loss": 0.2426, "step": 27894 }, { "epoch": 0.04946191374130192, "grad_norm": 0.408203125, "learning_rate": 0.0016669547858217846, "loss": 0.153, "step": 27896 }, { "epoch": 0.04946545990661173, "grad_norm": 0.5546875, "learning_rate": 0.0016669085602639654, "loss": 0.2145, "step": 27898 }, { "epoch": 0.049469006071921555, "grad_norm": 0.443359375, "learning_rate": 0.001666862332226817, "loss": 0.217, "step": 27900 }, { "epoch": 0.04947255223723137, "grad_norm": 0.341796875, "learning_rate": 0.0016668161017105423, "loss": 0.1746, "step": 27902 }, { "epoch": 0.049476098402541184, "grad_norm": 0.671875, "learning_rate": 0.001666769868715343, "loss": 0.1477, "step": 27904 }, { "epoch": 0.049479644567851, "grad_norm": 0.6328125, "learning_rate": 0.0016667236332414217, "loss": 0.1846, "step": 27906 }, { "epoch": 0.04948319073316081, "grad_norm": 0.96875, "learning_rate": 0.0016666773952889805, "loss": 0.22, "step": 27908 }, { "epoch": 0.04948673689847063, "grad_norm": 0.6796875, "learning_rate": 0.0016666311548582213, "loss": 0.1665, "step": 27910 }, { "epoch": 0.04949028306378044, "grad_norm": 0.23828125, "learning_rate": 0.0016665849119493464, "loss": 0.1986, "step": 27912 }, { "epoch": 0.04949382922909026, "grad_norm": 0.46875, "learning_rate": 0.0016665386665625588, "loss": 0.213, "step": 27914 }, { "epoch": 0.04949737539440007, "grad_norm": 0.345703125, "learning_rate": 0.0016664924186980596, "loss": 0.2079, "step": 27916 }, { "epoch": 0.049500921559709886, "grad_norm": 0.376953125, "learning_rate": 0.0016664461683560522, "loss": 0.166, "step": 27918 }, { "epoch": 0.0495044677250197, "grad_norm": 0.451171875, "learning_rate": 0.0016663999155367376, "loss": 0.2034, "step": 27920 }, { "epoch": 0.04950801389032952, "grad_norm": 1.8046875, "learning_rate": 0.0016663536602403192, "loss": 0.2747, "step": 27922 }, { "epoch": 0.049511560055639336, "grad_norm": 0.67578125, "learning_rate": 0.0016663074024669989, "loss": 0.2248, "step": 27924 }, { "epoch": 0.04951510622094915, "grad_norm": 1.890625, "learning_rate": 0.001666261142216979, "loss": 0.3508, "step": 27926 }, { "epoch": 0.049518652386258966, "grad_norm": 0.55078125, "learning_rate": 0.0016662148794904614, "loss": 0.2417, "step": 27928 }, { "epoch": 0.04952219855156878, "grad_norm": 0.39453125, "learning_rate": 0.0016661686142876494, "loss": 0.1728, "step": 27930 }, { "epoch": 0.049525744716878595, "grad_norm": 9.625, "learning_rate": 0.001666122346608744, "loss": 0.2835, "step": 27932 }, { "epoch": 0.04952929088218841, "grad_norm": 1.265625, "learning_rate": 0.001666076076453949, "loss": 0.2367, "step": 27934 }, { "epoch": 0.049532837047498224, "grad_norm": 0.89453125, "learning_rate": 0.0016660298038234658, "loss": 0.4485, "step": 27936 }, { "epoch": 0.04953638321280804, "grad_norm": 0.91015625, "learning_rate": 0.0016659835287174969, "loss": 0.4221, "step": 27938 }, { "epoch": 0.04953992937811785, "grad_norm": 3.296875, "learning_rate": 0.0016659372511362448, "loss": 0.224, "step": 27940 }, { "epoch": 0.04954347554342767, "grad_norm": 0.5546875, "learning_rate": 0.001665890971079912, "loss": 0.2089, "step": 27942 }, { "epoch": 0.04954702170873748, "grad_norm": 0.50390625, "learning_rate": 0.0016658446885487008, "loss": 0.2687, "step": 27944 }, { "epoch": 0.049550567874047304, "grad_norm": 0.462890625, "learning_rate": 0.0016657984035428135, "loss": 0.1956, "step": 27946 }, { "epoch": 0.04955411403935712, "grad_norm": 2.921875, "learning_rate": 0.001665752116062453, "loss": 0.5581, "step": 27948 }, { "epoch": 0.04955766020466693, "grad_norm": 0.71875, "learning_rate": 0.0016657058261078212, "loss": 0.5145, "step": 27950 }, { "epoch": 0.04956120636997675, "grad_norm": 1.4609375, "learning_rate": 0.0016656595336791205, "loss": 0.2503, "step": 27952 }, { "epoch": 0.04956475253528656, "grad_norm": 0.4765625, "learning_rate": 0.0016656132387765537, "loss": 0.164, "step": 27954 }, { "epoch": 0.049568298700596376, "grad_norm": 0.73828125, "learning_rate": 0.001665566941400323, "loss": 0.2543, "step": 27956 }, { "epoch": 0.04957184486590619, "grad_norm": 0.61328125, "learning_rate": 0.0016655206415506312, "loss": 0.1681, "step": 27958 }, { "epoch": 0.049575391031216005, "grad_norm": 0.79296875, "learning_rate": 0.0016654743392276808, "loss": 0.2143, "step": 27960 }, { "epoch": 0.04957893719652582, "grad_norm": 1.0078125, "learning_rate": 0.0016654280344316737, "loss": 0.22, "step": 27962 }, { "epoch": 0.049582483361835634, "grad_norm": 0.5234375, "learning_rate": 0.0016653817271628133, "loss": 0.1959, "step": 27964 }, { "epoch": 0.04958602952714545, "grad_norm": 4.125, "learning_rate": 0.0016653354174213009, "loss": 0.2416, "step": 27966 }, { "epoch": 0.04958957569245527, "grad_norm": 0.90234375, "learning_rate": 0.0016652891052073404, "loss": 0.2009, "step": 27968 }, { "epoch": 0.049593121857765085, "grad_norm": 0.2578125, "learning_rate": 0.0016652427905211335, "loss": 0.1838, "step": 27970 }, { "epoch": 0.0495966680230749, "grad_norm": 0.6484375, "learning_rate": 0.001665196473362883, "loss": 0.2672, "step": 27972 }, { "epoch": 0.049600214188384714, "grad_norm": 0.41796875, "learning_rate": 0.0016651501537327915, "loss": 0.2306, "step": 27974 }, { "epoch": 0.04960376035369453, "grad_norm": 0.6484375, "learning_rate": 0.0016651038316310616, "loss": 0.2079, "step": 27976 }, { "epoch": 0.04960730651900434, "grad_norm": 2.0, "learning_rate": 0.0016650575070578952, "loss": 0.2249, "step": 27978 }, { "epoch": 0.04961085268431416, "grad_norm": 0.234375, "learning_rate": 0.0016650111800134962, "loss": 0.1921, "step": 27980 }, { "epoch": 0.04961439884962397, "grad_norm": 0.7734375, "learning_rate": 0.0016649648504980662, "loss": 0.2214, "step": 27982 }, { "epoch": 0.04961794501493379, "grad_norm": 0.447265625, "learning_rate": 0.0016649185185118082, "loss": 0.2072, "step": 27984 }, { "epoch": 0.0496214911802436, "grad_norm": 2.9375, "learning_rate": 0.0016648721840549245, "loss": 0.1715, "step": 27986 }, { "epoch": 0.049625037345553416, "grad_norm": 1.453125, "learning_rate": 0.001664825847127618, "loss": 0.1837, "step": 27988 }, { "epoch": 0.04962858351086324, "grad_norm": 1.1484375, "learning_rate": 0.0016647795077300918, "loss": 0.3115, "step": 27990 }, { "epoch": 0.04963212967617305, "grad_norm": 0.64453125, "learning_rate": 0.0016647331658625477, "loss": 0.1602, "step": 27992 }, { "epoch": 0.04963567584148287, "grad_norm": 0.5546875, "learning_rate": 0.0016646868215251889, "loss": 0.2095, "step": 27994 }, { "epoch": 0.04963922200679268, "grad_norm": 0.8359375, "learning_rate": 0.001664640474718218, "loss": 0.2153, "step": 27996 }, { "epoch": 0.049642768172102496, "grad_norm": 2.03125, "learning_rate": 0.0016645941254418376, "loss": 0.3239, "step": 27998 }, { "epoch": 0.04964631433741231, "grad_norm": 0.83203125, "learning_rate": 0.0016645477736962502, "loss": 0.1795, "step": 28000 }, { "epoch": 0.049649860502722125, "grad_norm": 5.375, "learning_rate": 0.001664501419481659, "loss": 0.2481, "step": 28002 }, { "epoch": 0.04965340666803194, "grad_norm": 2.171875, "learning_rate": 0.0016644550627982666, "loss": 0.4749, "step": 28004 }, { "epoch": 0.049656952833341754, "grad_norm": 0.283203125, "learning_rate": 0.0016644087036462754, "loss": 0.2166, "step": 28006 }, { "epoch": 0.04966049899865157, "grad_norm": 2.0625, "learning_rate": 0.0016643623420258885, "loss": 0.2171, "step": 28008 }, { "epoch": 0.04966404516396138, "grad_norm": 2.453125, "learning_rate": 0.0016643159779373084, "loss": 0.232, "step": 28010 }, { "epoch": 0.0496675913292712, "grad_norm": 1.234375, "learning_rate": 0.001664269611380738, "loss": 0.1775, "step": 28012 }, { "epoch": 0.04967113749458102, "grad_norm": 0.6796875, "learning_rate": 0.0016642232423563802, "loss": 0.2393, "step": 28014 }, { "epoch": 0.049674683659890834, "grad_norm": 0.53515625, "learning_rate": 0.0016641768708644377, "loss": 0.1786, "step": 28016 }, { "epoch": 0.04967822982520065, "grad_norm": 0.435546875, "learning_rate": 0.0016641304969051134, "loss": 0.3051, "step": 28018 }, { "epoch": 0.04968177599051046, "grad_norm": 1.0625, "learning_rate": 0.0016640841204786099, "loss": 0.2987, "step": 28020 }, { "epoch": 0.04968532215582028, "grad_norm": 3.140625, "learning_rate": 0.0016640377415851298, "loss": 0.3208, "step": 28022 }, { "epoch": 0.04968886832113009, "grad_norm": 0.625, "learning_rate": 0.0016639913602248767, "loss": 0.2423, "step": 28024 }, { "epoch": 0.049692414486439906, "grad_norm": 0.57421875, "learning_rate": 0.0016639449763980528, "loss": 0.2448, "step": 28026 }, { "epoch": 0.04969596065174972, "grad_norm": 0.275390625, "learning_rate": 0.001663898590104861, "loss": 0.2189, "step": 28028 }, { "epoch": 0.049699506817059536, "grad_norm": 0.72265625, "learning_rate": 0.0016638522013455047, "loss": 0.1954, "step": 28030 }, { "epoch": 0.04970305298236935, "grad_norm": 1.0078125, "learning_rate": 0.001663805810120186, "loss": 0.2716, "step": 28032 }, { "epoch": 0.049706599147679165, "grad_norm": 0.50390625, "learning_rate": 0.0016637594164291083, "loss": 0.2058, "step": 28034 }, { "epoch": 0.049710145312988986, "grad_norm": 2.859375, "learning_rate": 0.0016637130202724745, "loss": 0.1881, "step": 28036 }, { "epoch": 0.0497136914782988, "grad_norm": 0.92578125, "learning_rate": 0.0016636666216504877, "loss": 0.2078, "step": 28038 }, { "epoch": 0.049717237643608615, "grad_norm": 1.0390625, "learning_rate": 0.00166362022056335, "loss": 0.1782, "step": 28040 }, { "epoch": 0.04972078380891843, "grad_norm": 1.0703125, "learning_rate": 0.0016635738170112649, "loss": 0.3353, "step": 28042 }, { "epoch": 0.049724329974228244, "grad_norm": 0.67578125, "learning_rate": 0.0016635274109944352, "loss": 0.2159, "step": 28044 }, { "epoch": 0.04972787613953806, "grad_norm": 0.24609375, "learning_rate": 0.0016634810025130643, "loss": 0.1879, "step": 28046 }, { "epoch": 0.04973142230484787, "grad_norm": 0.337890625, "learning_rate": 0.0016634345915673547, "loss": 0.2126, "step": 28048 }, { "epoch": 0.04973496847015769, "grad_norm": 2.4375, "learning_rate": 0.0016633881781575094, "loss": 0.2666, "step": 28050 }, { "epoch": 0.0497385146354675, "grad_norm": 1.8203125, "learning_rate": 0.0016633417622837312, "loss": 0.2933, "step": 28052 }, { "epoch": 0.04974206080077732, "grad_norm": 0.416015625, "learning_rate": 0.0016632953439462237, "loss": 0.2003, "step": 28054 }, { "epoch": 0.04974560696608713, "grad_norm": 0.87109375, "learning_rate": 0.0016632489231451896, "loss": 0.2298, "step": 28056 }, { "epoch": 0.04974915313139695, "grad_norm": 1.2578125, "learning_rate": 0.0016632024998808317, "loss": 0.2036, "step": 28058 }, { "epoch": 0.04975269929670677, "grad_norm": 0.30859375, "learning_rate": 0.0016631560741533532, "loss": 0.1691, "step": 28060 }, { "epoch": 0.04975624546201658, "grad_norm": 1.9375, "learning_rate": 0.0016631096459629572, "loss": 0.3316, "step": 28062 }, { "epoch": 0.0497597916273264, "grad_norm": 0.6484375, "learning_rate": 0.0016630632153098468, "loss": 0.2299, "step": 28064 }, { "epoch": 0.04976333779263621, "grad_norm": 4.09375, "learning_rate": 0.001663016782194225, "loss": 0.2308, "step": 28066 }, { "epoch": 0.049766883957946026, "grad_norm": 0.423828125, "learning_rate": 0.0016629703466162949, "loss": 0.1887, "step": 28068 }, { "epoch": 0.04977043012325584, "grad_norm": 0.408203125, "learning_rate": 0.0016629239085762591, "loss": 0.1918, "step": 28070 }, { "epoch": 0.049773976288565655, "grad_norm": 0.60546875, "learning_rate": 0.0016628774680743216, "loss": 0.2079, "step": 28072 }, { "epoch": 0.04977752245387547, "grad_norm": 1.296875, "learning_rate": 0.0016628310251106846, "loss": 0.1874, "step": 28074 }, { "epoch": 0.049781068619185284, "grad_norm": 0.310546875, "learning_rate": 0.0016627845796855518, "loss": 0.2035, "step": 28076 }, { "epoch": 0.0497846147844951, "grad_norm": 1.328125, "learning_rate": 0.001662738131799126, "loss": 0.2102, "step": 28078 }, { "epoch": 0.04978816094980491, "grad_norm": 1.0234375, "learning_rate": 0.0016626916814516108, "loss": 0.2039, "step": 28080 }, { "epoch": 0.049791707115114735, "grad_norm": 1.53125, "learning_rate": 0.0016626452286432089, "loss": 0.2385, "step": 28082 }, { "epoch": 0.04979525328042455, "grad_norm": 1.4921875, "learning_rate": 0.0016625987733741234, "loss": 0.2306, "step": 28084 }, { "epoch": 0.049798799445734364, "grad_norm": 0.40234375, "learning_rate": 0.001662552315644558, "loss": 0.1806, "step": 28086 }, { "epoch": 0.04980234561104418, "grad_norm": 0.74609375, "learning_rate": 0.0016625058554547152, "loss": 0.2605, "step": 28088 }, { "epoch": 0.04980589177635399, "grad_norm": 0.984375, "learning_rate": 0.001662459392804799, "loss": 0.1683, "step": 28090 }, { "epoch": 0.04980943794166381, "grad_norm": 0.38671875, "learning_rate": 0.0016624129276950118, "loss": 0.1945, "step": 28092 }, { "epoch": 0.04981298410697362, "grad_norm": 1.7109375, "learning_rate": 0.0016623664601255572, "loss": 0.2158, "step": 28094 }, { "epoch": 0.04981653027228344, "grad_norm": 0.515625, "learning_rate": 0.0016623199900966381, "loss": 0.2156, "step": 28096 }, { "epoch": 0.04982007643759325, "grad_norm": 0.83984375, "learning_rate": 0.0016622735176084582, "loss": 0.175, "step": 28098 }, { "epoch": 0.049823622602903066, "grad_norm": 1.515625, "learning_rate": 0.0016622270426612205, "loss": 0.2303, "step": 28100 }, { "epoch": 0.04982716876821288, "grad_norm": 0.71875, "learning_rate": 0.0016621805652551288, "loss": 0.2442, "step": 28102 }, { "epoch": 0.0498307149335227, "grad_norm": 0.62109375, "learning_rate": 0.0016621340853903853, "loss": 0.249, "step": 28104 }, { "epoch": 0.049834261098832516, "grad_norm": 0.59765625, "learning_rate": 0.0016620876030671939, "loss": 0.2068, "step": 28106 }, { "epoch": 0.04983780726414233, "grad_norm": 0.4140625, "learning_rate": 0.0016620411182857579, "loss": 0.2047, "step": 28108 }, { "epoch": 0.049841353429452145, "grad_norm": 0.90234375, "learning_rate": 0.0016619946310462805, "loss": 0.237, "step": 28110 }, { "epoch": 0.04984489959476196, "grad_norm": 0.59375, "learning_rate": 0.001661948141348965, "loss": 0.1792, "step": 28112 }, { "epoch": 0.049848445760071775, "grad_norm": 0.8828125, "learning_rate": 0.001661901649194015, "loss": 0.2811, "step": 28114 }, { "epoch": 0.04985199192538159, "grad_norm": 1.5859375, "learning_rate": 0.0016618551545816334, "loss": 0.2571, "step": 28116 }, { "epoch": 0.049855538090691404, "grad_norm": 1.78125, "learning_rate": 0.0016618086575120234, "loss": 0.2839, "step": 28118 }, { "epoch": 0.04985908425600122, "grad_norm": 0.66015625, "learning_rate": 0.0016617621579853893, "loss": 0.1873, "step": 28120 }, { "epoch": 0.04986263042131103, "grad_norm": 0.984375, "learning_rate": 0.0016617156560019334, "loss": 0.3298, "step": 28122 }, { "epoch": 0.04986617658662085, "grad_norm": 2.34375, "learning_rate": 0.0016616691515618594, "loss": 0.1956, "step": 28124 }, { "epoch": 0.04986972275193067, "grad_norm": 0.228515625, "learning_rate": 0.001661622644665371, "loss": 0.2449, "step": 28126 }, { "epoch": 0.04987326891724048, "grad_norm": 0.423828125, "learning_rate": 0.0016615761353126716, "loss": 0.1842, "step": 28128 }, { "epoch": 0.0498768150825503, "grad_norm": 0.40625, "learning_rate": 0.0016615296235039639, "loss": 0.2034, "step": 28130 }, { "epoch": 0.04988036124786011, "grad_norm": 0.66796875, "learning_rate": 0.001661483109239452, "loss": 0.1687, "step": 28132 }, { "epoch": 0.04988390741316993, "grad_norm": 0.39453125, "learning_rate": 0.0016614365925193392, "loss": 0.1741, "step": 28134 }, { "epoch": 0.04988745357847974, "grad_norm": 5.09375, "learning_rate": 0.001661390073343829, "loss": 0.3037, "step": 28136 }, { "epoch": 0.049890999743789556, "grad_norm": 0.392578125, "learning_rate": 0.0016613435517131244, "loss": 0.1983, "step": 28138 }, { "epoch": 0.04989454590909937, "grad_norm": 0.56640625, "learning_rate": 0.0016612970276274296, "loss": 0.2035, "step": 28140 }, { "epoch": 0.049898092074409185, "grad_norm": 1.421875, "learning_rate": 0.0016612505010869473, "loss": 0.2186, "step": 28142 }, { "epoch": 0.049901638239719, "grad_norm": 0.6875, "learning_rate": 0.0016612039720918812, "loss": 0.2084, "step": 28144 }, { "epoch": 0.049905184405028814, "grad_norm": 0.251953125, "learning_rate": 0.001661157440642435, "loss": 0.1901, "step": 28146 }, { "epoch": 0.04990873057033863, "grad_norm": 4.53125, "learning_rate": 0.0016611109067388124, "loss": 0.391, "step": 28148 }, { "epoch": 0.04991227673564845, "grad_norm": 0.40625, "learning_rate": 0.0016610643703812162, "loss": 0.1475, "step": 28150 }, { "epoch": 0.049915822900958265, "grad_norm": 0.470703125, "learning_rate": 0.001661017831569851, "loss": 0.228, "step": 28152 }, { "epoch": 0.04991936906626808, "grad_norm": 0.353515625, "learning_rate": 0.001660971290304919, "loss": 0.2312, "step": 28154 }, { "epoch": 0.049922915231577894, "grad_norm": 0.3671875, "learning_rate": 0.0016609247465866248, "loss": 0.1747, "step": 28156 }, { "epoch": 0.04992646139688771, "grad_norm": 0.3203125, "learning_rate": 0.0016608782004151714, "loss": 0.1633, "step": 28158 }, { "epoch": 0.04993000756219752, "grad_norm": 0.7890625, "learning_rate": 0.0016608316517907628, "loss": 0.2399, "step": 28160 }, { "epoch": 0.04993355372750734, "grad_norm": 1.78125, "learning_rate": 0.001660785100713602, "loss": 0.386, "step": 28162 }, { "epoch": 0.04993709989281715, "grad_norm": 3.421875, "learning_rate": 0.0016607385471838932, "loss": 0.3376, "step": 28164 }, { "epoch": 0.04994064605812697, "grad_norm": 0.4296875, "learning_rate": 0.0016606919912018397, "loss": 0.197, "step": 28166 }, { "epoch": 0.04994419222343678, "grad_norm": 0.474609375, "learning_rate": 0.0016606454327676447, "loss": 0.2007, "step": 28168 }, { "epoch": 0.049947738388746596, "grad_norm": 0.458984375, "learning_rate": 0.0016605988718815126, "loss": 0.2016, "step": 28170 }, { "epoch": 0.04995128455405642, "grad_norm": 0.455078125, "learning_rate": 0.0016605523085436466, "loss": 0.2106, "step": 28172 }, { "epoch": 0.04995483071936623, "grad_norm": 2.390625, "learning_rate": 0.0016605057427542505, "loss": 0.3039, "step": 28174 }, { "epoch": 0.049958376884676046, "grad_norm": 0.48046875, "learning_rate": 0.0016604591745135277, "loss": 0.1948, "step": 28176 }, { "epoch": 0.04996192304998586, "grad_norm": 0.423828125, "learning_rate": 0.0016604126038216823, "loss": 0.1596, "step": 28178 }, { "epoch": 0.049965469215295676, "grad_norm": 0.72265625, "learning_rate": 0.0016603660306789174, "loss": 0.2016, "step": 28180 }, { "epoch": 0.04996901538060549, "grad_norm": 0.353515625, "learning_rate": 0.001660319455085437, "loss": 0.1762, "step": 28182 }, { "epoch": 0.049972561545915305, "grad_norm": 3.984375, "learning_rate": 0.0016602728770414449, "loss": 0.284, "step": 28184 }, { "epoch": 0.04997610771122512, "grad_norm": 0.83984375, "learning_rate": 0.0016602262965471448, "loss": 0.2623, "step": 28186 }, { "epoch": 0.049979653876534934, "grad_norm": 0.73828125, "learning_rate": 0.0016601797136027402, "loss": 0.1973, "step": 28188 }, { "epoch": 0.04998320004184475, "grad_norm": 0.53515625, "learning_rate": 0.0016601331282084348, "loss": 0.1799, "step": 28190 }, { "epoch": 0.04998674620715456, "grad_norm": 0.96484375, "learning_rate": 0.0016600865403644325, "loss": 0.2871, "step": 28192 }, { "epoch": 0.049990292372464384, "grad_norm": 0.275390625, "learning_rate": 0.0016600399500709372, "loss": 0.1651, "step": 28194 }, { "epoch": 0.0499938385377742, "grad_norm": 0.6640625, "learning_rate": 0.0016599933573281521, "loss": 0.2835, "step": 28196 }, { "epoch": 0.049997384703084014, "grad_norm": 0.6015625, "learning_rate": 0.0016599467621362814, "loss": 0.1804, "step": 28198 }, { "epoch": 0.05000093086839383, "grad_norm": 0.3515625, "learning_rate": 0.001659900164495529, "loss": 0.2538, "step": 28200 }, { "epoch": 0.05000447703370364, "grad_norm": 0.42578125, "learning_rate": 0.0016598535644060987, "loss": 0.2363, "step": 28202 }, { "epoch": 0.05000802319901346, "grad_norm": 0.345703125, "learning_rate": 0.0016598069618681938, "loss": 0.2018, "step": 28204 }, { "epoch": 0.05001156936432327, "grad_norm": 2.265625, "learning_rate": 0.0016597603568820182, "loss": 0.2003, "step": 28206 }, { "epoch": 0.050015115529633086, "grad_norm": 0.6640625, "learning_rate": 0.0016597137494477764, "loss": 0.189, "step": 28208 }, { "epoch": 0.0500186616949429, "grad_norm": 1.1171875, "learning_rate": 0.0016596671395656718, "loss": 0.276, "step": 28210 }, { "epoch": 0.050022207860252715, "grad_norm": 0.58203125, "learning_rate": 0.0016596205272359082, "loss": 0.2019, "step": 28212 }, { "epoch": 0.05002575402556253, "grad_norm": 0.84765625, "learning_rate": 0.001659573912458689, "loss": 0.2806, "step": 28214 }, { "epoch": 0.050029300190872344, "grad_norm": 0.34375, "learning_rate": 0.0016595272952342189, "loss": 0.1727, "step": 28216 }, { "epoch": 0.050032846356182166, "grad_norm": 0.55078125, "learning_rate": 0.0016594806755627013, "loss": 0.178, "step": 28218 }, { "epoch": 0.05003639252149198, "grad_norm": 0.93359375, "learning_rate": 0.0016594340534443403, "loss": 0.2368, "step": 28220 }, { "epoch": 0.050039938686801795, "grad_norm": 0.310546875, "learning_rate": 0.0016593874288793398, "loss": 0.1954, "step": 28222 }, { "epoch": 0.05004348485211161, "grad_norm": 0.59375, "learning_rate": 0.0016593408018679035, "loss": 0.1724, "step": 28224 }, { "epoch": 0.050047031017421424, "grad_norm": 4.8125, "learning_rate": 0.0016592941724102356, "loss": 0.2349, "step": 28226 }, { "epoch": 0.05005057718273124, "grad_norm": 0.62109375, "learning_rate": 0.0016592475405065397, "loss": 0.196, "step": 28228 }, { "epoch": 0.05005412334804105, "grad_norm": 0.31640625, "learning_rate": 0.0016592009061570199, "loss": 0.1943, "step": 28230 }, { "epoch": 0.05005766951335087, "grad_norm": 0.5078125, "learning_rate": 0.0016591542693618802, "loss": 0.1971, "step": 28232 }, { "epoch": 0.05006121567866068, "grad_norm": 0.388671875, "learning_rate": 0.0016591076301213246, "loss": 0.2035, "step": 28234 }, { "epoch": 0.0500647618439705, "grad_norm": 1.1484375, "learning_rate": 0.0016590609884355569, "loss": 0.2031, "step": 28236 }, { "epoch": 0.05006830800928031, "grad_norm": 0.45703125, "learning_rate": 0.0016590143443047812, "loss": 0.1875, "step": 28238 }, { "epoch": 0.05007185417459013, "grad_norm": 0.8671875, "learning_rate": 0.0016589676977292011, "loss": 0.2142, "step": 28240 }, { "epoch": 0.05007540033989995, "grad_norm": 0.408203125, "learning_rate": 0.001658921048709022, "loss": 0.1999, "step": 28242 }, { "epoch": 0.05007894650520976, "grad_norm": 0.486328125, "learning_rate": 0.001658874397244446, "loss": 0.5444, "step": 28244 }, { "epoch": 0.05008249267051958, "grad_norm": 0.765625, "learning_rate": 0.0016588277433356783, "loss": 0.1623, "step": 28246 }, { "epoch": 0.05008603883582939, "grad_norm": 0.435546875, "learning_rate": 0.0016587810869829226, "loss": 0.2599, "step": 28248 }, { "epoch": 0.050089585001139206, "grad_norm": 0.65234375, "learning_rate": 0.001658734428186383, "loss": 0.1847, "step": 28250 }, { "epoch": 0.05009313116644902, "grad_norm": 0.2333984375, "learning_rate": 0.0016586877669462635, "loss": 0.2075, "step": 28252 }, { "epoch": 0.050096677331758835, "grad_norm": 0.216796875, "learning_rate": 0.001658641103262768, "loss": 0.1435, "step": 28254 }, { "epoch": 0.05010022349706865, "grad_norm": 0.64453125, "learning_rate": 0.0016585944371361012, "loss": 0.3204, "step": 28256 }, { "epoch": 0.050103769662378464, "grad_norm": 0.62109375, "learning_rate": 0.001658547768566467, "loss": 0.4185, "step": 28258 }, { "epoch": 0.05010731582768828, "grad_norm": 0.357421875, "learning_rate": 0.0016585010975540691, "loss": 0.1776, "step": 28260 }, { "epoch": 0.0501108619929981, "grad_norm": 0.56640625, "learning_rate": 0.0016584544240991117, "loss": 0.2571, "step": 28262 }, { "epoch": 0.050114408158307915, "grad_norm": 0.62109375, "learning_rate": 0.0016584077482017989, "loss": 0.2108, "step": 28264 }, { "epoch": 0.05011795432361773, "grad_norm": 3.015625, "learning_rate": 0.0016583610698623354, "loss": 0.3322, "step": 28266 }, { "epoch": 0.050121500488927544, "grad_norm": 0.2138671875, "learning_rate": 0.0016583143890809245, "loss": 0.2296, "step": 28268 }, { "epoch": 0.05012504665423736, "grad_norm": 0.2490234375, "learning_rate": 0.0016582677058577708, "loss": 0.1322, "step": 28270 }, { "epoch": 0.05012859281954717, "grad_norm": 1.1875, "learning_rate": 0.0016582210201930789, "loss": 0.2008, "step": 28272 }, { "epoch": 0.05013213898485699, "grad_norm": 0.45703125, "learning_rate": 0.0016581743320870522, "loss": 0.1876, "step": 28274 }, { "epoch": 0.0501356851501668, "grad_norm": 0.984375, "learning_rate": 0.001658127641539895, "loss": 0.238, "step": 28276 }, { "epoch": 0.050139231315476616, "grad_norm": 0.66796875, "learning_rate": 0.0016580809485518123, "loss": 0.1863, "step": 28278 }, { "epoch": 0.05014277748078643, "grad_norm": 0.67578125, "learning_rate": 0.0016580342531230068, "loss": 0.2175, "step": 28280 }, { "epoch": 0.050146323646096246, "grad_norm": 1.0625, "learning_rate": 0.0016579875552536843, "loss": 0.1796, "step": 28282 }, { "epoch": 0.05014986981140606, "grad_norm": 1.8203125, "learning_rate": 0.001657940854944048, "loss": 0.3023, "step": 28284 }, { "epoch": 0.05015341597671588, "grad_norm": 0.3671875, "learning_rate": 0.0016578941521943028, "loss": 0.2615, "step": 28286 }, { "epoch": 0.050156962142025696, "grad_norm": 2.15625, "learning_rate": 0.0016578474470046525, "loss": 0.2042, "step": 28288 }, { "epoch": 0.05016050830733551, "grad_norm": 0.62890625, "learning_rate": 0.0016578007393753015, "loss": 0.178, "step": 28290 }, { "epoch": 0.050164054472645325, "grad_norm": 1.0234375, "learning_rate": 0.001657754029306454, "loss": 0.4545, "step": 28292 }, { "epoch": 0.05016760063795514, "grad_norm": 0.921875, "learning_rate": 0.0016577073167983146, "loss": 0.2084, "step": 28294 }, { "epoch": 0.050171146803264954, "grad_norm": 2.140625, "learning_rate": 0.001657660601851087, "loss": 0.2231, "step": 28296 }, { "epoch": 0.05017469296857477, "grad_norm": 6.46875, "learning_rate": 0.001657613884464976, "loss": 0.2285, "step": 28298 }, { "epoch": 0.05017823913388458, "grad_norm": 1.375, "learning_rate": 0.001657567164640186, "loss": 0.2802, "step": 28300 }, { "epoch": 0.0501817852991944, "grad_norm": 0.7578125, "learning_rate": 0.0016575204423769205, "loss": 0.2527, "step": 28302 }, { "epoch": 0.05018533146450421, "grad_norm": 1.0234375, "learning_rate": 0.0016574737176753849, "loss": 0.2037, "step": 28304 }, { "epoch": 0.05018887762981403, "grad_norm": 0.3046875, "learning_rate": 0.001657426990535783, "loss": 0.1911, "step": 28306 }, { "epoch": 0.05019242379512385, "grad_norm": 0.59765625, "learning_rate": 0.0016573802609583196, "loss": 0.2783, "step": 28308 }, { "epoch": 0.05019596996043366, "grad_norm": 0.423828125, "learning_rate": 0.001657333528943198, "loss": 0.2279, "step": 28310 }, { "epoch": 0.05019951612574348, "grad_norm": 1.2109375, "learning_rate": 0.0016572867944906241, "loss": 0.2399, "step": 28312 }, { "epoch": 0.05020306229105329, "grad_norm": 2.921875, "learning_rate": 0.0016572400576008004, "loss": 0.3888, "step": 28314 }, { "epoch": 0.05020660845636311, "grad_norm": 0.23828125, "learning_rate": 0.0016571933182739332, "loss": 0.1937, "step": 28316 }, { "epoch": 0.05021015462167292, "grad_norm": 0.42578125, "learning_rate": 0.0016571465765102258, "loss": 0.2018, "step": 28318 }, { "epoch": 0.050213700786982736, "grad_norm": 1.4296875, "learning_rate": 0.0016570998323098831, "loss": 0.2899, "step": 28320 }, { "epoch": 0.05021724695229255, "grad_norm": 0.671875, "learning_rate": 0.0016570530856731091, "loss": 0.2695, "step": 28322 }, { "epoch": 0.050220793117602365, "grad_norm": 1.8515625, "learning_rate": 0.0016570063366001086, "loss": 0.198, "step": 28324 }, { "epoch": 0.05022433928291218, "grad_norm": 0.4296875, "learning_rate": 0.0016569595850910862, "loss": 0.2423, "step": 28326 }, { "epoch": 0.050227885448221994, "grad_norm": 0.58203125, "learning_rate": 0.0016569128311462455, "loss": 0.2095, "step": 28328 }, { "epoch": 0.050231431613531816, "grad_norm": 3.15625, "learning_rate": 0.0016568660747657918, "loss": 0.3738, "step": 28330 }, { "epoch": 0.05023497777884163, "grad_norm": 0.271484375, "learning_rate": 0.0016568193159499295, "loss": 0.1645, "step": 28332 }, { "epoch": 0.050238523944151445, "grad_norm": 0.61328125, "learning_rate": 0.001656772554698863, "loss": 0.1884, "step": 28334 }, { "epoch": 0.05024207010946126, "grad_norm": 1.8515625, "learning_rate": 0.0016567257910127967, "loss": 0.2278, "step": 28336 }, { "epoch": 0.050245616274771074, "grad_norm": 0.66015625, "learning_rate": 0.001656679024891935, "loss": 0.1277, "step": 28338 }, { "epoch": 0.05024916244008089, "grad_norm": 0.85546875, "learning_rate": 0.0016566322563364828, "loss": 0.1521, "step": 28340 }, { "epoch": 0.0502527086053907, "grad_norm": 0.671875, "learning_rate": 0.0016565854853466443, "loss": 0.2182, "step": 28342 }, { "epoch": 0.05025625477070052, "grad_norm": 0.6640625, "learning_rate": 0.0016565387119226243, "loss": 0.3202, "step": 28344 }, { "epoch": 0.05025980093601033, "grad_norm": 1.25, "learning_rate": 0.001656491936064627, "loss": 0.4765, "step": 28346 }, { "epoch": 0.05026334710132015, "grad_norm": 1.4921875, "learning_rate": 0.0016564451577728574, "loss": 0.2112, "step": 28348 }, { "epoch": 0.05026689326662996, "grad_norm": 2.1875, "learning_rate": 0.0016563983770475198, "loss": 0.1927, "step": 28350 }, { "epoch": 0.050270439431939776, "grad_norm": 0.478515625, "learning_rate": 0.0016563515938888191, "loss": 0.2089, "step": 28352 }, { "epoch": 0.0502739855972496, "grad_norm": 1.078125, "learning_rate": 0.0016563048082969597, "loss": 0.2637, "step": 28354 }, { "epoch": 0.05027753176255941, "grad_norm": 1.2734375, "learning_rate": 0.0016562580202721462, "loss": 0.2384, "step": 28356 }, { "epoch": 0.050281077927869226, "grad_norm": 0.51171875, "learning_rate": 0.001656211229814583, "loss": 0.2029, "step": 28358 }, { "epoch": 0.05028462409317904, "grad_norm": 0.515625, "learning_rate": 0.001656164436924475, "loss": 0.1587, "step": 28360 }, { "epoch": 0.050288170258488855, "grad_norm": 0.58984375, "learning_rate": 0.0016561176416020269, "loss": 0.1497, "step": 28362 }, { "epoch": 0.05029171642379867, "grad_norm": 1.0078125, "learning_rate": 0.0016560708438474435, "loss": 0.2094, "step": 28364 }, { "epoch": 0.050295262589108485, "grad_norm": 0.44140625, "learning_rate": 0.001656024043660929, "loss": 0.1797, "step": 28366 }, { "epoch": 0.0502988087544183, "grad_norm": 2.546875, "learning_rate": 0.0016559772410426883, "loss": 0.2939, "step": 28368 }, { "epoch": 0.050302354919728114, "grad_norm": 2.71875, "learning_rate": 0.0016559304359929261, "loss": 0.3236, "step": 28370 }, { "epoch": 0.05030590108503793, "grad_norm": 0.2470703125, "learning_rate": 0.0016558836285118473, "loss": 0.313, "step": 28372 }, { "epoch": 0.05030944725034774, "grad_norm": 1.0546875, "learning_rate": 0.0016558368185996559, "loss": 0.2158, "step": 28374 }, { "epoch": 0.050312993415657564, "grad_norm": 0.921875, "learning_rate": 0.0016557900062565576, "loss": 0.2823, "step": 28376 }, { "epoch": 0.05031653958096738, "grad_norm": 0.447265625, "learning_rate": 0.0016557431914827566, "loss": 0.1624, "step": 28378 }, { "epoch": 0.05032008574627719, "grad_norm": 0.330078125, "learning_rate": 0.0016556963742784577, "loss": 0.1649, "step": 28380 }, { "epoch": 0.05032363191158701, "grad_norm": 0.59375, "learning_rate": 0.0016556495546438653, "loss": 0.2618, "step": 28382 }, { "epoch": 0.05032717807689682, "grad_norm": 1.921875, "learning_rate": 0.001655602732579185, "loss": 0.3033, "step": 28384 }, { "epoch": 0.05033072424220664, "grad_norm": 0.275390625, "learning_rate": 0.0016555559080846207, "loss": 0.1851, "step": 28386 }, { "epoch": 0.05033427040751645, "grad_norm": 0.49609375, "learning_rate": 0.0016555090811603778, "loss": 0.2547, "step": 28388 }, { "epoch": 0.050337816572826266, "grad_norm": 1.21875, "learning_rate": 0.0016554622518066607, "loss": 0.2015, "step": 28390 }, { "epoch": 0.05034136273813608, "grad_norm": 0.28125, "learning_rate": 0.0016554154200236744, "loss": 0.1935, "step": 28392 }, { "epoch": 0.050344908903445895, "grad_norm": 0.486328125, "learning_rate": 0.0016553685858116235, "loss": 0.2092, "step": 28394 }, { "epoch": 0.05034845506875571, "grad_norm": 0.86328125, "learning_rate": 0.0016553217491707133, "loss": 0.2217, "step": 28396 }, { "epoch": 0.05035200123406553, "grad_norm": 0.4296875, "learning_rate": 0.0016552749101011483, "loss": 0.2027, "step": 28398 }, { "epoch": 0.050355547399375346, "grad_norm": 0.439453125, "learning_rate": 0.0016552280686031333, "loss": 0.214, "step": 28400 }, { "epoch": 0.05035909356468516, "grad_norm": 0.68359375, "learning_rate": 0.0016551812246768734, "loss": 0.1738, "step": 28402 }, { "epoch": 0.050362639729994975, "grad_norm": 0.671875, "learning_rate": 0.0016551343783225729, "loss": 0.1951, "step": 28404 }, { "epoch": 0.05036618589530479, "grad_norm": 0.984375, "learning_rate": 0.0016550875295404377, "loss": 0.2178, "step": 28406 }, { "epoch": 0.050369732060614604, "grad_norm": 0.9765625, "learning_rate": 0.0016550406783306716, "loss": 0.3385, "step": 28408 }, { "epoch": 0.05037327822592442, "grad_norm": 0.54296875, "learning_rate": 0.0016549938246934802, "loss": 0.2505, "step": 28410 }, { "epoch": 0.05037682439123423, "grad_norm": 1.4375, "learning_rate": 0.001654946968629068, "loss": 0.2925, "step": 28412 }, { "epoch": 0.05038037055654405, "grad_norm": 0.421875, "learning_rate": 0.0016549001101376405, "loss": 0.2381, "step": 28414 }, { "epoch": 0.05038391672185386, "grad_norm": 0.388671875, "learning_rate": 0.0016548532492194018, "loss": 0.3066, "step": 28416 }, { "epoch": 0.05038746288716368, "grad_norm": 0.349609375, "learning_rate": 0.0016548063858745576, "loss": 0.1539, "step": 28418 }, { "epoch": 0.05039100905247349, "grad_norm": 1.2578125, "learning_rate": 0.0016547595201033122, "loss": 0.2224, "step": 28420 }, { "epoch": 0.05039455521778331, "grad_norm": 0.44921875, "learning_rate": 0.001654712651905871, "loss": 0.1616, "step": 28422 }, { "epoch": 0.05039810138309313, "grad_norm": 0.38671875, "learning_rate": 0.001654665781282439, "loss": 0.2092, "step": 28424 }, { "epoch": 0.05040164754840294, "grad_norm": 1.1015625, "learning_rate": 0.0016546189082332211, "loss": 0.2487, "step": 28426 }, { "epoch": 0.050405193713712756, "grad_norm": 0.703125, "learning_rate": 0.001654572032758422, "loss": 0.3997, "step": 28428 }, { "epoch": 0.05040873987902257, "grad_norm": 0.8125, "learning_rate": 0.0016545251548582471, "loss": 0.1872, "step": 28430 }, { "epoch": 0.050412286044332386, "grad_norm": 0.427734375, "learning_rate": 0.0016544782745329014, "loss": 0.2251, "step": 28432 }, { "epoch": 0.0504158322096422, "grad_norm": 0.375, "learning_rate": 0.00165443139178259, "loss": 0.1772, "step": 28434 }, { "epoch": 0.050419378374952015, "grad_norm": 0.91796875, "learning_rate": 0.001654384506607517, "loss": 0.2359, "step": 28436 }, { "epoch": 0.05042292454026183, "grad_norm": 1.40625, "learning_rate": 0.001654337619007889, "loss": 0.211, "step": 28438 }, { "epoch": 0.050426470705571644, "grad_norm": 0.57421875, "learning_rate": 0.0016542907289839098, "loss": 0.3137, "step": 28440 }, { "epoch": 0.05043001687088146, "grad_norm": 0.54296875, "learning_rate": 0.0016542438365357852, "loss": 0.2254, "step": 28442 }, { "epoch": 0.05043356303619128, "grad_norm": 0.5546875, "learning_rate": 0.0016541969416637198, "loss": 0.231, "step": 28444 }, { "epoch": 0.050437109201501094, "grad_norm": 0.7890625, "learning_rate": 0.001654150044367919, "loss": 0.346, "step": 28446 }, { "epoch": 0.05044065536681091, "grad_norm": 0.42578125, "learning_rate": 0.0016541031446485877, "loss": 0.2195, "step": 28448 }, { "epoch": 0.050444201532120723, "grad_norm": 0.33203125, "learning_rate": 0.001654056242505931, "loss": 0.224, "step": 28450 }, { "epoch": 0.05044774769743054, "grad_norm": 2.078125, "learning_rate": 0.0016540093379401543, "loss": 0.189, "step": 28452 }, { "epoch": 0.05045129386274035, "grad_norm": 1.5234375, "learning_rate": 0.0016539624309514626, "loss": 0.323, "step": 28454 }, { "epoch": 0.05045484002805017, "grad_norm": 0.80078125, "learning_rate": 0.001653915521540061, "loss": 0.2229, "step": 28456 }, { "epoch": 0.05045838619335998, "grad_norm": 0.408203125, "learning_rate": 0.0016538686097061547, "loss": 0.2165, "step": 28458 }, { "epoch": 0.050461932358669796, "grad_norm": 0.6640625, "learning_rate": 0.0016538216954499485, "loss": 0.2192, "step": 28460 }, { "epoch": 0.05046547852397961, "grad_norm": 0.55078125, "learning_rate": 0.0016537747787716483, "loss": 0.255, "step": 28462 }, { "epoch": 0.050469024689289425, "grad_norm": 0.80859375, "learning_rate": 0.0016537278596714584, "loss": 0.223, "step": 28464 }, { "epoch": 0.05047257085459925, "grad_norm": 0.5859375, "learning_rate": 0.001653680938149585, "loss": 0.1777, "step": 28466 }, { "epoch": 0.05047611701990906, "grad_norm": 0.375, "learning_rate": 0.0016536340142062325, "loss": 0.1761, "step": 28468 }, { "epoch": 0.050479663185218876, "grad_norm": 0.79296875, "learning_rate": 0.0016535870878416066, "loss": 0.2346, "step": 28470 }, { "epoch": 0.05048320935052869, "grad_norm": 3.796875, "learning_rate": 0.001653540159055912, "loss": 0.2976, "step": 28472 }, { "epoch": 0.050486755515838505, "grad_norm": 0.71484375, "learning_rate": 0.0016534932278493549, "loss": 0.2349, "step": 28474 }, { "epoch": 0.05049030168114832, "grad_norm": 0.8046875, "learning_rate": 0.0016534462942221393, "loss": 0.1838, "step": 28476 }, { "epoch": 0.050493847846458134, "grad_norm": 0.625, "learning_rate": 0.0016533993581744715, "loss": 0.5133, "step": 28478 }, { "epoch": 0.05049739401176795, "grad_norm": 1.6953125, "learning_rate": 0.001653352419706556, "loss": 0.2398, "step": 28480 }, { "epoch": 0.05050094017707776, "grad_norm": 0.447265625, "learning_rate": 0.0016533054788185986, "loss": 0.2115, "step": 28482 }, { "epoch": 0.05050448634238758, "grad_norm": 0.7421875, "learning_rate": 0.0016532585355108047, "loss": 0.2186, "step": 28484 }, { "epoch": 0.05050803250769739, "grad_norm": 0.46484375, "learning_rate": 0.001653211589783379, "loss": 0.1774, "step": 28486 }, { "epoch": 0.05051157867300721, "grad_norm": 0.47265625, "learning_rate": 0.0016531646416365275, "loss": 0.1943, "step": 28488 }, { "epoch": 0.05051512483831703, "grad_norm": 10.125, "learning_rate": 0.001653117691070455, "loss": 0.3016, "step": 28490 }, { "epoch": 0.05051867100362684, "grad_norm": 0.462890625, "learning_rate": 0.0016530707380853667, "loss": 0.26, "step": 28492 }, { "epoch": 0.05052221716893666, "grad_norm": 0.392578125, "learning_rate": 0.0016530237826814685, "loss": 0.1705, "step": 28494 }, { "epoch": 0.05052576333424647, "grad_norm": 0.6796875, "learning_rate": 0.0016529768248589656, "loss": 0.4452, "step": 28496 }, { "epoch": 0.05052930949955629, "grad_norm": 1.9140625, "learning_rate": 0.0016529298646180634, "loss": 0.1802, "step": 28498 }, { "epoch": 0.0505328556648661, "grad_norm": 0.30859375, "learning_rate": 0.0016528829019589665, "loss": 0.1679, "step": 28500 }, { "epoch": 0.050536401830175916, "grad_norm": 0.2578125, "learning_rate": 0.0016528359368818816, "loss": 0.3359, "step": 28502 }, { "epoch": 0.05053994799548573, "grad_norm": 0.62109375, "learning_rate": 0.001652788969387013, "loss": 0.2422, "step": 28504 }, { "epoch": 0.050543494160795545, "grad_norm": 0.197265625, "learning_rate": 0.0016527419994745668, "loss": 0.1634, "step": 28506 }, { "epoch": 0.05054704032610536, "grad_norm": 0.369140625, "learning_rate": 0.0016526950271447483, "loss": 0.1769, "step": 28508 }, { "epoch": 0.050550586491415174, "grad_norm": 0.21484375, "learning_rate": 0.0016526480523977622, "loss": 0.2307, "step": 28510 }, { "epoch": 0.050554132656724995, "grad_norm": 0.921875, "learning_rate": 0.0016526010752338151, "loss": 0.2101, "step": 28512 }, { "epoch": 0.05055767882203481, "grad_norm": 1.8046875, "learning_rate": 0.0016525540956531118, "loss": 0.1986, "step": 28514 }, { "epoch": 0.050561224987344625, "grad_norm": 0.390625, "learning_rate": 0.001652507113655858, "loss": 0.2451, "step": 28516 }, { "epoch": 0.05056477115265444, "grad_norm": 1.4375, "learning_rate": 0.0016524601292422587, "loss": 0.2841, "step": 28518 }, { "epoch": 0.050568317317964254, "grad_norm": 0.3828125, "learning_rate": 0.0016524131424125199, "loss": 0.4561, "step": 28520 }, { "epoch": 0.05057186348327407, "grad_norm": 0.58984375, "learning_rate": 0.0016523661531668468, "loss": 0.462, "step": 28522 }, { "epoch": 0.05057540964858388, "grad_norm": 0.8828125, "learning_rate": 0.0016523191615054448, "loss": 0.1718, "step": 28524 }, { "epoch": 0.0505789558138937, "grad_norm": 0.6640625, "learning_rate": 0.00165227216742852, "loss": 0.2612, "step": 28526 }, { "epoch": 0.05058250197920351, "grad_norm": 0.408203125, "learning_rate": 0.0016522251709362775, "loss": 0.1762, "step": 28528 }, { "epoch": 0.050586048144513326, "grad_norm": 0.359375, "learning_rate": 0.0016521781720289224, "loss": 0.2132, "step": 28530 }, { "epoch": 0.05058959430982314, "grad_norm": 0.470703125, "learning_rate": 0.0016521311707066613, "loss": 0.2066, "step": 28532 }, { "epoch": 0.05059314047513296, "grad_norm": 0.37109375, "learning_rate": 0.001652084166969699, "loss": 0.2045, "step": 28534 }, { "epoch": 0.05059668664044278, "grad_norm": 0.84765625, "learning_rate": 0.001652037160818241, "loss": 0.1783, "step": 28536 }, { "epoch": 0.05060023280575259, "grad_norm": 1.4609375, "learning_rate": 0.0016519901522524933, "loss": 0.2025, "step": 28538 }, { "epoch": 0.050603778971062406, "grad_norm": 0.6484375, "learning_rate": 0.001651943141272661, "loss": 0.2168, "step": 28540 }, { "epoch": 0.05060732513637222, "grad_norm": 1.6171875, "learning_rate": 0.0016518961278789506, "loss": 0.4143, "step": 28542 }, { "epoch": 0.050610871301682035, "grad_norm": 0.431640625, "learning_rate": 0.0016518491120715666, "loss": 0.1851, "step": 28544 }, { "epoch": 0.05061441746699185, "grad_norm": 0.96484375, "learning_rate": 0.0016518020938507154, "loss": 0.2218, "step": 28546 }, { "epoch": 0.050617963632301664, "grad_norm": 0.53125, "learning_rate": 0.0016517550732166025, "loss": 0.185, "step": 28548 }, { "epoch": 0.05062150979761148, "grad_norm": 0.52734375, "learning_rate": 0.001651708050169433, "loss": 0.2293, "step": 28550 }, { "epoch": 0.05062505596292129, "grad_norm": 0.2451171875, "learning_rate": 0.0016516610247094131, "loss": 0.1684, "step": 28552 }, { "epoch": 0.05062860212823111, "grad_norm": 0.8046875, "learning_rate": 0.0016516139968367485, "loss": 0.1954, "step": 28554 }, { "epoch": 0.05063214829354092, "grad_norm": 0.86328125, "learning_rate": 0.0016515669665516446, "loss": 0.2238, "step": 28556 }, { "epoch": 0.050635694458850744, "grad_norm": 4.0, "learning_rate": 0.0016515199338543072, "loss": 0.3887, "step": 28558 }, { "epoch": 0.05063924062416056, "grad_norm": 0.53125, "learning_rate": 0.001651472898744942, "loss": 0.2151, "step": 28560 }, { "epoch": 0.05064278678947037, "grad_norm": 0.2890625, "learning_rate": 0.0016514258612237544, "loss": 0.2271, "step": 28562 }, { "epoch": 0.05064633295478019, "grad_norm": 7.59375, "learning_rate": 0.0016513788212909508, "loss": 0.2133, "step": 28564 }, { "epoch": 0.05064987912009, "grad_norm": 0.375, "learning_rate": 0.0016513317789467363, "loss": 0.206, "step": 28566 }, { "epoch": 0.05065342528539982, "grad_norm": 0.314453125, "learning_rate": 0.0016512847341913167, "loss": 0.2106, "step": 28568 }, { "epoch": 0.05065697145070963, "grad_norm": 0.3828125, "learning_rate": 0.0016512376870248982, "loss": 0.2003, "step": 28570 }, { "epoch": 0.050660517616019446, "grad_norm": 2.296875, "learning_rate": 0.001651190637447686, "loss": 0.2541, "step": 28572 }, { "epoch": 0.05066406378132926, "grad_norm": 2.0, "learning_rate": 0.0016511435854598864, "loss": 0.342, "step": 28574 }, { "epoch": 0.050667609946639075, "grad_norm": 0.3828125, "learning_rate": 0.0016510965310617047, "loss": 0.1684, "step": 28576 }, { "epoch": 0.05067115611194889, "grad_norm": 0.65234375, "learning_rate": 0.0016510494742533468, "loss": 0.1694, "step": 28578 }, { "epoch": 0.05067470227725871, "grad_norm": 0.96484375, "learning_rate": 0.0016510024150350187, "loss": 0.1838, "step": 28580 }, { "epoch": 0.050678248442568526, "grad_norm": 0.76953125, "learning_rate": 0.001650955353406926, "loss": 0.1979, "step": 28582 }, { "epoch": 0.05068179460787834, "grad_norm": 0.79296875, "learning_rate": 0.001650908289369275, "loss": 0.1963, "step": 28584 }, { "epoch": 0.050685340773188155, "grad_norm": 0.59765625, "learning_rate": 0.001650861222922271, "loss": 0.1702, "step": 28586 }, { "epoch": 0.05068888693849797, "grad_norm": 0.6953125, "learning_rate": 0.0016508141540661197, "loss": 0.2374, "step": 28588 }, { "epoch": 0.050692433103807784, "grad_norm": 0.859375, "learning_rate": 0.0016507670828010276, "loss": 0.2139, "step": 28590 }, { "epoch": 0.0506959792691176, "grad_norm": 0.8671875, "learning_rate": 0.0016507200091272, "loss": 0.1982, "step": 28592 }, { "epoch": 0.05069952543442741, "grad_norm": 0.490234375, "learning_rate": 0.001650672933044843, "loss": 0.1916, "step": 28594 }, { "epoch": 0.05070307159973723, "grad_norm": 1.421875, "learning_rate": 0.0016506258545541622, "loss": 0.1966, "step": 28596 }, { "epoch": 0.05070661776504704, "grad_norm": 0.7890625, "learning_rate": 0.0016505787736553641, "loss": 0.2751, "step": 28598 }, { "epoch": 0.05071016393035686, "grad_norm": 0.625, "learning_rate": 0.0016505316903486543, "loss": 0.1882, "step": 28600 }, { "epoch": 0.05071371009566668, "grad_norm": 2.546875, "learning_rate": 0.0016504846046342383, "loss": 0.4404, "step": 28602 }, { "epoch": 0.05071725626097649, "grad_norm": 0.7265625, "learning_rate": 0.0016504375165123228, "loss": 0.2123, "step": 28604 }, { "epoch": 0.05072080242628631, "grad_norm": 1.3125, "learning_rate": 0.001650390425983113, "loss": 0.1901, "step": 28606 }, { "epoch": 0.05072434859159612, "grad_norm": 0.5390625, "learning_rate": 0.0016503433330468152, "loss": 0.241, "step": 28608 }, { "epoch": 0.050727894756905936, "grad_norm": 0.97265625, "learning_rate": 0.0016502962377036357, "loss": 0.2277, "step": 28610 }, { "epoch": 0.05073144092221575, "grad_norm": 0.5546875, "learning_rate": 0.0016502491399537798, "loss": 0.313, "step": 28612 }, { "epoch": 0.050734987087525565, "grad_norm": 0.7734375, "learning_rate": 0.0016502020397974539, "loss": 0.5285, "step": 28614 }, { "epoch": 0.05073853325283538, "grad_norm": 0.341796875, "learning_rate": 0.0016501549372348642, "loss": 0.1737, "step": 28616 }, { "epoch": 0.050742079418145195, "grad_norm": 0.484375, "learning_rate": 0.0016501078322662158, "loss": 0.1719, "step": 28618 }, { "epoch": 0.05074562558345501, "grad_norm": 1.25, "learning_rate": 0.0016500607248917155, "loss": 0.2139, "step": 28620 }, { "epoch": 0.050749171748764824, "grad_norm": 0.58203125, "learning_rate": 0.001650013615111569, "loss": 0.1895, "step": 28622 }, { "epoch": 0.05075271791407464, "grad_norm": 0.8359375, "learning_rate": 0.0016499665029259824, "loss": 0.3267, "step": 28624 }, { "epoch": 0.05075626407938446, "grad_norm": 0.42578125, "learning_rate": 0.001649919388335162, "loss": 0.1946, "step": 28626 }, { "epoch": 0.050759810244694274, "grad_norm": 0.314453125, "learning_rate": 0.0016498722713393138, "loss": 0.1703, "step": 28628 }, { "epoch": 0.05076335641000409, "grad_norm": 0.369140625, "learning_rate": 0.0016498251519386433, "loss": 0.1619, "step": 28630 }, { "epoch": 0.0507669025753139, "grad_norm": 1.2265625, "learning_rate": 0.001649778030133357, "loss": 0.3634, "step": 28632 }, { "epoch": 0.05077044874062372, "grad_norm": 1.0625, "learning_rate": 0.001649730905923661, "loss": 0.2644, "step": 28634 }, { "epoch": 0.05077399490593353, "grad_norm": 0.7890625, "learning_rate": 0.0016496837793097613, "loss": 0.1897, "step": 28636 }, { "epoch": 0.05077754107124335, "grad_norm": 1.3203125, "learning_rate": 0.001649636650291864, "loss": 0.1791, "step": 28638 }, { "epoch": 0.05078108723655316, "grad_norm": 0.322265625, "learning_rate": 0.0016495895188701755, "loss": 0.1733, "step": 28640 }, { "epoch": 0.050784633401862976, "grad_norm": 0.396484375, "learning_rate": 0.0016495423850449017, "loss": 0.1832, "step": 28642 }, { "epoch": 0.05078817956717279, "grad_norm": 0.349609375, "learning_rate": 0.0016494952488162484, "loss": 0.1928, "step": 28644 }, { "epoch": 0.050791725732482605, "grad_norm": 0.72265625, "learning_rate": 0.0016494481101844225, "loss": 0.1984, "step": 28646 }, { "epoch": 0.05079527189779243, "grad_norm": 0.78125, "learning_rate": 0.0016494009691496296, "loss": 0.2059, "step": 28648 }, { "epoch": 0.05079881806310224, "grad_norm": 0.6640625, "learning_rate": 0.0016493538257120754, "loss": 0.2088, "step": 28650 }, { "epoch": 0.050802364228412056, "grad_norm": 2.0625, "learning_rate": 0.0016493066798719674, "loss": 0.4083, "step": 28652 }, { "epoch": 0.05080591039372187, "grad_norm": 0.625, "learning_rate": 0.001649259531629511, "loss": 0.2082, "step": 28654 }, { "epoch": 0.050809456559031685, "grad_norm": 0.6015625, "learning_rate": 0.0016492123809849122, "loss": 0.3202, "step": 28656 }, { "epoch": 0.0508130027243415, "grad_norm": 2.09375, "learning_rate": 0.0016491652279383771, "loss": 0.2159, "step": 28658 }, { "epoch": 0.050816548889651314, "grad_norm": 0.267578125, "learning_rate": 0.0016491180724901129, "loss": 0.2132, "step": 28660 }, { "epoch": 0.05082009505496113, "grad_norm": 0.45703125, "learning_rate": 0.0016490709146403252, "loss": 0.1729, "step": 28662 }, { "epoch": 0.05082364122027094, "grad_norm": 0.435546875, "learning_rate": 0.0016490237543892198, "loss": 0.2795, "step": 28664 }, { "epoch": 0.05082718738558076, "grad_norm": 3.390625, "learning_rate": 0.0016489765917370037, "loss": 0.3065, "step": 28666 }, { "epoch": 0.05083073355089057, "grad_norm": 0.4921875, "learning_rate": 0.0016489294266838828, "loss": 0.2114, "step": 28668 }, { "epoch": 0.050834279716200394, "grad_norm": 0.267578125, "learning_rate": 0.0016488822592300636, "loss": 0.2118, "step": 28670 }, { "epoch": 0.05083782588151021, "grad_norm": 0.337890625, "learning_rate": 0.001648835089375752, "loss": 0.2019, "step": 28672 }, { "epoch": 0.05084137204682002, "grad_norm": 0.52734375, "learning_rate": 0.001648787917121155, "loss": 0.2566, "step": 28674 }, { "epoch": 0.05084491821212984, "grad_norm": 0.259765625, "learning_rate": 0.001648740742466478, "loss": 0.1995, "step": 28676 }, { "epoch": 0.05084846437743965, "grad_norm": 0.68359375, "learning_rate": 0.0016486935654119275, "loss": 0.184, "step": 28678 }, { "epoch": 0.050852010542749466, "grad_norm": 1.046875, "learning_rate": 0.0016486463859577106, "loss": 0.268, "step": 28680 }, { "epoch": 0.05085555670805928, "grad_norm": 0.287109375, "learning_rate": 0.0016485992041040329, "loss": 0.3128, "step": 28682 }, { "epoch": 0.050859102873369096, "grad_norm": 0.3125, "learning_rate": 0.001648552019851101, "loss": 0.3102, "step": 28684 }, { "epoch": 0.05086264903867891, "grad_norm": 0.26171875, "learning_rate": 0.001648504833199121, "loss": 0.1639, "step": 28686 }, { "epoch": 0.050866195203988725, "grad_norm": 0.263671875, "learning_rate": 0.0016484576441482997, "loss": 0.2285, "step": 28688 }, { "epoch": 0.05086974136929854, "grad_norm": 1.7421875, "learning_rate": 0.0016484104526988427, "loss": 0.2878, "step": 28690 }, { "epoch": 0.050873287534608354, "grad_norm": 0.72265625, "learning_rate": 0.0016483632588509575, "loss": 0.2177, "step": 28692 }, { "epoch": 0.050876833699918175, "grad_norm": 0.474609375, "learning_rate": 0.0016483160626048498, "loss": 0.1657, "step": 28694 }, { "epoch": 0.05088037986522799, "grad_norm": 0.419921875, "learning_rate": 0.0016482688639607264, "loss": 0.1881, "step": 28696 }, { "epoch": 0.050883926030537804, "grad_norm": 1.609375, "learning_rate": 0.0016482216629187933, "loss": 0.2206, "step": 28698 }, { "epoch": 0.05088747219584762, "grad_norm": 0.271484375, "learning_rate": 0.0016481744594792568, "loss": 0.1681, "step": 28700 }, { "epoch": 0.050891018361157433, "grad_norm": 1.8828125, "learning_rate": 0.0016481272536423238, "loss": 0.3243, "step": 28702 }, { "epoch": 0.05089456452646725, "grad_norm": 0.443359375, "learning_rate": 0.0016480800454082008, "loss": 0.1967, "step": 28704 }, { "epoch": 0.05089811069177706, "grad_norm": 0.345703125, "learning_rate": 0.0016480328347770938, "loss": 0.1687, "step": 28706 }, { "epoch": 0.05090165685708688, "grad_norm": 0.6875, "learning_rate": 0.0016479856217492094, "loss": 0.3906, "step": 28708 }, { "epoch": 0.05090520302239669, "grad_norm": 0.365234375, "learning_rate": 0.0016479384063247545, "loss": 0.1591, "step": 28710 }, { "epoch": 0.050908749187706506, "grad_norm": 0.404296875, "learning_rate": 0.0016478911885039352, "loss": 0.2264, "step": 28712 }, { "epoch": 0.05091229535301632, "grad_norm": 0.75, "learning_rate": 0.001647843968286958, "loss": 0.1519, "step": 28714 }, { "epoch": 0.05091584151832614, "grad_norm": 0.330078125, "learning_rate": 0.0016477967456740292, "loss": 0.4789, "step": 28716 }, { "epoch": 0.05091938768363596, "grad_norm": 0.42578125, "learning_rate": 0.0016477495206653563, "loss": 0.2105, "step": 28718 }, { "epoch": 0.05092293384894577, "grad_norm": 0.322265625, "learning_rate": 0.0016477022932611448, "loss": 0.1669, "step": 28720 }, { "epoch": 0.050926480014255586, "grad_norm": 1.1015625, "learning_rate": 0.001647655063461602, "loss": 0.2072, "step": 28722 }, { "epoch": 0.0509300261795654, "grad_norm": 2.328125, "learning_rate": 0.0016476078312669333, "loss": 0.1838, "step": 28724 }, { "epoch": 0.050933572344875215, "grad_norm": 0.484375, "learning_rate": 0.0016475605966773468, "loss": 0.2668, "step": 28726 }, { "epoch": 0.05093711851018503, "grad_norm": 0.88671875, "learning_rate": 0.0016475133596930479, "loss": 0.1725, "step": 28728 }, { "epoch": 0.050940664675494844, "grad_norm": 0.37890625, "learning_rate": 0.0016474661203142436, "loss": 0.1998, "step": 28730 }, { "epoch": 0.05094421084080466, "grad_norm": 1.9296875, "learning_rate": 0.0016474188785411405, "loss": 0.204, "step": 28732 }, { "epoch": 0.05094775700611447, "grad_norm": 1.796875, "learning_rate": 0.0016473716343739454, "loss": 0.2857, "step": 28734 }, { "epoch": 0.05095130317142429, "grad_norm": 0.427734375, "learning_rate": 0.0016473243878128649, "loss": 0.247, "step": 28736 }, { "epoch": 0.05095484933673411, "grad_norm": 0.392578125, "learning_rate": 0.0016472771388581049, "loss": 0.325, "step": 28738 }, { "epoch": 0.050958395502043924, "grad_norm": 0.41796875, "learning_rate": 0.0016472298875098727, "loss": 0.2842, "step": 28740 }, { "epoch": 0.05096194166735374, "grad_norm": 0.87890625, "learning_rate": 0.0016471826337683753, "loss": 0.1386, "step": 28742 }, { "epoch": 0.05096548783266355, "grad_norm": 0.5859375, "learning_rate": 0.0016471353776338182, "loss": 0.1798, "step": 28744 }, { "epoch": 0.05096903399797337, "grad_norm": 0.65234375, "learning_rate": 0.0016470881191064095, "loss": 0.2383, "step": 28746 }, { "epoch": 0.05097258016328318, "grad_norm": 0.46875, "learning_rate": 0.0016470408581863547, "loss": 0.2776, "step": 28748 }, { "epoch": 0.050976126328593, "grad_norm": 0.57421875, "learning_rate": 0.0016469935948738613, "loss": 0.2373, "step": 28750 }, { "epoch": 0.05097967249390281, "grad_norm": 0.41015625, "learning_rate": 0.0016469463291691351, "loss": 0.1622, "step": 28752 }, { "epoch": 0.050983218659212626, "grad_norm": 0.404296875, "learning_rate": 0.001646899061072384, "loss": 0.2334, "step": 28754 }, { "epoch": 0.05098676482452244, "grad_norm": 0.4765625, "learning_rate": 0.0016468517905838136, "loss": 0.1622, "step": 28756 }, { "epoch": 0.050990310989832255, "grad_norm": 0.5625, "learning_rate": 0.0016468045177036315, "loss": 0.1986, "step": 28758 }, { "epoch": 0.05099385715514207, "grad_norm": 0.5, "learning_rate": 0.0016467572424320436, "loss": 0.1887, "step": 28760 }, { "epoch": 0.05099740332045189, "grad_norm": 0.69921875, "learning_rate": 0.0016467099647692574, "loss": 0.182, "step": 28762 }, { "epoch": 0.051000949485761705, "grad_norm": 0.40625, "learning_rate": 0.0016466626847154793, "loss": 0.1693, "step": 28764 }, { "epoch": 0.05100449565107152, "grad_norm": 0.3828125, "learning_rate": 0.0016466154022709164, "loss": 0.1587, "step": 28766 }, { "epoch": 0.051008041816381335, "grad_norm": 1.296875, "learning_rate": 0.001646568117435775, "loss": 0.205, "step": 28768 }, { "epoch": 0.05101158798169115, "grad_norm": 0.408203125, "learning_rate": 0.0016465208302102621, "loss": 0.2307, "step": 28770 }, { "epoch": 0.051015134147000964, "grad_norm": 0.5078125, "learning_rate": 0.0016464735405945846, "loss": 0.1716, "step": 28772 }, { "epoch": 0.05101868031231078, "grad_norm": 0.546875, "learning_rate": 0.0016464262485889492, "loss": 0.1816, "step": 28774 }, { "epoch": 0.05102222647762059, "grad_norm": 1.203125, "learning_rate": 0.001646378954193563, "loss": 0.2674, "step": 28776 }, { "epoch": 0.05102577264293041, "grad_norm": 0.494140625, "learning_rate": 0.0016463316574086325, "loss": 0.1873, "step": 28778 }, { "epoch": 0.05102931880824022, "grad_norm": 1.5234375, "learning_rate": 0.0016462843582343647, "loss": 0.4065, "step": 28780 }, { "epoch": 0.051032864973550036, "grad_norm": 0.78515625, "learning_rate": 0.0016462370566709664, "loss": 0.196, "step": 28782 }, { "epoch": 0.05103641113885986, "grad_norm": 0.7578125, "learning_rate": 0.0016461897527186445, "loss": 0.23, "step": 28784 }, { "epoch": 0.05103995730416967, "grad_norm": 4.21875, "learning_rate": 0.0016461424463776058, "loss": 0.3921, "step": 28786 }, { "epoch": 0.05104350346947949, "grad_norm": 0.53125, "learning_rate": 0.0016460951376480574, "loss": 0.1672, "step": 28788 }, { "epoch": 0.0510470496347893, "grad_norm": 0.5625, "learning_rate": 0.0016460478265302061, "loss": 0.1965, "step": 28790 }, { "epoch": 0.051050595800099116, "grad_norm": 0.56640625, "learning_rate": 0.0016460005130242588, "loss": 0.19, "step": 28792 }, { "epoch": 0.05105414196540893, "grad_norm": 0.439453125, "learning_rate": 0.0016459531971304221, "loss": 0.2144, "step": 28794 }, { "epoch": 0.051057688130718745, "grad_norm": 1.2578125, "learning_rate": 0.0016459058788489036, "loss": 0.1698, "step": 28796 }, { "epoch": 0.05106123429602856, "grad_norm": 0.88671875, "learning_rate": 0.0016458585581799099, "loss": 0.1734, "step": 28798 }, { "epoch": 0.051064780461338374, "grad_norm": 0.78125, "learning_rate": 0.001645811235123648, "loss": 0.2268, "step": 28800 }, { "epoch": 0.05106832662664819, "grad_norm": 0.68359375, "learning_rate": 0.0016457639096803245, "loss": 0.1987, "step": 28802 }, { "epoch": 0.051071872791958, "grad_norm": 2.390625, "learning_rate": 0.001645716581850147, "loss": 0.3474, "step": 28804 }, { "epoch": 0.051075418957267825, "grad_norm": 0.26953125, "learning_rate": 0.0016456692516333216, "loss": 0.1969, "step": 28806 }, { "epoch": 0.05107896512257764, "grad_norm": 0.478515625, "learning_rate": 0.0016456219190300564, "loss": 0.2385, "step": 28808 }, { "epoch": 0.051082511287887454, "grad_norm": 0.2734375, "learning_rate": 0.0016455745840405576, "loss": 0.2877, "step": 28810 }, { "epoch": 0.05108605745319727, "grad_norm": 0.478515625, "learning_rate": 0.001645527246665033, "loss": 0.1883, "step": 28812 }, { "epoch": 0.05108960361850708, "grad_norm": 0.392578125, "learning_rate": 0.001645479906903688, "loss": 0.2601, "step": 28814 }, { "epoch": 0.0510931497838169, "grad_norm": 0.79296875, "learning_rate": 0.0016454325647567317, "loss": 0.1992, "step": 28816 }, { "epoch": 0.05109669594912671, "grad_norm": 0.408203125, "learning_rate": 0.0016453852202243698, "loss": 0.208, "step": 28818 }, { "epoch": 0.05110024211443653, "grad_norm": 0.625, "learning_rate": 0.00164533787330681, "loss": 0.2227, "step": 28820 }, { "epoch": 0.05110378827974634, "grad_norm": 0.5703125, "learning_rate": 0.0016452905240042588, "loss": 0.1654, "step": 28822 }, { "epoch": 0.051107334445056156, "grad_norm": 0.71875, "learning_rate": 0.0016452431723169242, "loss": 0.3266, "step": 28824 }, { "epoch": 0.05111088061036597, "grad_norm": 0.73828125, "learning_rate": 0.001645195818245012, "loss": 0.2287, "step": 28826 }, { "epoch": 0.051114426775675785, "grad_norm": 0.5390625, "learning_rate": 0.00164514846178873, "loss": 0.1434, "step": 28828 }, { "epoch": 0.051117972940985607, "grad_norm": 0.92578125, "learning_rate": 0.0016451011029482855, "loss": 0.2887, "step": 28830 }, { "epoch": 0.05112151910629542, "grad_norm": 0.58984375, "learning_rate": 0.0016450537417238855, "loss": 0.1941, "step": 28832 }, { "epoch": 0.051125065271605236, "grad_norm": 0.54296875, "learning_rate": 0.001645006378115737, "loss": 0.1971, "step": 28834 }, { "epoch": 0.05112861143691505, "grad_norm": 0.91015625, "learning_rate": 0.0016449590121240469, "loss": 0.2037, "step": 28836 }, { "epoch": 0.051132157602224865, "grad_norm": 0.578125, "learning_rate": 0.0016449116437490228, "loss": 0.2103, "step": 28838 }, { "epoch": 0.05113570376753468, "grad_norm": 0.318359375, "learning_rate": 0.0016448642729908719, "loss": 0.301, "step": 28840 }, { "epoch": 0.051139249932844494, "grad_norm": 1.640625, "learning_rate": 0.001644816899849801, "loss": 0.2176, "step": 28842 }, { "epoch": 0.05114279609815431, "grad_norm": 1.6875, "learning_rate": 0.0016447695243260176, "loss": 0.2285, "step": 28844 }, { "epoch": 0.05114634226346412, "grad_norm": 0.6015625, "learning_rate": 0.0016447221464197285, "loss": 0.2002, "step": 28846 }, { "epoch": 0.05114988842877394, "grad_norm": 0.337890625, "learning_rate": 0.0016446747661311412, "loss": 0.1468, "step": 28848 }, { "epoch": 0.05115343459408375, "grad_norm": 0.734375, "learning_rate": 0.0016446273834604629, "loss": 0.1857, "step": 28850 }, { "epoch": 0.051156980759393574, "grad_norm": 0.306640625, "learning_rate": 0.0016445799984079008, "loss": 0.1961, "step": 28852 }, { "epoch": 0.05116052692470339, "grad_norm": 0.287109375, "learning_rate": 0.0016445326109736618, "loss": 0.1867, "step": 28854 }, { "epoch": 0.0511640730900132, "grad_norm": 0.447265625, "learning_rate": 0.001644485221157954, "loss": 0.1803, "step": 28856 }, { "epoch": 0.05116761925532302, "grad_norm": 0.49609375, "learning_rate": 0.0016444378289609837, "loss": 0.2511, "step": 28858 }, { "epoch": 0.05117116542063283, "grad_norm": 0.73828125, "learning_rate": 0.001644390434382959, "loss": 0.2498, "step": 28860 }, { "epoch": 0.051174711585942646, "grad_norm": 0.83984375, "learning_rate": 0.0016443430374240863, "loss": 0.2328, "step": 28862 }, { "epoch": 0.05117825775125246, "grad_norm": 0.78515625, "learning_rate": 0.001644295638084574, "loss": 0.2114, "step": 28864 }, { "epoch": 0.051181803916562275, "grad_norm": 0.26171875, "learning_rate": 0.0016442482363646278, "loss": 0.1612, "step": 28866 }, { "epoch": 0.05118535008187209, "grad_norm": 0.28515625, "learning_rate": 0.0016442008322644565, "loss": 0.2012, "step": 28868 }, { "epoch": 0.051188896247181905, "grad_norm": 0.302734375, "learning_rate": 0.0016441534257842668, "loss": 0.2724, "step": 28870 }, { "epoch": 0.05119244241249172, "grad_norm": 0.306640625, "learning_rate": 0.0016441060169242662, "loss": 0.1829, "step": 28872 }, { "epoch": 0.05119598857780154, "grad_norm": 0.31640625, "learning_rate": 0.001644058605684662, "loss": 0.2505, "step": 28874 }, { "epoch": 0.051199534743111355, "grad_norm": 0.271484375, "learning_rate": 0.0016440111920656612, "loss": 0.1631, "step": 28876 }, { "epoch": 0.05120308090842117, "grad_norm": 0.439453125, "learning_rate": 0.0016439637760674716, "loss": 0.1606, "step": 28878 }, { "epoch": 0.051206627073730984, "grad_norm": 0.310546875, "learning_rate": 0.0016439163576903004, "loss": 0.1597, "step": 28880 }, { "epoch": 0.0512101732390408, "grad_norm": 0.9453125, "learning_rate": 0.001643868936934355, "loss": 0.217, "step": 28882 }, { "epoch": 0.05121371940435061, "grad_norm": 0.7109375, "learning_rate": 0.0016438215137998427, "loss": 0.2671, "step": 28884 }, { "epoch": 0.05121726556966043, "grad_norm": 0.57421875, "learning_rate": 0.0016437740882869712, "loss": 0.198, "step": 28886 }, { "epoch": 0.05122081173497024, "grad_norm": 0.30859375, "learning_rate": 0.0016437266603959474, "loss": 0.1945, "step": 28888 }, { "epoch": 0.05122435790028006, "grad_norm": 0.2216796875, "learning_rate": 0.0016436792301269794, "loss": 0.2231, "step": 28890 }, { "epoch": 0.05122790406558987, "grad_norm": 0.359375, "learning_rate": 0.0016436317974802739, "loss": 0.2575, "step": 28892 }, { "epoch": 0.051231450230899686, "grad_norm": 0.8046875, "learning_rate": 0.0016435843624560387, "loss": 0.2005, "step": 28894 }, { "epoch": 0.0512349963962095, "grad_norm": 0.423828125, "learning_rate": 0.0016435369250544815, "loss": 0.1654, "step": 28896 }, { "epoch": 0.05123854256151932, "grad_norm": 0.45703125, "learning_rate": 0.0016434894852758095, "loss": 0.1506, "step": 28898 }, { "epoch": 0.05124208872682914, "grad_norm": 0.515625, "learning_rate": 0.0016434420431202302, "loss": 0.1927, "step": 28900 }, { "epoch": 0.05124563489213895, "grad_norm": 6.09375, "learning_rate": 0.0016433945985879508, "loss": 0.3085, "step": 28902 }, { "epoch": 0.051249181057448766, "grad_norm": 0.50390625, "learning_rate": 0.0016433471516791793, "loss": 0.2624, "step": 28904 }, { "epoch": 0.05125272722275858, "grad_norm": 1.34375, "learning_rate": 0.0016432997023941229, "loss": 0.2234, "step": 28906 }, { "epoch": 0.051256273388068395, "grad_norm": 0.310546875, "learning_rate": 0.0016432522507329893, "loss": 0.1908, "step": 28908 }, { "epoch": 0.05125981955337821, "grad_norm": 0.296875, "learning_rate": 0.0016432047966959858, "loss": 0.1326, "step": 28910 }, { "epoch": 0.051263365718688024, "grad_norm": 0.5859375, "learning_rate": 0.0016431573402833199, "loss": 0.2154, "step": 28912 }, { "epoch": 0.05126691188399784, "grad_norm": 0.6015625, "learning_rate": 0.0016431098814951996, "loss": 0.1923, "step": 28914 }, { "epoch": 0.05127045804930765, "grad_norm": 0.66796875, "learning_rate": 0.0016430624203318319, "loss": 0.1887, "step": 28916 }, { "epoch": 0.05127400421461747, "grad_norm": 0.5390625, "learning_rate": 0.0016430149567934248, "loss": 0.208, "step": 28918 }, { "epoch": 0.05127755037992729, "grad_norm": 0.361328125, "learning_rate": 0.0016429674908801856, "loss": 0.167, "step": 28920 }, { "epoch": 0.051281096545237104, "grad_norm": 0.625, "learning_rate": 0.001642920022592322, "loss": 0.1769, "step": 28922 }, { "epoch": 0.05128464271054692, "grad_norm": 0.78515625, "learning_rate": 0.0016428725519300417, "loss": 0.1957, "step": 28924 }, { "epoch": 0.05128818887585673, "grad_norm": 0.322265625, "learning_rate": 0.001642825078893552, "loss": 0.2115, "step": 28926 }, { "epoch": 0.05129173504116655, "grad_norm": 0.42578125, "learning_rate": 0.0016427776034830608, "loss": 0.2172, "step": 28928 }, { "epoch": 0.05129528120647636, "grad_norm": 0.88671875, "learning_rate": 0.0016427301256987758, "loss": 0.1258, "step": 28930 }, { "epoch": 0.051298827371786176, "grad_norm": 0.71875, "learning_rate": 0.0016426826455409043, "loss": 0.2494, "step": 28932 }, { "epoch": 0.05130237353709599, "grad_norm": 0.68359375, "learning_rate": 0.001642635163009654, "loss": 0.1731, "step": 28934 }, { "epoch": 0.051305919702405806, "grad_norm": 1.5, "learning_rate": 0.001642587678105233, "loss": 0.2589, "step": 28936 }, { "epoch": 0.05130946586771562, "grad_norm": 0.5234375, "learning_rate": 0.0016425401908278488, "loss": 0.2342, "step": 28938 }, { "epoch": 0.051313012033025435, "grad_norm": 0.30078125, "learning_rate": 0.0016424927011777085, "loss": 0.1873, "step": 28940 }, { "epoch": 0.05131655819833525, "grad_norm": 0.318359375, "learning_rate": 0.0016424452091550203, "loss": 0.1679, "step": 28942 }, { "epoch": 0.05132010436364507, "grad_norm": 0.490234375, "learning_rate": 0.0016423977147599922, "loss": 0.1611, "step": 28944 }, { "epoch": 0.051323650528954885, "grad_norm": 0.25, "learning_rate": 0.0016423502179928314, "loss": 0.2194, "step": 28946 }, { "epoch": 0.0513271966942647, "grad_norm": 0.8359375, "learning_rate": 0.0016423027188537456, "loss": 0.1665, "step": 28948 }, { "epoch": 0.051330742859574514, "grad_norm": 0.31640625, "learning_rate": 0.0016422552173429431, "loss": 0.2284, "step": 28950 }, { "epoch": 0.05133428902488433, "grad_norm": 1.375, "learning_rate": 0.0016422077134606308, "loss": 0.2934, "step": 28952 }, { "epoch": 0.051337835190194143, "grad_norm": 1.640625, "learning_rate": 0.0016421602072070169, "loss": 0.3084, "step": 28954 }, { "epoch": 0.05134138135550396, "grad_norm": 2.265625, "learning_rate": 0.0016421126985823097, "loss": 0.3871, "step": 28956 }, { "epoch": 0.05134492752081377, "grad_norm": 1.8125, "learning_rate": 0.0016420651875867158, "loss": 0.2129, "step": 28958 }, { "epoch": 0.05134847368612359, "grad_norm": 0.6875, "learning_rate": 0.001642017674220444, "loss": 0.1386, "step": 28960 }, { "epoch": 0.0513520198514334, "grad_norm": 0.546875, "learning_rate": 0.0016419701584837016, "loss": 0.1912, "step": 28962 }, { "epoch": 0.051355566016743216, "grad_norm": 0.625, "learning_rate": 0.0016419226403766966, "loss": 0.3302, "step": 28964 }, { "epoch": 0.05135911218205304, "grad_norm": 0.98046875, "learning_rate": 0.0016418751198996372, "loss": 0.1567, "step": 28966 }, { "epoch": 0.05136265834736285, "grad_norm": 1.0546875, "learning_rate": 0.0016418275970527302, "loss": 0.1539, "step": 28968 }, { "epoch": 0.05136620451267267, "grad_norm": 0.60546875, "learning_rate": 0.0016417800718361842, "loss": 0.2275, "step": 28970 }, { "epoch": 0.05136975067798248, "grad_norm": 0.68359375, "learning_rate": 0.0016417325442502069, "loss": 0.2154, "step": 28972 }, { "epoch": 0.051373296843292296, "grad_norm": 0.51953125, "learning_rate": 0.001641685014295006, "loss": 0.2856, "step": 28974 }, { "epoch": 0.05137684300860211, "grad_norm": 0.49609375, "learning_rate": 0.0016416374819707897, "loss": 0.2403, "step": 28976 }, { "epoch": 0.051380389173911925, "grad_norm": 0.5078125, "learning_rate": 0.0016415899472777655, "loss": 0.181, "step": 28978 }, { "epoch": 0.05138393533922174, "grad_norm": 0.494140625, "learning_rate": 0.0016415424102161416, "loss": 0.1782, "step": 28980 }, { "epoch": 0.051387481504531554, "grad_norm": 0.5703125, "learning_rate": 0.0016414948707861258, "loss": 0.1594, "step": 28982 }, { "epoch": 0.05139102766984137, "grad_norm": 0.3203125, "learning_rate": 0.001641447328987926, "loss": 0.2049, "step": 28984 }, { "epoch": 0.05139457383515118, "grad_norm": 0.255859375, "learning_rate": 0.0016413997848217497, "loss": 0.1624, "step": 28986 }, { "epoch": 0.051398120000461005, "grad_norm": 1.78125, "learning_rate": 0.0016413522382878057, "loss": 0.2854, "step": 28988 }, { "epoch": 0.05140166616577082, "grad_norm": 2.265625, "learning_rate": 0.001641304689386301, "loss": 0.2871, "step": 28990 }, { "epoch": 0.051405212331080634, "grad_norm": 0.578125, "learning_rate": 0.0016412571381174447, "loss": 0.1582, "step": 28992 }, { "epoch": 0.05140875849639045, "grad_norm": 0.498046875, "learning_rate": 0.001641209584481443, "loss": 0.1664, "step": 28994 }, { "epoch": 0.05141230466170026, "grad_norm": 0.8828125, "learning_rate": 0.001641162028478506, "loss": 0.2886, "step": 28996 }, { "epoch": 0.05141585082701008, "grad_norm": 0.294921875, "learning_rate": 0.00164111447010884, "loss": 0.2259, "step": 28998 }, { "epoch": 0.05141939699231989, "grad_norm": 0.8515625, "learning_rate": 0.0016410669093726543, "loss": 0.1939, "step": 29000 }, { "epoch": 0.05142294315762971, "grad_norm": 1.1640625, "learning_rate": 0.0016410193462701556, "loss": 0.1788, "step": 29002 }, { "epoch": 0.05142648932293952, "grad_norm": 0.2333984375, "learning_rate": 0.0016409717808015527, "loss": 0.289, "step": 29004 }, { "epoch": 0.051430035488249336, "grad_norm": 0.66015625, "learning_rate": 0.0016409242129670535, "loss": 0.1965, "step": 29006 }, { "epoch": 0.05143358165355915, "grad_norm": 0.81640625, "learning_rate": 0.001640876642766866, "loss": 0.3609, "step": 29008 }, { "epoch": 0.051437127818868965, "grad_norm": 0.79296875, "learning_rate": 0.0016408290702011983, "loss": 0.1386, "step": 29010 }, { "epoch": 0.051440673984178786, "grad_norm": 0.416015625, "learning_rate": 0.0016407814952702584, "loss": 0.2418, "step": 29012 }, { "epoch": 0.0514442201494886, "grad_norm": 2.046875, "learning_rate": 0.0016407339179742542, "loss": 0.1988, "step": 29014 }, { "epoch": 0.051447766314798415, "grad_norm": 0.240234375, "learning_rate": 0.0016406863383133942, "loss": 0.2546, "step": 29016 }, { "epoch": 0.05145131248010823, "grad_norm": 0.72265625, "learning_rate": 0.0016406387562878857, "loss": 0.2214, "step": 29018 }, { "epoch": 0.051454858645418045, "grad_norm": 0.9765625, "learning_rate": 0.0016405911718979378, "loss": 0.1649, "step": 29020 }, { "epoch": 0.05145840481072786, "grad_norm": 0.349609375, "learning_rate": 0.001640543585143758, "loss": 0.1634, "step": 29022 }, { "epoch": 0.051461950976037674, "grad_norm": 0.365234375, "learning_rate": 0.0016404959960255546, "loss": 0.1586, "step": 29024 }, { "epoch": 0.05146549714134749, "grad_norm": 1.8984375, "learning_rate": 0.0016404484045435357, "loss": 0.4484, "step": 29026 }, { "epoch": 0.0514690433066573, "grad_norm": 0.65625, "learning_rate": 0.0016404008106979094, "loss": 0.1628, "step": 29028 }, { "epoch": 0.05147258947196712, "grad_norm": 0.279296875, "learning_rate": 0.0016403532144888837, "loss": 0.3513, "step": 29030 }, { "epoch": 0.05147613563727693, "grad_norm": 0.306640625, "learning_rate": 0.001640305615916667, "loss": 0.1816, "step": 29032 }, { "epoch": 0.05147968180258675, "grad_norm": 0.291015625, "learning_rate": 0.0016402580149814674, "loss": 0.2215, "step": 29034 }, { "epoch": 0.05148322796789657, "grad_norm": 0.279296875, "learning_rate": 0.001640210411683493, "loss": 0.1874, "step": 29036 }, { "epoch": 0.05148677413320638, "grad_norm": 2.859375, "learning_rate": 0.001640162806022952, "loss": 0.4283, "step": 29038 }, { "epoch": 0.0514903202985162, "grad_norm": 5.1875, "learning_rate": 0.001640115198000053, "loss": 0.2846, "step": 29040 }, { "epoch": 0.05149386646382601, "grad_norm": 0.515625, "learning_rate": 0.0016400675876150034, "loss": 0.2094, "step": 29042 }, { "epoch": 0.051497412629135826, "grad_norm": 0.7890625, "learning_rate": 0.0016400199748680122, "loss": 0.1953, "step": 29044 }, { "epoch": 0.05150095879444564, "grad_norm": 0.39453125, "learning_rate": 0.001639972359759287, "loss": 0.1638, "step": 29046 }, { "epoch": 0.051504504959755455, "grad_norm": 1.0625, "learning_rate": 0.0016399247422890367, "loss": 0.2819, "step": 29048 }, { "epoch": 0.05150805112506527, "grad_norm": 2.203125, "learning_rate": 0.0016398771224574692, "loss": 0.2604, "step": 29050 }, { "epoch": 0.051511597290375084, "grad_norm": 2.859375, "learning_rate": 0.0016398295002647922, "loss": 0.1812, "step": 29052 }, { "epoch": 0.0515151434556849, "grad_norm": 0.5859375, "learning_rate": 0.001639781875711215, "loss": 0.1874, "step": 29054 }, { "epoch": 0.05151868962099472, "grad_norm": 0.392578125, "learning_rate": 0.0016397342487969454, "loss": 0.2109, "step": 29056 }, { "epoch": 0.051522235786304535, "grad_norm": 0.3515625, "learning_rate": 0.001639686619522192, "loss": 0.1957, "step": 29058 }, { "epoch": 0.05152578195161435, "grad_norm": 0.39453125, "learning_rate": 0.0016396389878871622, "loss": 0.176, "step": 29060 }, { "epoch": 0.051529328116924164, "grad_norm": 0.91796875, "learning_rate": 0.0016395913538920655, "loss": 0.1947, "step": 29062 }, { "epoch": 0.05153287428223398, "grad_norm": 0.43359375, "learning_rate": 0.0016395437175371092, "loss": 0.2163, "step": 29064 }, { "epoch": 0.05153642044754379, "grad_norm": 0.431640625, "learning_rate": 0.0016394960788225024, "loss": 0.2346, "step": 29066 }, { "epoch": 0.05153996661285361, "grad_norm": 0.34765625, "learning_rate": 0.0016394484377484529, "loss": 0.1645, "step": 29068 }, { "epoch": 0.05154351277816342, "grad_norm": 0.5625, "learning_rate": 0.0016394007943151695, "loss": 0.2055, "step": 29070 }, { "epoch": 0.05154705894347324, "grad_norm": 0.390625, "learning_rate": 0.0016393531485228599, "loss": 0.1977, "step": 29072 }, { "epoch": 0.05155060510878305, "grad_norm": 0.455078125, "learning_rate": 0.0016393055003717334, "loss": 0.2311, "step": 29074 }, { "epoch": 0.051554151274092866, "grad_norm": 1.5546875, "learning_rate": 0.0016392578498619977, "loss": 0.2774, "step": 29076 }, { "epoch": 0.05155769743940268, "grad_norm": 0.2578125, "learning_rate": 0.0016392101969938612, "loss": 0.1874, "step": 29078 }, { "epoch": 0.0515612436047125, "grad_norm": 0.421875, "learning_rate": 0.0016391625417675332, "loss": 0.18, "step": 29080 }, { "epoch": 0.051564789770022317, "grad_norm": 0.392578125, "learning_rate": 0.0016391148841832207, "loss": 0.2366, "step": 29082 }, { "epoch": 0.05156833593533213, "grad_norm": 0.45703125, "learning_rate": 0.0016390672242411334, "loss": 0.1684, "step": 29084 }, { "epoch": 0.051571882100641946, "grad_norm": 0.416015625, "learning_rate": 0.0016390195619414786, "loss": 0.2153, "step": 29086 }, { "epoch": 0.05157542826595176, "grad_norm": 1.03125, "learning_rate": 0.0016389718972844656, "loss": 0.1968, "step": 29088 }, { "epoch": 0.051578974431261575, "grad_norm": 1.9609375, "learning_rate": 0.001638924230270303, "loss": 0.2094, "step": 29090 }, { "epoch": 0.05158252059657139, "grad_norm": 0.55859375, "learning_rate": 0.0016388765608991982, "loss": 0.2178, "step": 29092 }, { "epoch": 0.051586066761881204, "grad_norm": 2.0625, "learning_rate": 0.0016388288891713608, "loss": 0.2435, "step": 29094 }, { "epoch": 0.05158961292719102, "grad_norm": 0.2333984375, "learning_rate": 0.0016387812150869987, "loss": 0.1626, "step": 29096 }, { "epoch": 0.05159315909250083, "grad_norm": 0.5, "learning_rate": 0.0016387335386463205, "loss": 0.164, "step": 29098 }, { "epoch": 0.05159670525781065, "grad_norm": 0.333984375, "learning_rate": 0.0016386858598495346, "loss": 0.2015, "step": 29100 }, { "epoch": 0.05160025142312047, "grad_norm": 0.3359375, "learning_rate": 0.0016386381786968498, "loss": 0.318, "step": 29102 }, { "epoch": 0.051603797588430284, "grad_norm": 1.9765625, "learning_rate": 0.0016385904951884747, "loss": 0.2776, "step": 29104 }, { "epoch": 0.0516073437537401, "grad_norm": 0.2255859375, "learning_rate": 0.0016385428093246175, "loss": 0.1563, "step": 29106 }, { "epoch": 0.05161088991904991, "grad_norm": 1.0546875, "learning_rate": 0.0016384951211054863, "loss": 0.1647, "step": 29108 }, { "epoch": 0.05161443608435973, "grad_norm": 0.8125, "learning_rate": 0.0016384474305312909, "loss": 0.1562, "step": 29110 }, { "epoch": 0.05161798224966954, "grad_norm": 3.09375, "learning_rate": 0.001638399737602239, "loss": 0.1979, "step": 29112 }, { "epoch": 0.051621528414979356, "grad_norm": 0.359375, "learning_rate": 0.0016383520423185396, "loss": 0.1997, "step": 29114 }, { "epoch": 0.05162507458028917, "grad_norm": 0.318359375, "learning_rate": 0.001638304344680401, "loss": 0.1806, "step": 29116 }, { "epoch": 0.051628620745598985, "grad_norm": 0.30078125, "learning_rate": 0.0016382566446880315, "loss": 0.1856, "step": 29118 }, { "epoch": 0.0516321669109088, "grad_norm": 0.392578125, "learning_rate": 0.0016382089423416402, "loss": 0.183, "step": 29120 }, { "epoch": 0.051635713076218615, "grad_norm": 0.85546875, "learning_rate": 0.0016381612376414358, "loss": 0.1946, "step": 29122 }, { "epoch": 0.051639259241528436, "grad_norm": 0.29296875, "learning_rate": 0.0016381135305876266, "loss": 0.1728, "step": 29124 }, { "epoch": 0.05164280540683825, "grad_norm": 0.91796875, "learning_rate": 0.0016380658211804218, "loss": 0.2502, "step": 29126 }, { "epoch": 0.051646351572148065, "grad_norm": 0.77734375, "learning_rate": 0.0016380181094200292, "loss": 0.2109, "step": 29128 }, { "epoch": 0.05164989773745788, "grad_norm": 0.291015625, "learning_rate": 0.0016379703953066582, "loss": 0.2181, "step": 29130 }, { "epoch": 0.051653443902767694, "grad_norm": 0.251953125, "learning_rate": 0.001637922678840517, "loss": 0.1973, "step": 29132 }, { "epoch": 0.05165699006807751, "grad_norm": 0.69140625, "learning_rate": 0.0016378749600218145, "loss": 0.2957, "step": 29134 }, { "epoch": 0.05166053623338732, "grad_norm": 0.349609375, "learning_rate": 0.0016378272388507594, "loss": 0.213, "step": 29136 }, { "epoch": 0.05166408239869714, "grad_norm": 0.2353515625, "learning_rate": 0.0016377795153275602, "loss": 0.2658, "step": 29138 }, { "epoch": 0.05166762856400695, "grad_norm": 0.169921875, "learning_rate": 0.0016377317894524259, "loss": 0.1811, "step": 29140 }, { "epoch": 0.05167117472931677, "grad_norm": 0.484375, "learning_rate": 0.0016376840612255653, "loss": 0.1603, "step": 29142 }, { "epoch": 0.05167472089462658, "grad_norm": 0.4296875, "learning_rate": 0.0016376363306471868, "loss": 0.1678, "step": 29144 }, { "epoch": 0.051678267059936396, "grad_norm": 0.8828125, "learning_rate": 0.001637588597717499, "loss": 0.1716, "step": 29146 }, { "epoch": 0.05168181322524622, "grad_norm": 0.74609375, "learning_rate": 0.0016375408624367115, "loss": 0.1571, "step": 29148 }, { "epoch": 0.05168535939055603, "grad_norm": 3.3125, "learning_rate": 0.001637493124805032, "loss": 0.3265, "step": 29150 }, { "epoch": 0.05168890555586585, "grad_norm": 0.51171875, "learning_rate": 0.00163744538482267, "loss": 0.2879, "step": 29152 }, { "epoch": 0.05169245172117566, "grad_norm": 0.51171875, "learning_rate": 0.001637397642489834, "loss": 0.322, "step": 29154 }, { "epoch": 0.051695997886485476, "grad_norm": 0.306640625, "learning_rate": 0.0016373498978067332, "loss": 0.172, "step": 29156 }, { "epoch": 0.05169954405179529, "grad_norm": 0.486328125, "learning_rate": 0.0016373021507735757, "loss": 0.185, "step": 29158 }, { "epoch": 0.051703090217105105, "grad_norm": 0.7109375, "learning_rate": 0.0016372544013905708, "loss": 0.1881, "step": 29160 }, { "epoch": 0.05170663638241492, "grad_norm": 0.23828125, "learning_rate": 0.0016372066496579273, "loss": 0.1878, "step": 29162 }, { "epoch": 0.051710182547724734, "grad_norm": 2.203125, "learning_rate": 0.0016371588955758538, "loss": 0.2924, "step": 29164 }, { "epoch": 0.05171372871303455, "grad_norm": 1.0859375, "learning_rate": 0.0016371111391445593, "loss": 0.399, "step": 29166 }, { "epoch": 0.05171727487834436, "grad_norm": 0.5234375, "learning_rate": 0.0016370633803642527, "loss": 0.1622, "step": 29168 }, { "epoch": 0.051720821043654185, "grad_norm": 0.7734375, "learning_rate": 0.0016370156192351429, "loss": 0.224, "step": 29170 }, { "epoch": 0.051724367208964, "grad_norm": 1.3203125, "learning_rate": 0.001636967855757439, "loss": 0.2693, "step": 29172 }, { "epoch": 0.051727913374273814, "grad_norm": 0.49609375, "learning_rate": 0.001636920089931349, "loss": 0.1806, "step": 29174 }, { "epoch": 0.05173145953958363, "grad_norm": 0.384765625, "learning_rate": 0.001636872321757083, "loss": 0.2247, "step": 29176 }, { "epoch": 0.05173500570489344, "grad_norm": 4.625, "learning_rate": 0.0016368245512348488, "loss": 0.2579, "step": 29178 }, { "epoch": 0.05173855187020326, "grad_norm": 0.267578125, "learning_rate": 0.0016367767783648565, "loss": 0.2064, "step": 29180 }, { "epoch": 0.05174209803551307, "grad_norm": 0.3515625, "learning_rate": 0.0016367290031473135, "loss": 0.2169, "step": 29182 }, { "epoch": 0.051745644200822886, "grad_norm": 0.96875, "learning_rate": 0.0016366812255824304, "loss": 0.1918, "step": 29184 }, { "epoch": 0.0517491903661327, "grad_norm": 1.9609375, "learning_rate": 0.0016366334456704146, "loss": 0.1457, "step": 29186 }, { "epoch": 0.051752736531442516, "grad_norm": 0.4375, "learning_rate": 0.0016365856634114764, "loss": 0.1656, "step": 29188 }, { "epoch": 0.05175628269675233, "grad_norm": 6.65625, "learning_rate": 0.0016365378788058239, "loss": 0.1749, "step": 29190 }, { "epoch": 0.05175982886206215, "grad_norm": 0.45703125, "learning_rate": 0.0016364900918536668, "loss": 0.2483, "step": 29192 }, { "epoch": 0.051763375027371966, "grad_norm": 0.486328125, "learning_rate": 0.0016364423025552134, "loss": 0.2089, "step": 29194 }, { "epoch": 0.05176692119268178, "grad_norm": 0.8046875, "learning_rate": 0.001636394510910673, "loss": 0.2163, "step": 29196 }, { "epoch": 0.051770467357991595, "grad_norm": 0.54296875, "learning_rate": 0.0016363467169202546, "loss": 0.2075, "step": 29198 }, { "epoch": 0.05177401352330141, "grad_norm": 0.58984375, "learning_rate": 0.001636298920584167, "loss": 0.1855, "step": 29200 }, { "epoch": 0.051777559688611224, "grad_norm": 0.44140625, "learning_rate": 0.0016362511219026195, "loss": 0.2332, "step": 29202 }, { "epoch": 0.05178110585392104, "grad_norm": 2.328125, "learning_rate": 0.001636203320875821, "loss": 0.3503, "step": 29204 }, { "epoch": 0.051784652019230853, "grad_norm": 0.4609375, "learning_rate": 0.001636155517503981, "loss": 0.2012, "step": 29206 }, { "epoch": 0.05178819818454067, "grad_norm": 0.65234375, "learning_rate": 0.001636107711787308, "loss": 0.1714, "step": 29208 }, { "epoch": 0.05179174434985048, "grad_norm": 0.34375, "learning_rate": 0.0016360599037260114, "loss": 0.204, "step": 29210 }, { "epoch": 0.0517952905151603, "grad_norm": 0.875, "learning_rate": 0.0016360120933202997, "loss": 0.2471, "step": 29212 }, { "epoch": 0.05179883668047011, "grad_norm": 0.54296875, "learning_rate": 0.0016359642805703829, "loss": 0.2223, "step": 29214 }, { "epoch": 0.05180238284577993, "grad_norm": 3.515625, "learning_rate": 0.0016359164654764697, "loss": 0.2981, "step": 29216 }, { "epoch": 0.05180592901108975, "grad_norm": 2.09375, "learning_rate": 0.001635868648038769, "loss": 0.3408, "step": 29218 }, { "epoch": 0.05180947517639956, "grad_norm": 0.57421875, "learning_rate": 0.00163582082825749, "loss": 0.1672, "step": 29220 }, { "epoch": 0.05181302134170938, "grad_norm": 5.71875, "learning_rate": 0.0016357730061328422, "loss": 0.2878, "step": 29222 }, { "epoch": 0.05181656750701919, "grad_norm": 0.333984375, "learning_rate": 0.0016357251816650343, "loss": 0.2449, "step": 29224 }, { "epoch": 0.051820113672329006, "grad_norm": 2.3125, "learning_rate": 0.0016356773548542758, "loss": 0.3094, "step": 29226 }, { "epoch": 0.05182365983763882, "grad_norm": 0.57421875, "learning_rate": 0.0016356295257007757, "loss": 0.244, "step": 29228 }, { "epoch": 0.051827206002948635, "grad_norm": 2.328125, "learning_rate": 0.0016355816942047433, "loss": 0.1978, "step": 29230 }, { "epoch": 0.05183075216825845, "grad_norm": 0.2578125, "learning_rate": 0.0016355338603663875, "loss": 0.1815, "step": 29232 }, { "epoch": 0.051834298333568264, "grad_norm": 0.48828125, "learning_rate": 0.0016354860241859174, "loss": 0.2086, "step": 29234 }, { "epoch": 0.05183784449887808, "grad_norm": 0.55859375, "learning_rate": 0.0016354381856635428, "loss": 0.1613, "step": 29236 }, { "epoch": 0.0518413906641879, "grad_norm": 1.21875, "learning_rate": 0.0016353903447994724, "loss": 0.1759, "step": 29238 }, { "epoch": 0.051844936829497715, "grad_norm": 0.87890625, "learning_rate": 0.0016353425015939158, "loss": 0.1763, "step": 29240 }, { "epoch": 0.05184848299480753, "grad_norm": 2.296875, "learning_rate": 0.001635294656047082, "loss": 0.2838, "step": 29242 }, { "epoch": 0.051852029160117344, "grad_norm": 2.703125, "learning_rate": 0.0016352468081591803, "loss": 0.1977, "step": 29244 }, { "epoch": 0.05185557532542716, "grad_norm": 0.46875, "learning_rate": 0.0016351989579304201, "loss": 0.1569, "step": 29246 }, { "epoch": 0.05185912149073697, "grad_norm": 2.03125, "learning_rate": 0.0016351511053610101, "loss": 0.3726, "step": 29248 }, { "epoch": 0.05186266765604679, "grad_norm": 0.7109375, "learning_rate": 0.0016351032504511606, "loss": 0.1895, "step": 29250 }, { "epoch": 0.0518662138213566, "grad_norm": 0.291015625, "learning_rate": 0.00163505539320108, "loss": 0.1581, "step": 29252 }, { "epoch": 0.05186975998666642, "grad_norm": 0.40234375, "learning_rate": 0.001635007533610978, "loss": 0.1826, "step": 29254 }, { "epoch": 0.05187330615197623, "grad_norm": 0.55078125, "learning_rate": 0.0016349596716810637, "loss": 0.2037, "step": 29256 }, { "epoch": 0.051876852317286046, "grad_norm": 1.4375, "learning_rate": 0.0016349118074115469, "loss": 0.2556, "step": 29258 }, { "epoch": 0.05188039848259587, "grad_norm": 0.546875, "learning_rate": 0.001634863940802636, "loss": 0.3034, "step": 29260 }, { "epoch": 0.05188394464790568, "grad_norm": 1.3984375, "learning_rate": 0.0016348160718545413, "loss": 0.1233, "step": 29262 }, { "epoch": 0.051887490813215496, "grad_norm": 0.365234375, "learning_rate": 0.0016347682005674715, "loss": 0.2243, "step": 29264 }, { "epoch": 0.05189103697852531, "grad_norm": 0.5546875, "learning_rate": 0.0016347203269416364, "loss": 0.1984, "step": 29266 }, { "epoch": 0.051894583143835125, "grad_norm": 0.4296875, "learning_rate": 0.0016346724509772451, "loss": 0.1251, "step": 29268 }, { "epoch": 0.05189812930914494, "grad_norm": 0.39453125, "learning_rate": 0.0016346245726745075, "loss": 0.1835, "step": 29270 }, { "epoch": 0.051901675474454755, "grad_norm": 0.306640625, "learning_rate": 0.0016345766920336322, "loss": 0.2736, "step": 29272 }, { "epoch": 0.05190522163976457, "grad_norm": 0.546875, "learning_rate": 0.0016345288090548289, "loss": 0.2261, "step": 29274 }, { "epoch": 0.051908767805074384, "grad_norm": 2.40625, "learning_rate": 0.001634480923738307, "loss": 0.2026, "step": 29276 }, { "epoch": 0.0519123139703842, "grad_norm": 1.15625, "learning_rate": 0.0016344330360842762, "loss": 0.218, "step": 29278 }, { "epoch": 0.05191586013569401, "grad_norm": 0.3828125, "learning_rate": 0.0016343851460929458, "loss": 0.1586, "step": 29280 }, { "epoch": 0.05191940630100383, "grad_norm": 0.408203125, "learning_rate": 0.001634337253764525, "loss": 0.2128, "step": 29282 }, { "epoch": 0.05192295246631365, "grad_norm": 0.4375, "learning_rate": 0.0016342893590992234, "loss": 0.1637, "step": 29284 }, { "epoch": 0.05192649863162346, "grad_norm": 1.03125, "learning_rate": 0.0016342414620972507, "loss": 0.4581, "step": 29286 }, { "epoch": 0.05193004479693328, "grad_norm": 0.353515625, "learning_rate": 0.001634193562758816, "loss": 0.2068, "step": 29288 }, { "epoch": 0.05193359096224309, "grad_norm": 0.46484375, "learning_rate": 0.001634145661084129, "loss": 0.2024, "step": 29290 }, { "epoch": 0.05193713712755291, "grad_norm": 1.4375, "learning_rate": 0.0016340977570733996, "loss": 0.2584, "step": 29292 }, { "epoch": 0.05194068329286272, "grad_norm": 0.5859375, "learning_rate": 0.001634049850726836, "loss": 0.2244, "step": 29294 }, { "epoch": 0.051944229458172536, "grad_norm": 0.4140625, "learning_rate": 0.001634001942044649, "loss": 0.1889, "step": 29296 }, { "epoch": 0.05194777562348235, "grad_norm": 0.52734375, "learning_rate": 0.0016339540310270479, "loss": 0.2325, "step": 29298 }, { "epoch": 0.051951321788792165, "grad_norm": 0.5703125, "learning_rate": 0.0016339061176742417, "loss": 0.3686, "step": 29300 }, { "epoch": 0.05195486795410198, "grad_norm": 0.59765625, "learning_rate": 0.0016338582019864403, "loss": 0.193, "step": 29302 }, { "epoch": 0.051958414119411794, "grad_norm": 0.26953125, "learning_rate": 0.0016338102839638532, "loss": 0.1572, "step": 29304 }, { "epoch": 0.051961960284721616, "grad_norm": 0.98046875, "learning_rate": 0.00163376236360669, "loss": 0.1793, "step": 29306 }, { "epoch": 0.05196550645003143, "grad_norm": 0.6015625, "learning_rate": 0.0016337144409151606, "loss": 0.1965, "step": 29308 }, { "epoch": 0.051969052615341245, "grad_norm": 0.296875, "learning_rate": 0.001633666515889474, "loss": 0.1683, "step": 29310 }, { "epoch": 0.05197259878065106, "grad_norm": 1.25, "learning_rate": 0.00163361858852984, "loss": 0.277, "step": 29312 }, { "epoch": 0.051976144945960874, "grad_norm": 0.640625, "learning_rate": 0.001633570658836468, "loss": 0.3299, "step": 29314 }, { "epoch": 0.05197969111127069, "grad_norm": 0.365234375, "learning_rate": 0.0016335227268095686, "loss": 0.1567, "step": 29316 }, { "epoch": 0.0519832372765805, "grad_norm": 2.171875, "learning_rate": 0.0016334747924493503, "loss": 0.1947, "step": 29318 }, { "epoch": 0.05198678344189032, "grad_norm": 0.57421875, "learning_rate": 0.001633426855756023, "loss": 0.232, "step": 29320 }, { "epoch": 0.05199032960720013, "grad_norm": 0.369140625, "learning_rate": 0.0016333789167297965, "loss": 0.184, "step": 29322 }, { "epoch": 0.05199387577250995, "grad_norm": 0.474609375, "learning_rate": 0.001633330975370881, "loss": 0.2409, "step": 29324 }, { "epoch": 0.05199742193781976, "grad_norm": 0.421875, "learning_rate": 0.001633283031679485, "loss": 0.1686, "step": 29326 }, { "epoch": 0.05200096810312958, "grad_norm": 0.64453125, "learning_rate": 0.0016332350856558188, "loss": 0.2088, "step": 29328 }, { "epoch": 0.0520045142684394, "grad_norm": 0.484375, "learning_rate": 0.0016331871373000924, "loss": 0.2295, "step": 29330 }, { "epoch": 0.05200806043374921, "grad_norm": 0.86328125, "learning_rate": 0.001633139186612515, "loss": 0.2255, "step": 29332 }, { "epoch": 0.052011606599059027, "grad_norm": 0.35546875, "learning_rate": 0.0016330912335932963, "loss": 0.1924, "step": 29334 }, { "epoch": 0.05201515276436884, "grad_norm": 0.427734375, "learning_rate": 0.0016330432782426464, "loss": 0.1573, "step": 29336 }, { "epoch": 0.052018698929678656, "grad_norm": 0.27734375, "learning_rate": 0.001632995320560775, "loss": 0.1698, "step": 29338 }, { "epoch": 0.05202224509498847, "grad_norm": 0.2216796875, "learning_rate": 0.0016329473605478915, "loss": 0.2038, "step": 29340 }, { "epoch": 0.052025791260298285, "grad_norm": 1.109375, "learning_rate": 0.0016328993982042054, "loss": 0.2349, "step": 29342 }, { "epoch": 0.0520293374256081, "grad_norm": 0.61328125, "learning_rate": 0.0016328514335299275, "loss": 0.1905, "step": 29344 }, { "epoch": 0.052032883590917914, "grad_norm": 1.328125, "learning_rate": 0.0016328034665252667, "loss": 0.2348, "step": 29346 }, { "epoch": 0.05203642975622773, "grad_norm": 0.65234375, "learning_rate": 0.001632755497190433, "loss": 0.2071, "step": 29348 }, { "epoch": 0.05203997592153754, "grad_norm": 0.3984375, "learning_rate": 0.0016327075255256362, "loss": 0.1488, "step": 29350 }, { "epoch": 0.052043522086847364, "grad_norm": 0.63671875, "learning_rate": 0.0016326595515310865, "loss": 0.1989, "step": 29352 }, { "epoch": 0.05204706825215718, "grad_norm": 3.84375, "learning_rate": 0.0016326115752069927, "loss": 0.3219, "step": 29354 }, { "epoch": 0.052050614417466994, "grad_norm": 0.2392578125, "learning_rate": 0.0016325635965535657, "loss": 0.1545, "step": 29356 }, { "epoch": 0.05205416058277681, "grad_norm": 3.609375, "learning_rate": 0.0016325156155710147, "loss": 0.3998, "step": 29358 }, { "epoch": 0.05205770674808662, "grad_norm": 0.69921875, "learning_rate": 0.0016324676322595497, "loss": 0.1625, "step": 29360 }, { "epoch": 0.05206125291339644, "grad_norm": 0.35546875, "learning_rate": 0.0016324196466193806, "loss": 0.1878, "step": 29362 }, { "epoch": 0.05206479907870625, "grad_norm": 0.7734375, "learning_rate": 0.0016323716586507174, "loss": 0.2318, "step": 29364 }, { "epoch": 0.052068345244016066, "grad_norm": 0.6484375, "learning_rate": 0.0016323236683537697, "loss": 0.1865, "step": 29366 }, { "epoch": 0.05207189140932588, "grad_norm": 0.431640625, "learning_rate": 0.0016322756757287474, "loss": 0.2153, "step": 29368 }, { "epoch": 0.052075437574635695, "grad_norm": 0.2353515625, "learning_rate": 0.0016322276807758606, "loss": 0.1726, "step": 29370 }, { "epoch": 0.05207898373994551, "grad_norm": 0.7265625, "learning_rate": 0.0016321796834953188, "loss": 0.17, "step": 29372 }, { "epoch": 0.05208252990525533, "grad_norm": 3.15625, "learning_rate": 0.0016321316838873326, "loss": 0.1804, "step": 29374 }, { "epoch": 0.052086076070565146, "grad_norm": 3.890625, "learning_rate": 0.0016320836819521112, "loss": 0.2338, "step": 29376 }, { "epoch": 0.05208962223587496, "grad_norm": 0.263671875, "learning_rate": 0.0016320356776898647, "loss": 0.2036, "step": 29378 }, { "epoch": 0.052093168401184775, "grad_norm": 0.3515625, "learning_rate": 0.0016319876711008037, "loss": 0.1791, "step": 29380 }, { "epoch": 0.05209671456649459, "grad_norm": 2.75, "learning_rate": 0.0016319396621851372, "loss": 0.2519, "step": 29382 }, { "epoch": 0.052100260731804404, "grad_norm": 0.419921875, "learning_rate": 0.0016318916509430758, "loss": 0.2777, "step": 29384 }, { "epoch": 0.05210380689711422, "grad_norm": 0.69140625, "learning_rate": 0.0016318436373748294, "loss": 0.1787, "step": 29386 }, { "epoch": 0.05210735306242403, "grad_norm": 0.39453125, "learning_rate": 0.0016317956214806078, "loss": 0.1795, "step": 29388 }, { "epoch": 0.05211089922773385, "grad_norm": 1.8828125, "learning_rate": 0.001631747603260621, "loss": 0.302, "step": 29390 }, { "epoch": 0.05211444539304366, "grad_norm": 0.3828125, "learning_rate": 0.001631699582715079, "loss": 0.1661, "step": 29392 }, { "epoch": 0.05211799155835348, "grad_norm": 0.96875, "learning_rate": 0.0016316515598441919, "loss": 0.1707, "step": 29394 }, { "epoch": 0.0521215377236633, "grad_norm": 0.44921875, "learning_rate": 0.0016316035346481696, "loss": 0.1799, "step": 29396 }, { "epoch": 0.05212508388897311, "grad_norm": 0.416015625, "learning_rate": 0.0016315555071272224, "loss": 0.5051, "step": 29398 }, { "epoch": 0.05212863005428293, "grad_norm": 1.2421875, "learning_rate": 0.00163150747728156, "loss": 0.2236, "step": 29400 }, { "epoch": 0.05213217621959274, "grad_norm": 0.53515625, "learning_rate": 0.0016314594451113929, "loss": 0.2237, "step": 29402 }, { "epoch": 0.05213572238490256, "grad_norm": 0.390625, "learning_rate": 0.0016314114106169306, "loss": 0.1617, "step": 29404 }, { "epoch": 0.05213926855021237, "grad_norm": 1.0859375, "learning_rate": 0.0016313633737983838, "loss": 0.1486, "step": 29406 }, { "epoch": 0.052142814715522186, "grad_norm": 2.203125, "learning_rate": 0.001631315334655962, "loss": 0.2649, "step": 29408 }, { "epoch": 0.052146360880832, "grad_norm": 0.640625, "learning_rate": 0.0016312672931898756, "loss": 0.1933, "step": 29410 }, { "epoch": 0.052149907046141815, "grad_norm": 0.54296875, "learning_rate": 0.0016312192494003345, "loss": 0.1847, "step": 29412 }, { "epoch": 0.05215345321145163, "grad_norm": 5.71875, "learning_rate": 0.0016311712032875494, "loss": 0.3264, "step": 29414 }, { "epoch": 0.052156999376761444, "grad_norm": 5.84375, "learning_rate": 0.0016311231548517297, "loss": 0.262, "step": 29416 }, { "epoch": 0.05216054554207126, "grad_norm": 0.58984375, "learning_rate": 0.0016310751040930858, "loss": 0.2145, "step": 29418 }, { "epoch": 0.05216409170738108, "grad_norm": 0.44140625, "learning_rate": 0.0016310270510118282, "loss": 0.1632, "step": 29420 }, { "epoch": 0.052167637872690895, "grad_norm": 0.45703125, "learning_rate": 0.0016309789956081664, "loss": 0.1752, "step": 29422 }, { "epoch": 0.05217118403800071, "grad_norm": 0.41796875, "learning_rate": 0.001630930937882311, "loss": 0.2039, "step": 29424 }, { "epoch": 0.052174730203310524, "grad_norm": 0.62109375, "learning_rate": 0.001630882877834472, "loss": 0.1499, "step": 29426 }, { "epoch": 0.05217827636862034, "grad_norm": 0.71875, "learning_rate": 0.0016308348154648595, "loss": 0.2638, "step": 29428 }, { "epoch": 0.05218182253393015, "grad_norm": 1.015625, "learning_rate": 0.0016307867507736843, "loss": 0.1717, "step": 29430 }, { "epoch": 0.05218536869923997, "grad_norm": 0.9140625, "learning_rate": 0.001630738683761156, "loss": 0.1901, "step": 29432 }, { "epoch": 0.05218891486454978, "grad_norm": 0.84375, "learning_rate": 0.001630690614427485, "loss": 0.3374, "step": 29434 }, { "epoch": 0.052192461029859596, "grad_norm": 0.55859375, "learning_rate": 0.0016306425427728814, "loss": 0.2287, "step": 29436 }, { "epoch": 0.05219600719516941, "grad_norm": 0.80078125, "learning_rate": 0.0016305944687975556, "loss": 0.1366, "step": 29438 }, { "epoch": 0.052199553360479226, "grad_norm": 0.287109375, "learning_rate": 0.0016305463925017178, "loss": 0.2093, "step": 29440 }, { "epoch": 0.05220309952578905, "grad_norm": 0.50390625, "learning_rate": 0.0016304983138855785, "loss": 0.1907, "step": 29442 }, { "epoch": 0.05220664569109886, "grad_norm": 0.703125, "learning_rate": 0.0016304502329493474, "loss": 0.1577, "step": 29444 }, { "epoch": 0.052210191856408676, "grad_norm": 0.66015625, "learning_rate": 0.0016304021496932352, "loss": 0.1942, "step": 29446 }, { "epoch": 0.05221373802171849, "grad_norm": 6.53125, "learning_rate": 0.0016303540641174519, "loss": 0.1805, "step": 29448 }, { "epoch": 0.052217284187028305, "grad_norm": 1.1015625, "learning_rate": 0.0016303059762222084, "loss": 0.2016, "step": 29450 }, { "epoch": 0.05222083035233812, "grad_norm": 0.462890625, "learning_rate": 0.0016302578860077144, "loss": 0.1799, "step": 29452 }, { "epoch": 0.052224376517647934, "grad_norm": 0.61328125, "learning_rate": 0.0016302097934741803, "loss": 0.2282, "step": 29454 }, { "epoch": 0.05222792268295775, "grad_norm": 0.578125, "learning_rate": 0.0016301616986218168, "loss": 0.1945, "step": 29456 }, { "epoch": 0.052231468848267563, "grad_norm": 0.69140625, "learning_rate": 0.0016301136014508337, "loss": 0.1745, "step": 29458 }, { "epoch": 0.05223501501357738, "grad_norm": 0.259765625, "learning_rate": 0.0016300655019614418, "loss": 0.2347, "step": 29460 }, { "epoch": 0.05223856117888719, "grad_norm": 0.3046875, "learning_rate": 0.0016300174001538516, "loss": 0.1843, "step": 29462 }, { "epoch": 0.052242107344197014, "grad_norm": 0.85546875, "learning_rate": 0.0016299692960282726, "loss": 0.2718, "step": 29464 }, { "epoch": 0.05224565350950683, "grad_norm": 0.349609375, "learning_rate": 0.001629921189584916, "loss": 0.211, "step": 29466 }, { "epoch": 0.05224919967481664, "grad_norm": 0.51953125, "learning_rate": 0.001629873080823992, "loss": 0.1883, "step": 29468 }, { "epoch": 0.05225274584012646, "grad_norm": 2.1875, "learning_rate": 0.0016298249697457108, "loss": 0.2801, "step": 29470 }, { "epoch": 0.05225629200543627, "grad_norm": 0.7578125, "learning_rate": 0.001629776856350283, "loss": 0.1534, "step": 29472 }, { "epoch": 0.05225983817074609, "grad_norm": 0.34375, "learning_rate": 0.0016297287406379186, "loss": 0.1646, "step": 29474 }, { "epoch": 0.0522633843360559, "grad_norm": 0.1865234375, "learning_rate": 0.001629680622608829, "loss": 0.1829, "step": 29476 }, { "epoch": 0.052266930501365716, "grad_norm": 0.3828125, "learning_rate": 0.0016296325022632234, "loss": 0.2562, "step": 29478 }, { "epoch": 0.05227047666667553, "grad_norm": 0.376953125, "learning_rate": 0.0016295843796013136, "loss": 0.1795, "step": 29480 }, { "epoch": 0.052274022831985345, "grad_norm": 1.0625, "learning_rate": 0.0016295362546233088, "loss": 0.1924, "step": 29482 }, { "epoch": 0.05227756899729516, "grad_norm": 0.65625, "learning_rate": 0.0016294881273294201, "loss": 0.1864, "step": 29484 }, { "epoch": 0.052281115162604974, "grad_norm": 0.34765625, "learning_rate": 0.0016294399977198582, "loss": 0.2138, "step": 29486 }, { "epoch": 0.052284661327914796, "grad_norm": 0.64453125, "learning_rate": 0.0016293918657948332, "loss": 0.1827, "step": 29488 }, { "epoch": 0.05228820749322461, "grad_norm": 2.515625, "learning_rate": 0.0016293437315545553, "loss": 0.2159, "step": 29490 }, { "epoch": 0.052291753658534425, "grad_norm": 0.625, "learning_rate": 0.0016292955949992359, "loss": 0.1871, "step": 29492 }, { "epoch": 0.05229529982384424, "grad_norm": 0.5, "learning_rate": 0.0016292474561290845, "loss": 0.1805, "step": 29494 }, { "epoch": 0.052298845989154054, "grad_norm": 0.76171875, "learning_rate": 0.0016291993149443128, "loss": 0.1579, "step": 29496 }, { "epoch": 0.05230239215446387, "grad_norm": 0.87890625, "learning_rate": 0.0016291511714451304, "loss": 0.1807, "step": 29498 }, { "epoch": 0.05230593831977368, "grad_norm": 0.49609375, "learning_rate": 0.0016291030256317478, "loss": 0.2025, "step": 29500 }, { "epoch": 0.0523094844850835, "grad_norm": 0.44140625, "learning_rate": 0.0016290548775043762, "loss": 0.1863, "step": 29502 }, { "epoch": 0.05231303065039331, "grad_norm": 1.5, "learning_rate": 0.001629006727063226, "loss": 0.2028, "step": 29504 }, { "epoch": 0.05231657681570313, "grad_norm": 0.75390625, "learning_rate": 0.0016289585743085074, "loss": 0.2303, "step": 29506 }, { "epoch": 0.05232012298101294, "grad_norm": 0.39453125, "learning_rate": 0.0016289104192404313, "loss": 0.2042, "step": 29508 }, { "epoch": 0.05232366914632276, "grad_norm": 0.40234375, "learning_rate": 0.0016288622618592082, "loss": 0.1922, "step": 29510 }, { "epoch": 0.05232721531163258, "grad_norm": 0.74609375, "learning_rate": 0.0016288141021650491, "loss": 0.1618, "step": 29512 }, { "epoch": 0.05233076147694239, "grad_norm": 0.5, "learning_rate": 0.0016287659401581643, "loss": 0.1618, "step": 29514 }, { "epoch": 0.052334307642252206, "grad_norm": 0.357421875, "learning_rate": 0.001628717775838764, "loss": 0.2084, "step": 29516 }, { "epoch": 0.05233785380756202, "grad_norm": 0.6328125, "learning_rate": 0.0016286696092070595, "loss": 0.1842, "step": 29518 }, { "epoch": 0.052341399972871835, "grad_norm": 0.5703125, "learning_rate": 0.0016286214402632608, "loss": 0.1768, "step": 29520 }, { "epoch": 0.05234494613818165, "grad_norm": 1.3984375, "learning_rate": 0.0016285732690075794, "loss": 0.2792, "step": 29522 }, { "epoch": 0.052348492303491465, "grad_norm": 0.412109375, "learning_rate": 0.0016285250954402258, "loss": 0.2141, "step": 29524 }, { "epoch": 0.05235203846880128, "grad_norm": 1.2890625, "learning_rate": 0.0016284769195614099, "loss": 0.2979, "step": 29526 }, { "epoch": 0.052355584634111094, "grad_norm": 1.6875, "learning_rate": 0.0016284287413713433, "loss": 0.2029, "step": 29528 }, { "epoch": 0.05235913079942091, "grad_norm": 0.84375, "learning_rate": 0.0016283805608702361, "loss": 0.146, "step": 29530 }, { "epoch": 0.05236267696473073, "grad_norm": 0.53515625, "learning_rate": 0.0016283323780582994, "loss": 0.2192, "step": 29532 }, { "epoch": 0.052366223130040544, "grad_norm": 0.48046875, "learning_rate": 0.001628284192935744, "loss": 0.1646, "step": 29534 }, { "epoch": 0.05236976929535036, "grad_norm": 0.8203125, "learning_rate": 0.00162823600550278, "loss": 0.2177, "step": 29536 }, { "epoch": 0.05237331546066017, "grad_norm": 0.77734375, "learning_rate": 0.0016281878157596186, "loss": 0.2215, "step": 29538 }, { "epoch": 0.05237686162596999, "grad_norm": 0.58984375, "learning_rate": 0.0016281396237064706, "loss": 0.2279, "step": 29540 }, { "epoch": 0.0523804077912798, "grad_norm": 0.7265625, "learning_rate": 0.0016280914293435467, "loss": 0.2202, "step": 29542 }, { "epoch": 0.05238395395658962, "grad_norm": 0.455078125, "learning_rate": 0.0016280432326710576, "loss": 0.1756, "step": 29544 }, { "epoch": 0.05238750012189943, "grad_norm": 0.69140625, "learning_rate": 0.0016279950336892139, "loss": 0.2074, "step": 29546 }, { "epoch": 0.052391046287209246, "grad_norm": 0.408203125, "learning_rate": 0.0016279468323982269, "loss": 0.1944, "step": 29548 }, { "epoch": 0.05239459245251906, "grad_norm": 0.5859375, "learning_rate": 0.001627898628798307, "loss": 0.2113, "step": 29550 }, { "epoch": 0.052398138617828875, "grad_norm": 0.376953125, "learning_rate": 0.0016278504228896653, "loss": 0.2352, "step": 29552 }, { "epoch": 0.05240168478313869, "grad_norm": 0.2353515625, "learning_rate": 0.0016278022146725122, "loss": 0.1654, "step": 29554 }, { "epoch": 0.05240523094844851, "grad_norm": 0.283203125, "learning_rate": 0.001627754004147059, "loss": 0.1673, "step": 29556 }, { "epoch": 0.052408777113758326, "grad_norm": 0.287109375, "learning_rate": 0.0016277057913135166, "loss": 0.2707, "step": 29558 }, { "epoch": 0.05241232327906814, "grad_norm": 0.3125, "learning_rate": 0.001627657576172095, "loss": 0.2114, "step": 29560 }, { "epoch": 0.052415869444377955, "grad_norm": 0.341796875, "learning_rate": 0.001627609358723006, "loss": 0.3996, "step": 29562 }, { "epoch": 0.05241941560968777, "grad_norm": 0.4765625, "learning_rate": 0.0016275611389664601, "loss": 0.2247, "step": 29564 }, { "epoch": 0.052422961774997584, "grad_norm": 0.6953125, "learning_rate": 0.0016275129169026682, "loss": 0.1819, "step": 29566 }, { "epoch": 0.0524265079403074, "grad_norm": 2.078125, "learning_rate": 0.0016274646925318412, "loss": 0.2822, "step": 29568 }, { "epoch": 0.05243005410561721, "grad_norm": 1.1328125, "learning_rate": 0.00162741646585419, "loss": 0.1856, "step": 29570 }, { "epoch": 0.05243360027092703, "grad_norm": 0.78515625, "learning_rate": 0.001627368236869926, "loss": 0.2253, "step": 29572 }, { "epoch": 0.05243714643623684, "grad_norm": 0.6015625, "learning_rate": 0.0016273200055792587, "loss": 0.1541, "step": 29574 }, { "epoch": 0.05244069260154666, "grad_norm": 0.341796875, "learning_rate": 0.0016272717719824005, "loss": 0.1829, "step": 29576 }, { "epoch": 0.05244423876685648, "grad_norm": 0.67578125, "learning_rate": 0.0016272235360795622, "loss": 0.1794, "step": 29578 }, { "epoch": 0.05244778493216629, "grad_norm": 0.53125, "learning_rate": 0.001627175297870954, "loss": 0.164, "step": 29580 }, { "epoch": 0.05245133109747611, "grad_norm": 0.4765625, "learning_rate": 0.0016271270573567874, "loss": 0.2013, "step": 29582 }, { "epoch": 0.05245487726278592, "grad_norm": 0.5078125, "learning_rate": 0.0016270788145372733, "loss": 0.2372, "step": 29584 }, { "epoch": 0.052458423428095736, "grad_norm": 0.345703125, "learning_rate": 0.0016270305694126225, "loss": 0.1612, "step": 29586 }, { "epoch": 0.05246196959340555, "grad_norm": 0.7109375, "learning_rate": 0.0016269823219830463, "loss": 0.1487, "step": 29588 }, { "epoch": 0.052465515758715366, "grad_norm": 0.5078125, "learning_rate": 0.0016269340722487554, "loss": 0.2297, "step": 29590 }, { "epoch": 0.05246906192402518, "grad_norm": 0.87890625, "learning_rate": 0.0016268858202099607, "loss": 0.1796, "step": 29592 }, { "epoch": 0.052472608089334995, "grad_norm": 1.1015625, "learning_rate": 0.0016268375658668739, "loss": 0.211, "step": 29594 }, { "epoch": 0.05247615425464481, "grad_norm": 4.25, "learning_rate": 0.0016267893092197052, "loss": 0.2365, "step": 29596 }, { "epoch": 0.052479700419954624, "grad_norm": 0.412109375, "learning_rate": 0.0016267410502686663, "loss": 0.2132, "step": 29598 }, { "epoch": 0.052483246585264445, "grad_norm": 0.46484375, "learning_rate": 0.0016266927890139682, "loss": 0.2015, "step": 29600 }, { "epoch": 0.05248679275057426, "grad_norm": 2.359375, "learning_rate": 0.0016266445254558212, "loss": 0.2166, "step": 29602 }, { "epoch": 0.052490338915884074, "grad_norm": 0.859375, "learning_rate": 0.0016265962595944374, "loss": 0.1964, "step": 29604 }, { "epoch": 0.05249388508119389, "grad_norm": 0.41015625, "learning_rate": 0.0016265479914300276, "loss": 0.2009, "step": 29606 }, { "epoch": 0.052497431246503704, "grad_norm": 0.4609375, "learning_rate": 0.0016264997209628025, "loss": 0.1939, "step": 29608 }, { "epoch": 0.05250097741181352, "grad_norm": 1.2890625, "learning_rate": 0.0016264514481929735, "loss": 0.2974, "step": 29610 }, { "epoch": 0.05250452357712333, "grad_norm": 0.3046875, "learning_rate": 0.0016264031731207517, "loss": 0.2368, "step": 29612 }, { "epoch": 0.05250806974243315, "grad_norm": 0.2138671875, "learning_rate": 0.0016263548957463482, "loss": 0.2116, "step": 29614 }, { "epoch": 0.05251161590774296, "grad_norm": 0.2353515625, "learning_rate": 0.0016263066160699738, "loss": 0.1626, "step": 29616 }, { "epoch": 0.052515162073052776, "grad_norm": 0.93359375, "learning_rate": 0.0016262583340918401, "loss": 0.1594, "step": 29618 }, { "epoch": 0.05251870823836259, "grad_norm": 0.298828125, "learning_rate": 0.0016262100498121586, "loss": 0.3052, "step": 29620 }, { "epoch": 0.052522254403672405, "grad_norm": 0.96875, "learning_rate": 0.0016261617632311396, "loss": 0.3892, "step": 29622 }, { "epoch": 0.05252580056898223, "grad_norm": 0.345703125, "learning_rate": 0.0016261134743489948, "loss": 0.1799, "step": 29624 }, { "epoch": 0.05252934673429204, "grad_norm": 0.68359375, "learning_rate": 0.0016260651831659352, "loss": 0.1742, "step": 29626 }, { "epoch": 0.052532892899601856, "grad_norm": 0.9453125, "learning_rate": 0.0016260168896821718, "loss": 0.1977, "step": 29628 }, { "epoch": 0.05253643906491167, "grad_norm": 0.34375, "learning_rate": 0.0016259685938979165, "loss": 0.1951, "step": 29630 }, { "epoch": 0.052539985230221485, "grad_norm": 0.5078125, "learning_rate": 0.0016259202958133802, "loss": 0.1511, "step": 29632 }, { "epoch": 0.0525435313955313, "grad_norm": 0.427734375, "learning_rate": 0.0016258719954287736, "loss": 0.1983, "step": 29634 }, { "epoch": 0.052547077560841114, "grad_norm": 0.265625, "learning_rate": 0.0016258236927443083, "loss": 0.4247, "step": 29636 }, { "epoch": 0.05255062372615093, "grad_norm": 0.70703125, "learning_rate": 0.001625775387760196, "loss": 0.3383, "step": 29638 }, { "epoch": 0.05255416989146074, "grad_norm": 0.30078125, "learning_rate": 0.0016257270804766473, "loss": 0.1874, "step": 29640 }, { "epoch": 0.05255771605677056, "grad_norm": 1.1875, "learning_rate": 0.0016256787708938739, "loss": 0.2622, "step": 29642 }, { "epoch": 0.05256126222208037, "grad_norm": 0.470703125, "learning_rate": 0.0016256304590120867, "loss": 0.1846, "step": 29644 }, { "epoch": 0.052564808387390194, "grad_norm": 0.318359375, "learning_rate": 0.001625582144831497, "loss": 0.2108, "step": 29646 }, { "epoch": 0.05256835455270001, "grad_norm": 0.275390625, "learning_rate": 0.0016255338283523168, "loss": 0.1857, "step": 29648 }, { "epoch": 0.05257190071800982, "grad_norm": 0.59375, "learning_rate": 0.0016254855095747564, "loss": 0.2121, "step": 29650 }, { "epoch": 0.05257544688331964, "grad_norm": 0.37109375, "learning_rate": 0.0016254371884990282, "loss": 0.2481, "step": 29652 }, { "epoch": 0.05257899304862945, "grad_norm": 1.0703125, "learning_rate": 0.0016253888651253426, "loss": 0.2126, "step": 29654 }, { "epoch": 0.05258253921393927, "grad_norm": 0.2734375, "learning_rate": 0.0016253405394539115, "loss": 0.2171, "step": 29656 }, { "epoch": 0.05258608537924908, "grad_norm": 0.6640625, "learning_rate": 0.0016252922114849456, "loss": 0.152, "step": 29658 }, { "epoch": 0.052589631544558896, "grad_norm": 0.30078125, "learning_rate": 0.0016252438812186572, "loss": 0.139, "step": 29660 }, { "epoch": 0.05259317770986871, "grad_norm": 0.61328125, "learning_rate": 0.0016251955486552567, "loss": 0.1927, "step": 29662 }, { "epoch": 0.052596723875178525, "grad_norm": 0.609375, "learning_rate": 0.0016251472137949565, "loss": 0.2182, "step": 29664 }, { "epoch": 0.05260027004048834, "grad_norm": 0.2294921875, "learning_rate": 0.001625098876637967, "loss": 0.2097, "step": 29666 }, { "epoch": 0.05260381620579816, "grad_norm": 0.306640625, "learning_rate": 0.0016250505371845, "loss": 0.2595, "step": 29668 }, { "epoch": 0.052607362371107975, "grad_norm": 0.46484375, "learning_rate": 0.001625002195434767, "loss": 0.2114, "step": 29670 }, { "epoch": 0.05261090853641779, "grad_norm": 0.451171875, "learning_rate": 0.0016249538513889797, "loss": 0.2018, "step": 29672 }, { "epoch": 0.052614454701727605, "grad_norm": 0.69140625, "learning_rate": 0.0016249055050473486, "loss": 0.1507, "step": 29674 }, { "epoch": 0.05261800086703742, "grad_norm": 2.15625, "learning_rate": 0.0016248571564100862, "loss": 0.282, "step": 29676 }, { "epoch": 0.052621547032347234, "grad_norm": 0.2412109375, "learning_rate": 0.0016248088054774032, "loss": 0.1664, "step": 29678 }, { "epoch": 0.05262509319765705, "grad_norm": 0.439453125, "learning_rate": 0.0016247604522495115, "loss": 0.1773, "step": 29680 }, { "epoch": 0.05262863936296686, "grad_norm": 0.59375, "learning_rate": 0.0016247120967266225, "loss": 0.1947, "step": 29682 }, { "epoch": 0.05263218552827668, "grad_norm": 0.59765625, "learning_rate": 0.0016246637389089474, "loss": 0.2936, "step": 29684 }, { "epoch": 0.05263573169358649, "grad_norm": 1.171875, "learning_rate": 0.0016246153787966978, "loss": 0.2047, "step": 29686 }, { "epoch": 0.052639277858896306, "grad_norm": 0.640625, "learning_rate": 0.0016245670163900854, "loss": 0.2071, "step": 29688 }, { "epoch": 0.05264282402420612, "grad_norm": 0.498046875, "learning_rate": 0.0016245186516893214, "loss": 0.2114, "step": 29690 }, { "epoch": 0.05264637018951594, "grad_norm": 0.625, "learning_rate": 0.0016244702846946177, "loss": 0.1754, "step": 29692 }, { "epoch": 0.05264991635482576, "grad_norm": 0.58984375, "learning_rate": 0.0016244219154061853, "loss": 0.2319, "step": 29694 }, { "epoch": 0.05265346252013557, "grad_norm": 0.5703125, "learning_rate": 0.0016243735438242364, "loss": 0.3021, "step": 29696 }, { "epoch": 0.052657008685445386, "grad_norm": 0.984375, "learning_rate": 0.0016243251699489822, "loss": 0.2135, "step": 29698 }, { "epoch": 0.0526605548507552, "grad_norm": 2.46875, "learning_rate": 0.0016242767937806341, "loss": 0.3793, "step": 29700 }, { "epoch": 0.052664101016065015, "grad_norm": 0.3203125, "learning_rate": 0.001624228415319404, "loss": 0.1934, "step": 29702 }, { "epoch": 0.05266764718137483, "grad_norm": 0.498046875, "learning_rate": 0.0016241800345655033, "loss": 0.1993, "step": 29704 }, { "epoch": 0.052671193346684644, "grad_norm": 1.890625, "learning_rate": 0.0016241316515191436, "loss": 0.2571, "step": 29706 }, { "epoch": 0.05267473951199446, "grad_norm": 0.80859375, "learning_rate": 0.0016240832661805363, "loss": 0.1905, "step": 29708 }, { "epoch": 0.052678285677304273, "grad_norm": 0.25, "learning_rate": 0.0016240348785498933, "loss": 0.1586, "step": 29710 }, { "epoch": 0.05268183184261409, "grad_norm": 0.236328125, "learning_rate": 0.0016239864886274263, "loss": 0.203, "step": 29712 }, { "epoch": 0.05268537800792391, "grad_norm": 0.3515625, "learning_rate": 0.0016239380964133466, "loss": 0.222, "step": 29714 }, { "epoch": 0.052688924173233724, "grad_norm": 1.3671875, "learning_rate": 0.0016238897019078661, "loss": 0.2967, "step": 29716 }, { "epoch": 0.05269247033854354, "grad_norm": 0.216796875, "learning_rate": 0.0016238413051111963, "loss": 0.1596, "step": 29718 }, { "epoch": 0.05269601650385335, "grad_norm": 0.439453125, "learning_rate": 0.001623792906023549, "loss": 0.1662, "step": 29720 }, { "epoch": 0.05269956266916317, "grad_norm": 0.486328125, "learning_rate": 0.0016237445046451357, "loss": 0.2142, "step": 29722 }, { "epoch": 0.05270310883447298, "grad_norm": 0.51171875, "learning_rate": 0.0016236961009761682, "loss": 0.2055, "step": 29724 }, { "epoch": 0.0527066549997828, "grad_norm": 0.330078125, "learning_rate": 0.001623647695016858, "loss": 0.1847, "step": 29726 }, { "epoch": 0.05271020116509261, "grad_norm": 0.91796875, "learning_rate": 0.0016235992867674172, "loss": 0.2133, "step": 29728 }, { "epoch": 0.052713747330402426, "grad_norm": 0.8984375, "learning_rate": 0.001623550876228057, "loss": 0.2431, "step": 29730 }, { "epoch": 0.05271729349571224, "grad_norm": 0.8671875, "learning_rate": 0.0016235024633989897, "loss": 0.1802, "step": 29732 }, { "epoch": 0.052720839661022055, "grad_norm": 2.59375, "learning_rate": 0.0016234540482804268, "loss": 0.2533, "step": 29734 }, { "epoch": 0.05272438582633188, "grad_norm": 2.71875, "learning_rate": 0.0016234056308725795, "loss": 0.3452, "step": 29736 }, { "epoch": 0.05272793199164169, "grad_norm": 0.6171875, "learning_rate": 0.0016233572111756603, "loss": 0.1522, "step": 29738 }, { "epoch": 0.052731478156951506, "grad_norm": 0.41796875, "learning_rate": 0.0016233087891898805, "loss": 0.2215, "step": 29740 }, { "epoch": 0.05273502432226132, "grad_norm": 0.609375, "learning_rate": 0.0016232603649154523, "loss": 0.3195, "step": 29742 }, { "epoch": 0.052738570487571135, "grad_norm": 0.361328125, "learning_rate": 0.0016232119383525869, "loss": 0.2127, "step": 29744 }, { "epoch": 0.05274211665288095, "grad_norm": 0.58203125, "learning_rate": 0.0016231635095014963, "loss": 0.2001, "step": 29746 }, { "epoch": 0.052745662818190764, "grad_norm": 2.34375, "learning_rate": 0.0016231150783623926, "loss": 0.1826, "step": 29748 }, { "epoch": 0.05274920898350058, "grad_norm": 0.46875, "learning_rate": 0.0016230666449354877, "loss": 0.2034, "step": 29750 }, { "epoch": 0.05275275514881039, "grad_norm": 0.423828125, "learning_rate": 0.0016230182092209927, "loss": 0.185, "step": 29752 }, { "epoch": 0.05275630131412021, "grad_norm": 0.25, "learning_rate": 0.00162296977121912, "loss": 0.1915, "step": 29754 }, { "epoch": 0.05275984747943002, "grad_norm": 0.9609375, "learning_rate": 0.0016229213309300811, "loss": 0.1818, "step": 29756 }, { "epoch": 0.05276339364473984, "grad_norm": 1.015625, "learning_rate": 0.0016228728883540885, "loss": 0.2412, "step": 29758 }, { "epoch": 0.05276693981004966, "grad_norm": 0.3828125, "learning_rate": 0.0016228244434913534, "loss": 0.205, "step": 29760 }, { "epoch": 0.05277048597535947, "grad_norm": 0.263671875, "learning_rate": 0.0016227759963420877, "loss": 0.1706, "step": 29762 }, { "epoch": 0.05277403214066929, "grad_norm": 1.3359375, "learning_rate": 0.0016227275469065034, "loss": 0.226, "step": 29764 }, { "epoch": 0.0527775783059791, "grad_norm": 0.31640625, "learning_rate": 0.0016226790951848129, "loss": 0.1616, "step": 29766 }, { "epoch": 0.052781124471288916, "grad_norm": 0.6484375, "learning_rate": 0.0016226306411772273, "loss": 0.2207, "step": 29768 }, { "epoch": 0.05278467063659873, "grad_norm": 0.640625, "learning_rate": 0.001622582184883959, "loss": 0.2462, "step": 29770 }, { "epoch": 0.052788216801908545, "grad_norm": 0.6640625, "learning_rate": 0.0016225337263052195, "loss": 0.1971, "step": 29772 }, { "epoch": 0.05279176296721836, "grad_norm": 0.2490234375, "learning_rate": 0.0016224852654412214, "loss": 0.2246, "step": 29774 }, { "epoch": 0.052795309132528175, "grad_norm": 0.5703125, "learning_rate": 0.0016224368022921762, "loss": 0.2175, "step": 29776 }, { "epoch": 0.05279885529783799, "grad_norm": 0.66015625, "learning_rate": 0.0016223883368582956, "loss": 0.1955, "step": 29778 }, { "epoch": 0.052802401463147804, "grad_norm": 0.41796875, "learning_rate": 0.0016223398691397922, "loss": 0.2095, "step": 29780 }, { "epoch": 0.052805947628457625, "grad_norm": 0.365234375, "learning_rate": 0.0016222913991368773, "loss": 0.18, "step": 29782 }, { "epoch": 0.05280949379376744, "grad_norm": 0.71875, "learning_rate": 0.0016222429268497633, "loss": 0.1555, "step": 29784 }, { "epoch": 0.052813039959077254, "grad_norm": 0.84765625, "learning_rate": 0.0016221944522786622, "loss": 0.2272, "step": 29786 }, { "epoch": 0.05281658612438707, "grad_norm": 0.5390625, "learning_rate": 0.0016221459754237858, "loss": 0.232, "step": 29788 }, { "epoch": 0.05282013228969688, "grad_norm": 0.55859375, "learning_rate": 0.0016220974962853462, "loss": 0.1653, "step": 29790 }, { "epoch": 0.0528236784550067, "grad_norm": 0.62109375, "learning_rate": 0.0016220490148635555, "loss": 0.1785, "step": 29792 }, { "epoch": 0.05282722462031651, "grad_norm": 1.7734375, "learning_rate": 0.0016220005311586257, "loss": 0.2984, "step": 29794 }, { "epoch": 0.05283077078562633, "grad_norm": 0.380859375, "learning_rate": 0.0016219520451707685, "loss": 0.2497, "step": 29796 }, { "epoch": 0.05283431695093614, "grad_norm": 0.474609375, "learning_rate": 0.0016219035569001966, "loss": 0.2375, "step": 29798 }, { "epoch": 0.052837863116245956, "grad_norm": 2.828125, "learning_rate": 0.0016218550663471213, "loss": 0.3587, "step": 29800 }, { "epoch": 0.05284140928155577, "grad_norm": 0.23828125, "learning_rate": 0.0016218065735117554, "loss": 0.1905, "step": 29802 }, { "epoch": 0.05284495544686559, "grad_norm": 0.53125, "learning_rate": 0.0016217580783943104, "loss": 0.2421, "step": 29804 }, { "epoch": 0.05284850161217541, "grad_norm": 2.0, "learning_rate": 0.0016217095809949987, "loss": 0.2129, "step": 29806 }, { "epoch": 0.05285204777748522, "grad_norm": 0.9609375, "learning_rate": 0.0016216610813140327, "loss": 0.2363, "step": 29808 }, { "epoch": 0.052855593942795036, "grad_norm": 0.3359375, "learning_rate": 0.0016216125793516237, "loss": 0.163, "step": 29810 }, { "epoch": 0.05285914010810485, "grad_norm": 2.0, "learning_rate": 0.0016215640751079845, "loss": 0.2343, "step": 29812 }, { "epoch": 0.052862686273414665, "grad_norm": 0.48828125, "learning_rate": 0.001621515568583327, "loss": 0.1859, "step": 29814 }, { "epoch": 0.05286623243872448, "grad_norm": 0.453125, "learning_rate": 0.0016214670597778633, "loss": 0.2553, "step": 29816 }, { "epoch": 0.052869778604034294, "grad_norm": 0.58203125, "learning_rate": 0.0016214185486918054, "loss": 0.2598, "step": 29818 }, { "epoch": 0.05287332476934411, "grad_norm": 0.52734375, "learning_rate": 0.0016213700353253657, "loss": 0.2664, "step": 29820 }, { "epoch": 0.05287687093465392, "grad_norm": 0.373046875, "learning_rate": 0.0016213215196787564, "loss": 0.2222, "step": 29822 }, { "epoch": 0.05288041709996374, "grad_norm": 1.265625, "learning_rate": 0.0016212730017521897, "loss": 0.2209, "step": 29824 }, { "epoch": 0.05288396326527355, "grad_norm": 0.44921875, "learning_rate": 0.0016212244815458777, "loss": 0.14, "step": 29826 }, { "epoch": 0.052887509430583374, "grad_norm": 0.265625, "learning_rate": 0.0016211759590600327, "loss": 0.1926, "step": 29828 }, { "epoch": 0.05289105559589319, "grad_norm": 1.703125, "learning_rate": 0.0016211274342948661, "loss": 0.2439, "step": 29830 }, { "epoch": 0.052894601761203, "grad_norm": 0.9765625, "learning_rate": 0.0016210789072505917, "loss": 0.1515, "step": 29832 }, { "epoch": 0.05289814792651282, "grad_norm": 0.5546875, "learning_rate": 0.0016210303779274201, "loss": 0.1706, "step": 29834 }, { "epoch": 0.05290169409182263, "grad_norm": 0.361328125, "learning_rate": 0.0016209818463255646, "loss": 0.2086, "step": 29836 }, { "epoch": 0.052905240257132446, "grad_norm": 2.171875, "learning_rate": 0.001620933312445237, "loss": 0.2092, "step": 29838 }, { "epoch": 0.05290878642244226, "grad_norm": 0.98828125, "learning_rate": 0.0016208847762866497, "loss": 0.2348, "step": 29840 }, { "epoch": 0.052912332587752076, "grad_norm": 0.2470703125, "learning_rate": 0.0016208362378500151, "loss": 0.1826, "step": 29842 }, { "epoch": 0.05291587875306189, "grad_norm": 0.482421875, "learning_rate": 0.0016207876971355455, "loss": 0.23, "step": 29844 }, { "epoch": 0.052919424918371705, "grad_norm": 0.361328125, "learning_rate": 0.0016207391541434527, "loss": 0.1895, "step": 29846 }, { "epoch": 0.05292297108368152, "grad_norm": 0.296875, "learning_rate": 0.0016206906088739492, "loss": 0.2238, "step": 29848 }, { "epoch": 0.05292651724899134, "grad_norm": 0.271484375, "learning_rate": 0.0016206420613272476, "loss": 0.1558, "step": 29850 }, { "epoch": 0.052930063414301155, "grad_norm": 0.41796875, "learning_rate": 0.0016205935115035605, "loss": 0.2035, "step": 29852 }, { "epoch": 0.05293360957961097, "grad_norm": 0.3984375, "learning_rate": 0.0016205449594030993, "loss": 0.2166, "step": 29854 }, { "epoch": 0.052937155744920784, "grad_norm": 0.353515625, "learning_rate": 0.0016204964050260768, "loss": 0.3389, "step": 29856 }, { "epoch": 0.0529407019102306, "grad_norm": 1.40625, "learning_rate": 0.0016204478483727053, "loss": 0.1471, "step": 29858 }, { "epoch": 0.052944248075540414, "grad_norm": 0.984375, "learning_rate": 0.0016203992894431973, "loss": 0.2698, "step": 29860 }, { "epoch": 0.05294779424085023, "grad_norm": 0.42578125, "learning_rate": 0.0016203507282377654, "loss": 0.1759, "step": 29862 }, { "epoch": 0.05295134040616004, "grad_norm": 0.54296875, "learning_rate": 0.0016203021647566214, "loss": 0.2376, "step": 29864 }, { "epoch": 0.05295488657146986, "grad_norm": 0.23828125, "learning_rate": 0.001620253598999978, "loss": 0.1783, "step": 29866 }, { "epoch": 0.05295843273677967, "grad_norm": 1.9140625, "learning_rate": 0.0016202050309680476, "loss": 0.2444, "step": 29868 }, { "epoch": 0.052961978902089486, "grad_norm": 1.1328125, "learning_rate": 0.0016201564606610427, "loss": 0.272, "step": 29870 }, { "epoch": 0.05296552506739931, "grad_norm": 0.90234375, "learning_rate": 0.0016201078880791753, "loss": 0.1937, "step": 29872 }, { "epoch": 0.05296907123270912, "grad_norm": 0.55078125, "learning_rate": 0.0016200593132226582, "loss": 0.1605, "step": 29874 }, { "epoch": 0.05297261739801894, "grad_norm": 31.125, "learning_rate": 0.0016200107360917037, "loss": 0.216, "step": 29876 }, { "epoch": 0.05297616356332875, "grad_norm": 1.2265625, "learning_rate": 0.0016199621566865246, "loss": 0.4193, "step": 29878 }, { "epoch": 0.052979709728638566, "grad_norm": 0.423828125, "learning_rate": 0.0016199135750073325, "loss": 0.2499, "step": 29880 }, { "epoch": 0.05298325589394838, "grad_norm": 0.71484375, "learning_rate": 0.0016198649910543407, "loss": 0.1839, "step": 29882 }, { "epoch": 0.052986802059258195, "grad_norm": 0.455078125, "learning_rate": 0.0016198164048277616, "loss": 0.1915, "step": 29884 }, { "epoch": 0.05299034822456801, "grad_norm": 0.80078125, "learning_rate": 0.001619767816327807, "loss": 0.2381, "step": 29886 }, { "epoch": 0.052993894389877824, "grad_norm": 6.5, "learning_rate": 0.0016197192255546907, "loss": 0.2103, "step": 29888 }, { "epoch": 0.05299744055518764, "grad_norm": 0.291015625, "learning_rate": 0.001619670632508624, "loss": 0.1878, "step": 29890 }, { "epoch": 0.05300098672049745, "grad_norm": 0.5546875, "learning_rate": 0.0016196220371898196, "loss": 0.2761, "step": 29892 }, { "epoch": 0.05300453288580727, "grad_norm": 0.29296875, "learning_rate": 0.0016195734395984902, "loss": 0.1884, "step": 29894 }, { "epoch": 0.05300807905111709, "grad_norm": 0.58984375, "learning_rate": 0.0016195248397348487, "loss": 0.2003, "step": 29896 }, { "epoch": 0.053011625216426904, "grad_norm": 0.6796875, "learning_rate": 0.0016194762375991072, "loss": 0.2415, "step": 29898 }, { "epoch": 0.05301517138173672, "grad_norm": 0.44140625, "learning_rate": 0.0016194276331914783, "loss": 0.2148, "step": 29900 }, { "epoch": 0.05301871754704653, "grad_norm": 0.578125, "learning_rate": 0.0016193790265121748, "loss": 0.2437, "step": 29902 }, { "epoch": 0.05302226371235635, "grad_norm": 0.515625, "learning_rate": 0.0016193304175614088, "loss": 0.2223, "step": 29904 }, { "epoch": 0.05302580987766616, "grad_norm": 0.58203125, "learning_rate": 0.0016192818063393937, "loss": 0.4571, "step": 29906 }, { "epoch": 0.05302935604297598, "grad_norm": 0.2421875, "learning_rate": 0.0016192331928463412, "loss": 0.1659, "step": 29908 }, { "epoch": 0.05303290220828579, "grad_norm": 0.58984375, "learning_rate": 0.0016191845770824648, "loss": 0.1907, "step": 29910 }, { "epoch": 0.053036448373595606, "grad_norm": 0.470703125, "learning_rate": 0.001619135959047976, "loss": 0.2333, "step": 29912 }, { "epoch": 0.05303999453890542, "grad_norm": 0.234375, "learning_rate": 0.0016190873387430887, "loss": 0.2752, "step": 29914 }, { "epoch": 0.053043540704215235, "grad_norm": 0.46484375, "learning_rate": 0.0016190387161680144, "loss": 0.1899, "step": 29916 }, { "epoch": 0.053047086869525056, "grad_norm": 1.0625, "learning_rate": 0.0016189900913229666, "loss": 0.1774, "step": 29918 }, { "epoch": 0.05305063303483487, "grad_norm": 0.376953125, "learning_rate": 0.0016189414642081574, "loss": 0.1977, "step": 29920 }, { "epoch": 0.053054179200144685, "grad_norm": 0.498046875, "learning_rate": 0.0016188928348238, "loss": 0.1784, "step": 29922 }, { "epoch": 0.0530577253654545, "grad_norm": 2.015625, "learning_rate": 0.001618844203170106, "loss": 0.2859, "step": 29924 }, { "epoch": 0.053061271530764315, "grad_norm": 0.5234375, "learning_rate": 0.0016187955692472896, "loss": 0.2218, "step": 29926 }, { "epoch": 0.05306481769607413, "grad_norm": 0.2890625, "learning_rate": 0.0016187469330555624, "loss": 0.1677, "step": 29928 }, { "epoch": 0.053068363861383944, "grad_norm": 0.326171875, "learning_rate": 0.0016186982945951376, "loss": 0.2135, "step": 29930 }, { "epoch": 0.05307191002669376, "grad_norm": 0.326171875, "learning_rate": 0.0016186496538662277, "loss": 0.2232, "step": 29932 }, { "epoch": 0.05307545619200357, "grad_norm": 1.8984375, "learning_rate": 0.0016186010108690454, "loss": 0.4095, "step": 29934 }, { "epoch": 0.05307900235731339, "grad_norm": 0.443359375, "learning_rate": 0.0016185523656038034, "loss": 0.2274, "step": 29936 }, { "epoch": 0.0530825485226232, "grad_norm": 2.078125, "learning_rate": 0.001618503718070715, "loss": 0.2847, "step": 29938 }, { "epoch": 0.05308609468793302, "grad_norm": 0.267578125, "learning_rate": 0.001618455068269992, "loss": 0.1717, "step": 29940 }, { "epoch": 0.05308964085324284, "grad_norm": 0.451171875, "learning_rate": 0.001618406416201848, "loss": 0.1974, "step": 29942 }, { "epoch": 0.05309318701855265, "grad_norm": 0.375, "learning_rate": 0.0016183577618664954, "loss": 0.1414, "step": 29944 }, { "epoch": 0.05309673318386247, "grad_norm": 0.50390625, "learning_rate": 0.001618309105264147, "loss": 0.2029, "step": 29946 }, { "epoch": 0.05310027934917228, "grad_norm": 0.400390625, "learning_rate": 0.0016182604463950156, "loss": 0.2113, "step": 29948 }, { "epoch": 0.053103825514482096, "grad_norm": 0.466796875, "learning_rate": 0.0016182117852593142, "loss": 0.1709, "step": 29950 }, { "epoch": 0.05310737167979191, "grad_norm": 0.3828125, "learning_rate": 0.0016181631218572551, "loss": 0.1845, "step": 29952 }, { "epoch": 0.053110917845101725, "grad_norm": 0.6328125, "learning_rate": 0.001618114456189052, "loss": 0.2254, "step": 29954 }, { "epoch": 0.05311446401041154, "grad_norm": 1.21875, "learning_rate": 0.001618065788254917, "loss": 0.2466, "step": 29956 }, { "epoch": 0.053118010175721354, "grad_norm": 0.283203125, "learning_rate": 0.0016180171180550634, "loss": 0.1719, "step": 29958 }, { "epoch": 0.05312155634103117, "grad_norm": 0.474609375, "learning_rate": 0.0016179684455897034, "loss": 0.2249, "step": 29960 }, { "epoch": 0.05312510250634098, "grad_norm": 1.46875, "learning_rate": 0.0016179197708590504, "loss": 0.2285, "step": 29962 }, { "epoch": 0.053128648671650805, "grad_norm": 0.8828125, "learning_rate": 0.0016178710938633174, "loss": 0.19, "step": 29964 }, { "epoch": 0.05313219483696062, "grad_norm": 0.384765625, "learning_rate": 0.001617822414602717, "loss": 0.1839, "step": 29966 }, { "epoch": 0.053135741002270434, "grad_norm": 0.6328125, "learning_rate": 0.001617773733077462, "loss": 0.1799, "step": 29968 }, { "epoch": 0.05313928716758025, "grad_norm": 1.6328125, "learning_rate": 0.0016177250492877655, "loss": 0.3135, "step": 29970 }, { "epoch": 0.05314283333289006, "grad_norm": 0.40625, "learning_rate": 0.0016176763632338403, "loss": 0.1667, "step": 29972 }, { "epoch": 0.05314637949819988, "grad_norm": 0.384765625, "learning_rate": 0.0016176276749158994, "loss": 0.1796, "step": 29974 }, { "epoch": 0.05314992566350969, "grad_norm": 0.50390625, "learning_rate": 0.001617578984334156, "loss": 0.1526, "step": 29976 }, { "epoch": 0.05315347182881951, "grad_norm": 0.408203125, "learning_rate": 0.0016175302914888223, "loss": 0.2037, "step": 29978 }, { "epoch": 0.05315701799412932, "grad_norm": 13.625, "learning_rate": 0.0016174815963801122, "loss": 0.5022, "step": 29980 }, { "epoch": 0.053160564159439136, "grad_norm": 0.375, "learning_rate": 0.0016174328990082379, "loss": 0.1799, "step": 29982 }, { "epoch": 0.05316411032474895, "grad_norm": 0.59765625, "learning_rate": 0.0016173841993734129, "loss": 0.2383, "step": 29984 }, { "epoch": 0.05316765649005877, "grad_norm": 2.765625, "learning_rate": 0.0016173354974758495, "loss": 0.5299, "step": 29986 }, { "epoch": 0.05317120265536859, "grad_norm": 1.5078125, "learning_rate": 0.0016172867933157615, "loss": 0.2048, "step": 29988 }, { "epoch": 0.0531747488206784, "grad_norm": 0.384765625, "learning_rate": 0.0016172380868933614, "loss": 0.2437, "step": 29990 }, { "epoch": 0.053178294985988216, "grad_norm": 3.96875, "learning_rate": 0.0016171893782088624, "loss": 0.2554, "step": 29992 }, { "epoch": 0.05318184115129803, "grad_norm": 0.5, "learning_rate": 0.0016171406672624775, "loss": 0.1529, "step": 29994 }, { "epoch": 0.053185387316607845, "grad_norm": 0.53515625, "learning_rate": 0.0016170919540544196, "loss": 0.2231, "step": 29996 }, { "epoch": 0.05318893348191766, "grad_norm": 0.56640625, "learning_rate": 0.001617043238584902, "loss": 0.2097, "step": 29998 }, { "epoch": 0.053192479647227474, "grad_norm": 0.54296875, "learning_rate": 0.0016169945208541375, "loss": 0.1765, "step": 30000 }, { "epoch": 0.05319602581253729, "grad_norm": 0.267578125, "learning_rate": 0.0016169458008623394, "loss": 0.1791, "step": 30002 }, { "epoch": 0.0531995719778471, "grad_norm": 0.412109375, "learning_rate": 0.0016168970786097205, "loss": 0.519, "step": 30004 }, { "epoch": 0.05320311814315692, "grad_norm": 0.5234375, "learning_rate": 0.001616848354096494, "loss": 0.1665, "step": 30006 }, { "epoch": 0.05320666430846674, "grad_norm": 1.453125, "learning_rate": 0.0016167996273228733, "loss": 0.2041, "step": 30008 }, { "epoch": 0.053210210473776554, "grad_norm": 0.3671875, "learning_rate": 0.0016167508982890709, "loss": 0.1714, "step": 30010 }, { "epoch": 0.05321375663908637, "grad_norm": 0.6015625, "learning_rate": 0.0016167021669953006, "loss": 0.1902, "step": 30012 }, { "epoch": 0.05321730280439618, "grad_norm": 1.015625, "learning_rate": 0.0016166534334417747, "loss": 0.1505, "step": 30014 }, { "epoch": 0.053220848969706, "grad_norm": 0.392578125, "learning_rate": 0.0016166046976287072, "loss": 0.1893, "step": 30016 }, { "epoch": 0.05322439513501581, "grad_norm": 0.89453125, "learning_rate": 0.0016165559595563105, "loss": 0.1841, "step": 30018 }, { "epoch": 0.053227941300325626, "grad_norm": 0.64453125, "learning_rate": 0.0016165072192247986, "loss": 0.1735, "step": 30020 }, { "epoch": 0.05323148746563544, "grad_norm": 2.234375, "learning_rate": 0.0016164584766343833, "loss": 0.2483, "step": 30022 }, { "epoch": 0.053235033630945255, "grad_norm": 0.8984375, "learning_rate": 0.0016164097317852796, "loss": 0.2257, "step": 30024 }, { "epoch": 0.05323857979625507, "grad_norm": 1.8359375, "learning_rate": 0.001616360984677699, "loss": 0.199, "step": 30026 }, { "epoch": 0.053242125961564885, "grad_norm": 0.5078125, "learning_rate": 0.0016163122353118555, "loss": 0.2051, "step": 30028 }, { "epoch": 0.0532456721268747, "grad_norm": 0.75, "learning_rate": 0.0016162634836879622, "loss": 0.2237, "step": 30030 }, { "epoch": 0.05324921829218452, "grad_norm": 0.59765625, "learning_rate": 0.0016162147298062323, "loss": 0.221, "step": 30032 }, { "epoch": 0.053252764457494335, "grad_norm": 0.22265625, "learning_rate": 0.0016161659736668795, "loss": 0.3134, "step": 30034 }, { "epoch": 0.05325631062280415, "grad_norm": 2.59375, "learning_rate": 0.001616117215270116, "loss": 0.241, "step": 30036 }, { "epoch": 0.053259856788113964, "grad_norm": 0.671875, "learning_rate": 0.0016160684546161557, "loss": 0.1838, "step": 30038 }, { "epoch": 0.05326340295342378, "grad_norm": 0.62109375, "learning_rate": 0.001616019691705212, "loss": 0.2569, "step": 30040 }, { "epoch": 0.05326694911873359, "grad_norm": 0.640625, "learning_rate": 0.0016159709265374978, "loss": 0.1877, "step": 30042 }, { "epoch": 0.05327049528404341, "grad_norm": 0.67578125, "learning_rate": 0.0016159221591132262, "loss": 0.284, "step": 30044 }, { "epoch": 0.05327404144935322, "grad_norm": 0.8828125, "learning_rate": 0.001615873389432611, "loss": 0.1867, "step": 30046 }, { "epoch": 0.05327758761466304, "grad_norm": 1.265625, "learning_rate": 0.0016158246174958653, "loss": 0.2916, "step": 30048 }, { "epoch": 0.05328113377997285, "grad_norm": 1.6015625, "learning_rate": 0.001615775843303202, "loss": 0.1687, "step": 30050 }, { "epoch": 0.053284679945282666, "grad_norm": 1.0234375, "learning_rate": 0.0016157270668548352, "loss": 0.135, "step": 30052 }, { "epoch": 0.05328822611059249, "grad_norm": 1.875, "learning_rate": 0.0016156782881509774, "loss": 0.2789, "step": 30054 }, { "epoch": 0.0532917722759023, "grad_norm": 0.84765625, "learning_rate": 0.0016156295071918425, "loss": 0.1831, "step": 30056 }, { "epoch": 0.05329531844121212, "grad_norm": 1.2578125, "learning_rate": 0.0016155807239776438, "loss": 0.2967, "step": 30058 }, { "epoch": 0.05329886460652193, "grad_norm": 0.421875, "learning_rate": 0.0016155319385085943, "loss": 0.2206, "step": 30060 }, { "epoch": 0.053302410771831746, "grad_norm": 1.984375, "learning_rate": 0.0016154831507849078, "loss": 0.1807, "step": 30062 }, { "epoch": 0.05330595693714156, "grad_norm": 1.3515625, "learning_rate": 0.0016154343608067972, "loss": 0.2563, "step": 30064 }, { "epoch": 0.053309503102451375, "grad_norm": 0.953125, "learning_rate": 0.0016153855685744761, "loss": 0.1797, "step": 30066 }, { "epoch": 0.05331304926776119, "grad_norm": 1.6484375, "learning_rate": 0.001615336774088158, "loss": 0.3054, "step": 30068 }, { "epoch": 0.053316595433071004, "grad_norm": 0.216796875, "learning_rate": 0.0016152879773480562, "loss": 0.1894, "step": 30070 }, { "epoch": 0.05332014159838082, "grad_norm": 0.4296875, "learning_rate": 0.0016152391783543839, "loss": 0.2619, "step": 30072 }, { "epoch": 0.05332368776369063, "grad_norm": 0.6015625, "learning_rate": 0.0016151903771073547, "loss": 0.1696, "step": 30074 }, { "epoch": 0.053327233929000455, "grad_norm": 1.2421875, "learning_rate": 0.0016151415736071824, "loss": 0.2048, "step": 30076 }, { "epoch": 0.05333078009431027, "grad_norm": 0.412109375, "learning_rate": 0.0016150927678540798, "loss": 0.1362, "step": 30078 }, { "epoch": 0.053334326259620084, "grad_norm": 0.79296875, "learning_rate": 0.0016150439598482606, "loss": 0.2515, "step": 30080 }, { "epoch": 0.0533378724249299, "grad_norm": 0.28515625, "learning_rate": 0.0016149951495899387, "loss": 0.2648, "step": 30082 }, { "epoch": 0.05334141859023971, "grad_norm": 0.255859375, "learning_rate": 0.0016149463370793266, "loss": 0.2562, "step": 30084 }, { "epoch": 0.05334496475554953, "grad_norm": 0.6640625, "learning_rate": 0.0016148975223166388, "loss": 0.2949, "step": 30086 }, { "epoch": 0.05334851092085934, "grad_norm": 0.91796875, "learning_rate": 0.0016148487053020881, "loss": 0.2062, "step": 30088 }, { "epoch": 0.053352057086169156, "grad_norm": 1.4375, "learning_rate": 0.0016147998860358884, "loss": 0.4868, "step": 30090 }, { "epoch": 0.05335560325147897, "grad_norm": 0.353515625, "learning_rate": 0.001614751064518253, "loss": 0.2011, "step": 30092 }, { "epoch": 0.053359149416788786, "grad_norm": 0.205078125, "learning_rate": 0.001614702240749395, "loss": 0.1211, "step": 30094 }, { "epoch": 0.0533626955820986, "grad_norm": 0.8828125, "learning_rate": 0.001614653414729529, "loss": 0.293, "step": 30096 }, { "epoch": 0.053366241747408415, "grad_norm": 1.0234375, "learning_rate": 0.0016146045864588675, "loss": 0.195, "step": 30098 }, { "epoch": 0.053369787912718236, "grad_norm": 0.318359375, "learning_rate": 0.0016145557559376245, "loss": 0.1289, "step": 30100 }, { "epoch": 0.05337333407802805, "grad_norm": 2.109375, "learning_rate": 0.0016145069231660138, "loss": 0.2366, "step": 30102 }, { "epoch": 0.053376880243337865, "grad_norm": 0.76171875, "learning_rate": 0.0016144580881442484, "loss": 0.1691, "step": 30104 }, { "epoch": 0.05338042640864768, "grad_norm": 0.283203125, "learning_rate": 0.0016144092508725424, "loss": 0.1592, "step": 30106 }, { "epoch": 0.053383972573957494, "grad_norm": 0.515625, "learning_rate": 0.0016143604113511088, "loss": 0.2334, "step": 30108 }, { "epoch": 0.05338751873926731, "grad_norm": 0.28125, "learning_rate": 0.0016143115695801617, "loss": 0.1757, "step": 30110 }, { "epoch": 0.053391064904577124, "grad_norm": 0.578125, "learning_rate": 0.0016142627255599144, "loss": 0.1853, "step": 30112 }, { "epoch": 0.05339461106988694, "grad_norm": 0.357421875, "learning_rate": 0.0016142138792905809, "loss": 0.1895, "step": 30114 }, { "epoch": 0.05339815723519675, "grad_norm": 1.2578125, "learning_rate": 0.0016141650307723745, "loss": 0.1871, "step": 30116 }, { "epoch": 0.05340170340050657, "grad_norm": 0.42578125, "learning_rate": 0.001614116180005509, "loss": 0.2007, "step": 30118 }, { "epoch": 0.05340524956581638, "grad_norm": 1.203125, "learning_rate": 0.0016140673269901978, "loss": 0.205, "step": 30120 }, { "epoch": 0.0534087957311262, "grad_norm": 0.36328125, "learning_rate": 0.0016140184717266547, "loss": 0.3975, "step": 30122 }, { "epoch": 0.05341234189643602, "grad_norm": 0.58203125, "learning_rate": 0.0016139696142150938, "loss": 0.163, "step": 30124 }, { "epoch": 0.05341588806174583, "grad_norm": 0.443359375, "learning_rate": 0.0016139207544557277, "loss": 0.18, "step": 30126 }, { "epoch": 0.05341943422705565, "grad_norm": 1.3515625, "learning_rate": 0.0016138718924487711, "loss": 0.1896, "step": 30128 }, { "epoch": 0.05342298039236546, "grad_norm": 0.8203125, "learning_rate": 0.0016138230281944376, "loss": 0.2709, "step": 30130 }, { "epoch": 0.053426526557675276, "grad_norm": 0.69140625, "learning_rate": 0.0016137741616929402, "loss": 0.2834, "step": 30132 }, { "epoch": 0.05343007272298509, "grad_norm": 0.9921875, "learning_rate": 0.0016137252929444932, "loss": 0.1855, "step": 30134 }, { "epoch": 0.053433618888294905, "grad_norm": 0.25, "learning_rate": 0.0016136764219493102, "loss": 0.1867, "step": 30136 }, { "epoch": 0.05343716505360472, "grad_norm": 0.41796875, "learning_rate": 0.0016136275487076048, "loss": 0.2201, "step": 30138 }, { "epoch": 0.053440711218914534, "grad_norm": 1.953125, "learning_rate": 0.001613578673219591, "loss": 0.287, "step": 30140 }, { "epoch": 0.05344425738422435, "grad_norm": 2.046875, "learning_rate": 0.0016135297954854822, "loss": 0.1829, "step": 30142 }, { "epoch": 0.05344780354953417, "grad_norm": 0.6796875, "learning_rate": 0.0016134809155054926, "loss": 0.1335, "step": 30144 }, { "epoch": 0.053451349714843985, "grad_norm": 2.28125, "learning_rate": 0.0016134320332798356, "loss": 0.2341, "step": 30146 }, { "epoch": 0.0534548958801538, "grad_norm": 1.8671875, "learning_rate": 0.0016133831488087255, "loss": 0.1898, "step": 30148 }, { "epoch": 0.053458442045463614, "grad_norm": 0.6875, "learning_rate": 0.001613334262092375, "loss": 0.1827, "step": 30150 }, { "epoch": 0.05346198821077343, "grad_norm": 0.6328125, "learning_rate": 0.0016132853731309993, "loss": 0.1941, "step": 30152 }, { "epoch": 0.05346553437608324, "grad_norm": 0.63671875, "learning_rate": 0.0016132364819248111, "loss": 0.4495, "step": 30154 }, { "epoch": 0.05346908054139306, "grad_norm": 1.796875, "learning_rate": 0.0016131875884740248, "loss": 0.4899, "step": 30156 }, { "epoch": 0.05347262670670287, "grad_norm": 2.0625, "learning_rate": 0.0016131386927788539, "loss": 0.2207, "step": 30158 }, { "epoch": 0.05347617287201269, "grad_norm": 2.78125, "learning_rate": 0.0016130897948395126, "loss": 0.2931, "step": 30160 }, { "epoch": 0.0534797190373225, "grad_norm": 0.4375, "learning_rate": 0.0016130408946562148, "loss": 0.1933, "step": 30162 }, { "epoch": 0.053483265202632316, "grad_norm": 0.3828125, "learning_rate": 0.0016129919922291739, "loss": 0.152, "step": 30164 }, { "epoch": 0.05348681136794213, "grad_norm": 0.796875, "learning_rate": 0.001612943087558604, "loss": 0.2469, "step": 30166 }, { "epoch": 0.05349035753325195, "grad_norm": 0.51171875, "learning_rate": 0.0016128941806447192, "loss": 0.2912, "step": 30168 }, { "epoch": 0.053493903698561766, "grad_norm": 0.51953125, "learning_rate": 0.0016128452714877328, "loss": 0.1329, "step": 30170 }, { "epoch": 0.05349744986387158, "grad_norm": 0.490234375, "learning_rate": 0.0016127963600878592, "loss": 0.2066, "step": 30172 }, { "epoch": 0.053500996029181395, "grad_norm": 0.94140625, "learning_rate": 0.0016127474464453122, "loss": 0.1938, "step": 30174 }, { "epoch": 0.05350454219449121, "grad_norm": 0.58203125, "learning_rate": 0.0016126985305603056, "loss": 0.2503, "step": 30176 }, { "epoch": 0.053508088359801025, "grad_norm": 0.470703125, "learning_rate": 0.0016126496124330538, "loss": 0.1586, "step": 30178 }, { "epoch": 0.05351163452511084, "grad_norm": 0.890625, "learning_rate": 0.0016126006920637703, "loss": 0.1956, "step": 30180 }, { "epoch": 0.053515180690420654, "grad_norm": 0.66796875, "learning_rate": 0.001612551769452669, "loss": 0.2153, "step": 30182 }, { "epoch": 0.05351872685573047, "grad_norm": 0.30859375, "learning_rate": 0.0016125028445999638, "loss": 0.2065, "step": 30184 }, { "epoch": 0.05352227302104028, "grad_norm": 0.26953125, "learning_rate": 0.001612453917505869, "loss": 0.1392, "step": 30186 }, { "epoch": 0.0535258191863501, "grad_norm": 0.244140625, "learning_rate": 0.0016124049881705985, "loss": 0.1874, "step": 30188 }, { "epoch": 0.05352936535165992, "grad_norm": 1.28125, "learning_rate": 0.0016123560565943662, "loss": 0.3, "step": 30190 }, { "epoch": 0.05353291151696973, "grad_norm": 0.92578125, "learning_rate": 0.0016123071227773862, "loss": 0.2318, "step": 30192 }, { "epoch": 0.05353645768227955, "grad_norm": 1.7734375, "learning_rate": 0.0016122581867198723, "loss": 0.1831, "step": 30194 }, { "epoch": 0.05354000384758936, "grad_norm": 0.72265625, "learning_rate": 0.0016122092484220387, "loss": 0.2002, "step": 30196 }, { "epoch": 0.05354355001289918, "grad_norm": 1.5234375, "learning_rate": 0.0016121603078840994, "loss": 0.3693, "step": 30198 }, { "epoch": 0.05354709617820899, "grad_norm": 1.1171875, "learning_rate": 0.0016121113651062684, "loss": 0.236, "step": 30200 }, { "epoch": 0.053550642343518806, "grad_norm": 1.1640625, "learning_rate": 0.0016120624200887594, "loss": 0.1791, "step": 30202 }, { "epoch": 0.05355418850882862, "grad_norm": 0.33984375, "learning_rate": 0.0016120134728317874, "loss": 0.2037, "step": 30204 }, { "epoch": 0.053557734674138435, "grad_norm": 0.54296875, "learning_rate": 0.0016119645233355654, "loss": 0.2192, "step": 30206 }, { "epoch": 0.05356128083944825, "grad_norm": 0.455078125, "learning_rate": 0.0016119155716003085, "loss": 0.2055, "step": 30208 }, { "epoch": 0.053564827004758064, "grad_norm": 1.53125, "learning_rate": 0.0016118666176262297, "loss": 0.4839, "step": 30210 }, { "epoch": 0.053568373170067886, "grad_norm": 0.828125, "learning_rate": 0.001611817661413544, "loss": 0.1697, "step": 30212 }, { "epoch": 0.0535719193353777, "grad_norm": 1.0234375, "learning_rate": 0.0016117687029624651, "loss": 0.2439, "step": 30214 }, { "epoch": 0.053575465500687515, "grad_norm": 0.416015625, "learning_rate": 0.0016117197422732073, "loss": 0.1705, "step": 30216 }, { "epoch": 0.05357901166599733, "grad_norm": 0.177734375, "learning_rate": 0.0016116707793459842, "loss": 0.1517, "step": 30218 }, { "epoch": 0.053582557831307144, "grad_norm": 0.4140625, "learning_rate": 0.0016116218141810105, "loss": 0.2059, "step": 30220 }, { "epoch": 0.05358610399661696, "grad_norm": 0.625, "learning_rate": 0.0016115728467785, "loss": 0.2248, "step": 30222 }, { "epoch": 0.05358965016192677, "grad_norm": 0.435546875, "learning_rate": 0.0016115238771386676, "loss": 0.24, "step": 30224 }, { "epoch": 0.05359319632723659, "grad_norm": 1.0, "learning_rate": 0.0016114749052617264, "loss": 0.1943, "step": 30226 }, { "epoch": 0.0535967424925464, "grad_norm": 0.8203125, "learning_rate": 0.0016114259311478907, "loss": 0.2096, "step": 30228 }, { "epoch": 0.05360028865785622, "grad_norm": 1.828125, "learning_rate": 0.0016113769547973758, "loss": 0.2475, "step": 30230 }, { "epoch": 0.05360383482316603, "grad_norm": 0.71875, "learning_rate": 0.0016113279762103947, "loss": 0.1708, "step": 30232 }, { "epoch": 0.053607380988475846, "grad_norm": 1.8359375, "learning_rate": 0.0016112789953871623, "loss": 0.2306, "step": 30234 }, { "epoch": 0.05361092715378567, "grad_norm": 1.0390625, "learning_rate": 0.0016112300123278924, "loss": 0.2153, "step": 30236 }, { "epoch": 0.05361447331909548, "grad_norm": 0.400390625, "learning_rate": 0.0016111810270327996, "loss": 0.1798, "step": 30238 }, { "epoch": 0.053618019484405297, "grad_norm": 0.34375, "learning_rate": 0.0016111320395020977, "loss": 0.2075, "step": 30240 }, { "epoch": 0.05362156564971511, "grad_norm": 0.80859375, "learning_rate": 0.001611083049736001, "loss": 0.1727, "step": 30242 }, { "epoch": 0.053625111815024926, "grad_norm": 0.25, "learning_rate": 0.0016110340577347242, "loss": 0.1972, "step": 30244 }, { "epoch": 0.05362865798033474, "grad_norm": 1.9375, "learning_rate": 0.0016109850634984813, "loss": 0.3131, "step": 30246 }, { "epoch": 0.053632204145644555, "grad_norm": 0.77734375, "learning_rate": 0.0016109360670274863, "loss": 0.2086, "step": 30248 }, { "epoch": 0.05363575031095437, "grad_norm": 0.8984375, "learning_rate": 0.001610887068321954, "loss": 0.2136, "step": 30250 }, { "epoch": 0.053639296476264184, "grad_norm": 0.455078125, "learning_rate": 0.0016108380673820985, "loss": 0.3134, "step": 30252 }, { "epoch": 0.053642842641574, "grad_norm": 0.3203125, "learning_rate": 0.0016107890642081337, "loss": 0.2045, "step": 30254 }, { "epoch": 0.05364638880688381, "grad_norm": 2.578125, "learning_rate": 0.0016107400588002742, "loss": 0.3101, "step": 30256 }, { "epoch": 0.053649934972193634, "grad_norm": 0.71875, "learning_rate": 0.0016106910511587344, "loss": 0.277, "step": 30258 }, { "epoch": 0.05365348113750345, "grad_norm": 0.53125, "learning_rate": 0.0016106420412837288, "loss": 0.1468, "step": 30260 }, { "epoch": 0.053657027302813264, "grad_norm": 0.96875, "learning_rate": 0.0016105930291754715, "loss": 0.168, "step": 30262 }, { "epoch": 0.05366057346812308, "grad_norm": 0.3828125, "learning_rate": 0.0016105440148341769, "loss": 0.1657, "step": 30264 }, { "epoch": 0.05366411963343289, "grad_norm": 0.333984375, "learning_rate": 0.0016104949982600591, "loss": 0.2149, "step": 30266 }, { "epoch": 0.05366766579874271, "grad_norm": 0.625, "learning_rate": 0.0016104459794533327, "loss": 0.1907, "step": 30268 }, { "epoch": 0.05367121196405252, "grad_norm": 2.546875, "learning_rate": 0.0016103969584142124, "loss": 0.4859, "step": 30270 }, { "epoch": 0.053674758129362336, "grad_norm": 0.322265625, "learning_rate": 0.0016103479351429121, "loss": 0.2418, "step": 30272 }, { "epoch": 0.05367830429467215, "grad_norm": 3.453125, "learning_rate": 0.0016102989096396466, "loss": 0.2072, "step": 30274 }, { "epoch": 0.053681850459981965, "grad_norm": 0.275390625, "learning_rate": 0.0016102498819046298, "loss": 0.2053, "step": 30276 }, { "epoch": 0.05368539662529178, "grad_norm": 0.4765625, "learning_rate": 0.0016102008519380765, "loss": 0.1824, "step": 30278 }, { "epoch": 0.0536889427906016, "grad_norm": 1.046875, "learning_rate": 0.001610151819740201, "loss": 0.2191, "step": 30280 }, { "epoch": 0.053692488955911416, "grad_norm": 0.23828125, "learning_rate": 0.0016101027853112177, "loss": 0.153, "step": 30282 }, { "epoch": 0.05369603512122123, "grad_norm": 0.279296875, "learning_rate": 0.0016100537486513414, "loss": 0.2053, "step": 30284 }, { "epoch": 0.053699581286531045, "grad_norm": 0.3046875, "learning_rate": 0.001610004709760786, "loss": 0.1755, "step": 30286 }, { "epoch": 0.05370312745184086, "grad_norm": 0.73828125, "learning_rate": 0.0016099556686397663, "loss": 0.3132, "step": 30288 }, { "epoch": 0.053706673617150674, "grad_norm": 0.46875, "learning_rate": 0.001609906625288497, "loss": 0.1979, "step": 30290 }, { "epoch": 0.05371021978246049, "grad_norm": 1.953125, "learning_rate": 0.0016098575797071919, "loss": 0.1899, "step": 30292 }, { "epoch": 0.0537137659477703, "grad_norm": 0.3828125, "learning_rate": 0.0016098085318960662, "loss": 0.1634, "step": 30294 }, { "epoch": 0.05371731211308012, "grad_norm": 1.671875, "learning_rate": 0.0016097594818553337, "loss": 0.173, "step": 30296 }, { "epoch": 0.05372085827838993, "grad_norm": 0.39453125, "learning_rate": 0.0016097104295852097, "loss": 0.139, "step": 30298 }, { "epoch": 0.05372440444369975, "grad_norm": 0.5546875, "learning_rate": 0.0016096613750859084, "loss": 0.1768, "step": 30300 }, { "epoch": 0.05372795060900956, "grad_norm": 0.6015625, "learning_rate": 0.0016096123183576443, "loss": 0.2449, "step": 30302 }, { "epoch": 0.05373149677431938, "grad_norm": 0.4765625, "learning_rate": 0.0016095632594006315, "loss": 0.1766, "step": 30304 }, { "epoch": 0.0537350429396292, "grad_norm": 0.61328125, "learning_rate": 0.0016095141982150854, "loss": 0.1757, "step": 30306 }, { "epoch": 0.05373858910493901, "grad_norm": 0.27734375, "learning_rate": 0.0016094651348012201, "loss": 0.1242, "step": 30308 }, { "epoch": 0.05374213527024883, "grad_norm": 1.09375, "learning_rate": 0.0016094160691592502, "loss": 0.2557, "step": 30310 }, { "epoch": 0.05374568143555864, "grad_norm": 6.9375, "learning_rate": 0.0016093670012893903, "loss": 0.3109, "step": 30312 }, { "epoch": 0.053749227600868456, "grad_norm": 1.375, "learning_rate": 0.0016093179311918548, "loss": 0.3202, "step": 30314 }, { "epoch": 0.05375277376617827, "grad_norm": 0.50390625, "learning_rate": 0.001609268858866859, "loss": 0.1697, "step": 30316 }, { "epoch": 0.053756319931488085, "grad_norm": 0.546875, "learning_rate": 0.0016092197843146168, "loss": 0.1813, "step": 30318 }, { "epoch": 0.0537598660967979, "grad_norm": 0.205078125, "learning_rate": 0.0016091707075353429, "loss": 0.2039, "step": 30320 }, { "epoch": 0.053763412262107714, "grad_norm": 0.283203125, "learning_rate": 0.0016091216285292521, "loss": 0.2334, "step": 30322 }, { "epoch": 0.05376695842741753, "grad_norm": 0.44921875, "learning_rate": 0.0016090725472965592, "loss": 0.1955, "step": 30324 }, { "epoch": 0.05377050459272735, "grad_norm": 0.404296875, "learning_rate": 0.0016090234638374786, "loss": 0.1502, "step": 30326 }, { "epoch": 0.053774050758037165, "grad_norm": 0.4609375, "learning_rate": 0.001608974378152225, "loss": 0.4652, "step": 30328 }, { "epoch": 0.05377759692334698, "grad_norm": 0.45703125, "learning_rate": 0.0016089252902410133, "loss": 0.2246, "step": 30330 }, { "epoch": 0.053781143088656794, "grad_norm": 0.5234375, "learning_rate": 0.0016088762001040579, "loss": 0.1789, "step": 30332 }, { "epoch": 0.05378468925396661, "grad_norm": 0.640625, "learning_rate": 0.0016088271077415734, "loss": 0.2032, "step": 30334 }, { "epoch": 0.05378823541927642, "grad_norm": 1.671875, "learning_rate": 0.001608778013153775, "loss": 0.2143, "step": 30336 }, { "epoch": 0.05379178158458624, "grad_norm": 1.7890625, "learning_rate": 0.0016087289163408771, "loss": 0.2351, "step": 30338 }, { "epoch": 0.05379532774989605, "grad_norm": 0.1953125, "learning_rate": 0.0016086798173030941, "loss": 0.1614, "step": 30340 }, { "epoch": 0.053798873915205866, "grad_norm": 0.27734375, "learning_rate": 0.0016086307160406413, "loss": 0.1267, "step": 30342 }, { "epoch": 0.05380242008051568, "grad_norm": 0.51953125, "learning_rate": 0.0016085816125537332, "loss": 0.18, "step": 30344 }, { "epoch": 0.053805966245825496, "grad_norm": 0.45703125, "learning_rate": 0.0016085325068425844, "loss": 0.2146, "step": 30346 }, { "epoch": 0.05380951241113532, "grad_norm": 0.45703125, "learning_rate": 0.0016084833989074099, "loss": 0.1895, "step": 30348 }, { "epoch": 0.05381305857644513, "grad_norm": 0.8671875, "learning_rate": 0.0016084342887484242, "loss": 0.186, "step": 30350 }, { "epoch": 0.053816604741754946, "grad_norm": 2.84375, "learning_rate": 0.0016083851763658429, "loss": 0.3351, "step": 30352 }, { "epoch": 0.05382015090706476, "grad_norm": 1.421875, "learning_rate": 0.0016083360617598793, "loss": 0.4249, "step": 30354 }, { "epoch": 0.053823697072374575, "grad_norm": 0.369140625, "learning_rate": 0.0016082869449307493, "loss": 0.197, "step": 30356 }, { "epoch": 0.05382724323768439, "grad_norm": 0.419921875, "learning_rate": 0.0016082378258786675, "loss": 0.2066, "step": 30358 }, { "epoch": 0.053830789402994204, "grad_norm": 0.765625, "learning_rate": 0.001608188704603849, "loss": 0.3675, "step": 30360 }, { "epoch": 0.05383433556830402, "grad_norm": 0.251953125, "learning_rate": 0.0016081395811065077, "loss": 0.1596, "step": 30362 }, { "epoch": 0.053837881733613834, "grad_norm": 0.68359375, "learning_rate": 0.0016080904553868597, "loss": 0.1661, "step": 30364 }, { "epoch": 0.05384142789892365, "grad_norm": 0.27734375, "learning_rate": 0.001608041327445119, "loss": 0.1953, "step": 30366 }, { "epoch": 0.05384497406423346, "grad_norm": 0.26953125, "learning_rate": 0.0016079921972815003, "loss": 0.1929, "step": 30368 }, { "epoch": 0.05384852022954328, "grad_norm": 0.431640625, "learning_rate": 0.0016079430648962192, "loss": 0.2732, "step": 30370 }, { "epoch": 0.0538520663948531, "grad_norm": 0.306640625, "learning_rate": 0.00160789393028949, "loss": 0.1648, "step": 30372 }, { "epoch": 0.05385561256016291, "grad_norm": 1.6640625, "learning_rate": 0.0016078447934615274, "loss": 0.2648, "step": 30374 }, { "epoch": 0.05385915872547273, "grad_norm": 0.6484375, "learning_rate": 0.0016077956544125471, "loss": 0.2382, "step": 30376 }, { "epoch": 0.05386270489078254, "grad_norm": 0.435546875, "learning_rate": 0.0016077465131427635, "loss": 0.1946, "step": 30378 }, { "epoch": 0.05386625105609236, "grad_norm": 0.90234375, "learning_rate": 0.0016076973696523915, "loss": 0.3324, "step": 30380 }, { "epoch": 0.05386979722140217, "grad_norm": 0.298828125, "learning_rate": 0.0016076482239416462, "loss": 0.2954, "step": 30382 }, { "epoch": 0.053873343386711986, "grad_norm": 0.203125, "learning_rate": 0.0016075990760107425, "loss": 0.1574, "step": 30384 }, { "epoch": 0.0538768895520218, "grad_norm": 2.15625, "learning_rate": 0.0016075499258598953, "loss": 0.2176, "step": 30386 }, { "epoch": 0.053880435717331615, "grad_norm": 0.373046875, "learning_rate": 0.0016075007734893195, "loss": 0.2247, "step": 30388 }, { "epoch": 0.05388398188264143, "grad_norm": 0.71875, "learning_rate": 0.0016074516188992298, "loss": 0.1882, "step": 30390 }, { "epoch": 0.053887528047951244, "grad_norm": 1.203125, "learning_rate": 0.001607402462089842, "loss": 0.1884, "step": 30392 }, { "epoch": 0.053891074213261066, "grad_norm": 1.859375, "learning_rate": 0.00160735330306137, "loss": 0.3325, "step": 30394 }, { "epoch": 0.05389462037857088, "grad_norm": 0.255859375, "learning_rate": 0.0016073041418140299, "loss": 0.1664, "step": 30396 }, { "epoch": 0.053898166543880695, "grad_norm": 0.26171875, "learning_rate": 0.0016072549783480358, "loss": 0.1435, "step": 30398 }, { "epoch": 0.05390171270919051, "grad_norm": 0.451171875, "learning_rate": 0.001607205812663603, "loss": 0.2192, "step": 30400 }, { "epoch": 0.053905258874500324, "grad_norm": 0.453125, "learning_rate": 0.0016071566447609469, "loss": 0.2505, "step": 30402 }, { "epoch": 0.05390880503981014, "grad_norm": 0.43359375, "learning_rate": 0.0016071074746402824, "loss": 0.2431, "step": 30404 }, { "epoch": 0.05391235120511995, "grad_norm": 1.3671875, "learning_rate": 0.0016070583023018242, "loss": 0.2181, "step": 30406 }, { "epoch": 0.05391589737042977, "grad_norm": 0.369140625, "learning_rate": 0.0016070091277457872, "loss": 0.2545, "step": 30408 }, { "epoch": 0.05391944353573958, "grad_norm": 1.21875, "learning_rate": 0.001606959950972387, "loss": 0.2087, "step": 30410 }, { "epoch": 0.0539229897010494, "grad_norm": 0.58203125, "learning_rate": 0.0016069107719818383, "loss": 0.3003, "step": 30412 }, { "epoch": 0.05392653586635921, "grad_norm": 0.2275390625, "learning_rate": 0.0016068615907743567, "loss": 0.2239, "step": 30414 }, { "epoch": 0.05393008203166903, "grad_norm": 0.734375, "learning_rate": 0.0016068124073501564, "loss": 0.1933, "step": 30416 }, { "epoch": 0.05393362819697885, "grad_norm": 0.267578125, "learning_rate": 0.0016067632217094537, "loss": 0.2393, "step": 30418 }, { "epoch": 0.05393717436228866, "grad_norm": 0.6171875, "learning_rate": 0.0016067140338524625, "loss": 0.1425, "step": 30420 }, { "epoch": 0.053940720527598476, "grad_norm": 0.283203125, "learning_rate": 0.0016066648437793986, "loss": 0.1718, "step": 30422 }, { "epoch": 0.05394426669290829, "grad_norm": 0.54296875, "learning_rate": 0.001606615651490477, "loss": 0.2131, "step": 30424 }, { "epoch": 0.053947812858218105, "grad_norm": 0.447265625, "learning_rate": 0.001606566456985913, "loss": 0.1607, "step": 30426 }, { "epoch": 0.05395135902352792, "grad_norm": 0.359375, "learning_rate": 0.001606517260265921, "loss": 0.1724, "step": 30428 }, { "epoch": 0.053954905188837735, "grad_norm": 0.54296875, "learning_rate": 0.0016064680613307172, "loss": 0.2221, "step": 30430 }, { "epoch": 0.05395845135414755, "grad_norm": 0.3125, "learning_rate": 0.0016064188601805163, "loss": 0.1986, "step": 30432 }, { "epoch": 0.053961997519457364, "grad_norm": 0.7890625, "learning_rate": 0.0016063696568155332, "loss": 0.2529, "step": 30434 }, { "epoch": 0.05396554368476718, "grad_norm": 0.5625, "learning_rate": 0.0016063204512359835, "loss": 0.2338, "step": 30436 }, { "epoch": 0.05396908985007699, "grad_norm": 0.4453125, "learning_rate": 0.0016062712434420823, "loss": 0.2036, "step": 30438 }, { "epoch": 0.053972636015386814, "grad_norm": 1.015625, "learning_rate": 0.0016062220334340446, "loss": 0.2063, "step": 30440 }, { "epoch": 0.05397618218069663, "grad_norm": 0.2236328125, "learning_rate": 0.0016061728212120863, "loss": 0.2124, "step": 30442 }, { "epoch": 0.05397972834600644, "grad_norm": 0.251953125, "learning_rate": 0.0016061236067764215, "loss": 0.1872, "step": 30444 }, { "epoch": 0.05398327451131626, "grad_norm": 0.267578125, "learning_rate": 0.001606074390127266, "loss": 0.1405, "step": 30446 }, { "epoch": 0.05398682067662607, "grad_norm": 1.4296875, "learning_rate": 0.0016060251712648355, "loss": 0.3397, "step": 30448 }, { "epoch": 0.05399036684193589, "grad_norm": 0.51171875, "learning_rate": 0.0016059759501893447, "loss": 0.1771, "step": 30450 }, { "epoch": 0.0539939130072457, "grad_norm": 0.3125, "learning_rate": 0.001605926726901009, "loss": 0.1875, "step": 30452 }, { "epoch": 0.053997459172555516, "grad_norm": 0.95703125, "learning_rate": 0.0016058775014000436, "loss": 0.1682, "step": 30454 }, { "epoch": 0.05400100533786533, "grad_norm": 0.671875, "learning_rate": 0.001605828273686664, "loss": 0.1876, "step": 30456 }, { "epoch": 0.054004551503175145, "grad_norm": 0.8125, "learning_rate": 0.0016057790437610851, "loss": 0.2482, "step": 30458 }, { "epoch": 0.05400809766848496, "grad_norm": 0.85546875, "learning_rate": 0.0016057298116235227, "loss": 0.2488, "step": 30460 }, { "epoch": 0.05401164383379478, "grad_norm": 0.23828125, "learning_rate": 0.0016056805772741915, "loss": 0.136, "step": 30462 }, { "epoch": 0.054015189999104596, "grad_norm": 0.7421875, "learning_rate": 0.0016056313407133076, "loss": 0.2024, "step": 30464 }, { "epoch": 0.05401873616441441, "grad_norm": 0.671875, "learning_rate": 0.0016055821019410857, "loss": 0.1844, "step": 30466 }, { "epoch": 0.054022282329724225, "grad_norm": 0.82421875, "learning_rate": 0.0016055328609577413, "loss": 0.2291, "step": 30468 }, { "epoch": 0.05402582849503404, "grad_norm": 0.5078125, "learning_rate": 0.00160548361776349, "loss": 0.1829, "step": 30470 }, { "epoch": 0.054029374660343854, "grad_norm": 0.66015625, "learning_rate": 0.0016054343723585472, "loss": 0.2912, "step": 30472 }, { "epoch": 0.05403292082565367, "grad_norm": 0.208984375, "learning_rate": 0.0016053851247431275, "loss": 0.1754, "step": 30474 }, { "epoch": 0.05403646699096348, "grad_norm": 0.244140625, "learning_rate": 0.001605335874917447, "loss": 0.1896, "step": 30476 }, { "epoch": 0.0540400131562733, "grad_norm": 0.359375, "learning_rate": 0.001605286622881721, "loss": 0.2124, "step": 30478 }, { "epoch": 0.05404355932158311, "grad_norm": 0.36328125, "learning_rate": 0.001605237368636165, "loss": 0.2255, "step": 30480 }, { "epoch": 0.05404710548689293, "grad_norm": 0.41796875, "learning_rate": 0.001605188112180994, "loss": 0.2725, "step": 30482 }, { "epoch": 0.05405065165220275, "grad_norm": 0.39453125, "learning_rate": 0.0016051388535164236, "loss": 0.2462, "step": 30484 }, { "epoch": 0.05405419781751256, "grad_norm": 0.5390625, "learning_rate": 0.0016050895926426694, "loss": 0.2302, "step": 30486 }, { "epoch": 0.05405774398282238, "grad_norm": 0.734375, "learning_rate": 0.0016050403295599467, "loss": 0.2155, "step": 30488 }, { "epoch": 0.05406129014813219, "grad_norm": 1.7421875, "learning_rate": 0.0016049910642684708, "loss": 0.3307, "step": 30490 }, { "epoch": 0.054064836313442007, "grad_norm": 0.74609375, "learning_rate": 0.0016049417967684573, "loss": 0.448, "step": 30492 }, { "epoch": 0.05406838247875182, "grad_norm": 1.2265625, "learning_rate": 0.001604892527060122, "loss": 0.1616, "step": 30494 }, { "epoch": 0.054071928644061636, "grad_norm": 0.66796875, "learning_rate": 0.00160484325514368, "loss": 0.232, "step": 30496 }, { "epoch": 0.05407547480937145, "grad_norm": 0.255859375, "learning_rate": 0.0016047939810193467, "loss": 0.1466, "step": 30498 }, { "epoch": 0.054079020974681265, "grad_norm": 0.400390625, "learning_rate": 0.0016047447046873377, "loss": 0.1971, "step": 30500 }, { "epoch": 0.05408256713999108, "grad_norm": 1.6796875, "learning_rate": 0.0016046954261478687, "loss": 0.2503, "step": 30502 }, { "epoch": 0.054086113305300894, "grad_norm": 0.33203125, "learning_rate": 0.0016046461454011552, "loss": 0.1445, "step": 30504 }, { "epoch": 0.05408965947061071, "grad_norm": 0.419921875, "learning_rate": 0.0016045968624474123, "loss": 0.2752, "step": 30506 }, { "epoch": 0.05409320563592053, "grad_norm": 0.20703125, "learning_rate": 0.0016045475772868561, "loss": 0.1763, "step": 30508 }, { "epoch": 0.054096751801230344, "grad_norm": 0.2353515625, "learning_rate": 0.0016044982899197015, "loss": 0.2575, "step": 30510 }, { "epoch": 0.05410029796654016, "grad_norm": 0.388671875, "learning_rate": 0.001604449000346165, "loss": 0.2194, "step": 30512 }, { "epoch": 0.054103844131849974, "grad_norm": 0.453125, "learning_rate": 0.001604399708566461, "loss": 0.237, "step": 30514 }, { "epoch": 0.05410739029715979, "grad_norm": 0.234375, "learning_rate": 0.0016043504145808061, "loss": 0.1969, "step": 30516 }, { "epoch": 0.0541109364624696, "grad_norm": 0.30859375, "learning_rate": 0.0016043011183894153, "loss": 0.1605, "step": 30518 }, { "epoch": 0.05411448262777942, "grad_norm": 2.109375, "learning_rate": 0.001604251819992504, "loss": 0.2011, "step": 30520 }, { "epoch": 0.05411802879308923, "grad_norm": 0.318359375, "learning_rate": 0.0016042025193902885, "loss": 0.2311, "step": 30522 }, { "epoch": 0.054121574958399046, "grad_norm": 1.4765625, "learning_rate": 0.001604153216582984, "loss": 0.2702, "step": 30524 }, { "epoch": 0.05412512112370886, "grad_norm": 0.3671875, "learning_rate": 0.0016041039115708062, "loss": 0.1629, "step": 30526 }, { "epoch": 0.054128667289018675, "grad_norm": 0.490234375, "learning_rate": 0.0016040546043539707, "loss": 0.2284, "step": 30528 }, { "epoch": 0.0541322134543285, "grad_norm": 0.55859375, "learning_rate": 0.001604005294932693, "loss": 0.3466, "step": 30530 }, { "epoch": 0.05413575961963831, "grad_norm": 0.51953125, "learning_rate": 0.0016039559833071892, "loss": 0.2195, "step": 30532 }, { "epoch": 0.054139305784948126, "grad_norm": 0.373046875, "learning_rate": 0.0016039066694776742, "loss": 0.2143, "step": 30534 }, { "epoch": 0.05414285195025794, "grad_norm": 0.3515625, "learning_rate": 0.0016038573534443647, "loss": 0.2293, "step": 30536 }, { "epoch": 0.054146398115567755, "grad_norm": 0.9140625, "learning_rate": 0.0016038080352074754, "loss": 0.2483, "step": 30538 }, { "epoch": 0.05414994428087757, "grad_norm": 1.6015625, "learning_rate": 0.0016037587147672229, "loss": 0.3931, "step": 30540 }, { "epoch": 0.054153490446187384, "grad_norm": 0.240234375, "learning_rate": 0.0016037093921238215, "loss": 0.2129, "step": 30542 }, { "epoch": 0.0541570366114972, "grad_norm": 0.333984375, "learning_rate": 0.0016036600672774885, "loss": 0.2643, "step": 30544 }, { "epoch": 0.05416058277680701, "grad_norm": 1.078125, "learning_rate": 0.0016036107402284388, "loss": 0.2304, "step": 30546 }, { "epoch": 0.05416412894211683, "grad_norm": 0.61328125, "learning_rate": 0.0016035614109768882, "loss": 0.2067, "step": 30548 }, { "epoch": 0.05416767510742664, "grad_norm": 0.474609375, "learning_rate": 0.0016035120795230527, "loss": 0.1914, "step": 30550 }, { "epoch": 0.054171221272736464, "grad_norm": 0.55078125, "learning_rate": 0.0016034627458671475, "loss": 0.2372, "step": 30552 }, { "epoch": 0.05417476743804628, "grad_norm": 0.1435546875, "learning_rate": 0.001603413410009389, "loss": 0.1996, "step": 30554 }, { "epoch": 0.05417831360335609, "grad_norm": 0.8671875, "learning_rate": 0.0016033640719499925, "loss": 0.3408, "step": 30556 }, { "epoch": 0.05418185976866591, "grad_norm": 0.29296875, "learning_rate": 0.0016033147316891739, "loss": 0.1869, "step": 30558 }, { "epoch": 0.05418540593397572, "grad_norm": 1.2109375, "learning_rate": 0.001603265389227149, "loss": 0.1841, "step": 30560 }, { "epoch": 0.05418895209928554, "grad_norm": 0.52734375, "learning_rate": 0.0016032160445641338, "loss": 0.2004, "step": 30562 }, { "epoch": 0.05419249826459535, "grad_norm": 0.5234375, "learning_rate": 0.0016031666977003435, "loss": 0.2071, "step": 30564 }, { "epoch": 0.054196044429905166, "grad_norm": 0.54296875, "learning_rate": 0.0016031173486359947, "loss": 0.1733, "step": 30566 }, { "epoch": 0.05419959059521498, "grad_norm": 0.40234375, "learning_rate": 0.0016030679973713029, "loss": 0.1807, "step": 30568 }, { "epoch": 0.054203136760524795, "grad_norm": 1.234375, "learning_rate": 0.0016030186439064838, "loss": 0.3036, "step": 30570 }, { "epoch": 0.05420668292583461, "grad_norm": 0.63671875, "learning_rate": 0.0016029692882417535, "loss": 0.1561, "step": 30572 }, { "epoch": 0.054210229091144424, "grad_norm": 0.57421875, "learning_rate": 0.0016029199303773274, "loss": 0.234, "step": 30574 }, { "epoch": 0.054213775256454245, "grad_norm": 2.8125, "learning_rate": 0.0016028705703134219, "loss": 0.2713, "step": 30576 }, { "epoch": 0.05421732142176406, "grad_norm": 0.2001953125, "learning_rate": 0.0016028212080502527, "loss": 0.2623, "step": 30578 }, { "epoch": 0.054220867587073875, "grad_norm": 0.640625, "learning_rate": 0.0016027718435880352, "loss": 0.2357, "step": 30580 }, { "epoch": 0.05422441375238369, "grad_norm": 0.275390625, "learning_rate": 0.001602722476926986, "loss": 0.2578, "step": 30582 }, { "epoch": 0.054227959917693504, "grad_norm": 0.373046875, "learning_rate": 0.0016026731080673205, "loss": 0.1961, "step": 30584 }, { "epoch": 0.05423150608300332, "grad_norm": 0.1474609375, "learning_rate": 0.0016026237370092551, "loss": 0.1226, "step": 30586 }, { "epoch": 0.05423505224831313, "grad_norm": 0.322265625, "learning_rate": 0.0016025743637530052, "loss": 0.2679, "step": 30588 }, { "epoch": 0.05423859841362295, "grad_norm": 0.494140625, "learning_rate": 0.0016025249882987871, "loss": 0.1528, "step": 30590 }, { "epoch": 0.05424214457893276, "grad_norm": 0.75, "learning_rate": 0.0016024756106468164, "loss": 0.2176, "step": 30592 }, { "epoch": 0.054245690744242576, "grad_norm": 0.25390625, "learning_rate": 0.0016024262307973095, "loss": 0.1601, "step": 30594 }, { "epoch": 0.05424923690955239, "grad_norm": 0.408203125, "learning_rate": 0.0016023768487504822, "loss": 0.3534, "step": 30596 }, { "epoch": 0.05425278307486221, "grad_norm": 0.369140625, "learning_rate": 0.00160232746450655, "loss": 0.207, "step": 30598 }, { "epoch": 0.05425632924017203, "grad_norm": 0.6796875, "learning_rate": 0.0016022780780657296, "loss": 0.3981, "step": 30600 }, { "epoch": 0.05425987540548184, "grad_norm": 0.7734375, "learning_rate": 0.0016022286894282365, "loss": 0.1895, "step": 30602 }, { "epoch": 0.054263421570791656, "grad_norm": 0.5703125, "learning_rate": 0.0016021792985942867, "loss": 0.2045, "step": 30604 }, { "epoch": 0.05426696773610147, "grad_norm": 0.47265625, "learning_rate": 0.0016021299055640965, "loss": 0.3999, "step": 30606 }, { "epoch": 0.054270513901411285, "grad_norm": 3.140625, "learning_rate": 0.0016020805103378818, "loss": 0.2054, "step": 30608 }, { "epoch": 0.0542740600667211, "grad_norm": 0.455078125, "learning_rate": 0.0016020311129158586, "loss": 0.2272, "step": 30610 }, { "epoch": 0.054277606232030914, "grad_norm": 1.75, "learning_rate": 0.0016019817132982427, "loss": 0.2382, "step": 30612 }, { "epoch": 0.05428115239734073, "grad_norm": 2.1875, "learning_rate": 0.0016019323114852505, "loss": 0.2293, "step": 30614 }, { "epoch": 0.054284698562650543, "grad_norm": 0.1796875, "learning_rate": 0.001601882907477098, "loss": 0.2867, "step": 30616 }, { "epoch": 0.05428824472796036, "grad_norm": 0.546875, "learning_rate": 0.001601833501274001, "loss": 0.2205, "step": 30618 }, { "epoch": 0.05429179089327018, "grad_norm": 0.859375, "learning_rate": 0.001601784092876176, "loss": 0.1531, "step": 30620 }, { "epoch": 0.054295337058579994, "grad_norm": 0.5546875, "learning_rate": 0.001601734682283839, "loss": 0.2055, "step": 30622 }, { "epoch": 0.05429888322388981, "grad_norm": 0.232421875, "learning_rate": 0.0016016852694972054, "loss": 0.1628, "step": 30624 }, { "epoch": 0.05430242938919962, "grad_norm": 0.4140625, "learning_rate": 0.0016016358545164924, "loss": 0.2544, "step": 30626 }, { "epoch": 0.05430597555450944, "grad_norm": 1.5234375, "learning_rate": 0.0016015864373419154, "loss": 0.2836, "step": 30628 }, { "epoch": 0.05430952171981925, "grad_norm": 0.466796875, "learning_rate": 0.0016015370179736906, "loss": 0.1937, "step": 30630 }, { "epoch": 0.05431306788512907, "grad_norm": 0.498046875, "learning_rate": 0.0016014875964120339, "loss": 0.1982, "step": 30632 }, { "epoch": 0.05431661405043888, "grad_norm": 0.5625, "learning_rate": 0.0016014381726571624, "loss": 0.1966, "step": 30634 }, { "epoch": 0.054320160215748696, "grad_norm": 0.435546875, "learning_rate": 0.001601388746709291, "loss": 0.227, "step": 30636 }, { "epoch": 0.05432370638105851, "grad_norm": 0.314453125, "learning_rate": 0.001601339318568637, "loss": 0.1799, "step": 30638 }, { "epoch": 0.054327252546368325, "grad_norm": 0.21484375, "learning_rate": 0.0016012898882354154, "loss": 0.1448, "step": 30640 }, { "epoch": 0.05433079871167814, "grad_norm": 0.6328125, "learning_rate": 0.0016012404557098434, "loss": 0.2403, "step": 30642 }, { "epoch": 0.05433434487698796, "grad_norm": 0.33203125, "learning_rate": 0.001601191020992137, "loss": 0.1722, "step": 30644 }, { "epoch": 0.054337891042297776, "grad_norm": 1.2734375, "learning_rate": 0.0016011415840825117, "loss": 0.2315, "step": 30646 }, { "epoch": 0.05434143720760759, "grad_norm": 2.671875, "learning_rate": 0.0016010921449811843, "loss": 0.2808, "step": 30648 }, { "epoch": 0.054344983372917405, "grad_norm": 0.39453125, "learning_rate": 0.0016010427036883712, "loss": 0.2065, "step": 30650 }, { "epoch": 0.05434852953822722, "grad_norm": 0.890625, "learning_rate": 0.0016009932602042884, "loss": 0.3017, "step": 30652 }, { "epoch": 0.054352075703537034, "grad_norm": 2.734375, "learning_rate": 0.0016009438145291519, "loss": 0.1957, "step": 30654 }, { "epoch": 0.05435562186884685, "grad_norm": 0.240234375, "learning_rate": 0.0016008943666631784, "loss": 0.1674, "step": 30656 }, { "epoch": 0.05435916803415666, "grad_norm": 1.2421875, "learning_rate": 0.0016008449166065837, "loss": 0.2269, "step": 30658 }, { "epoch": 0.05436271419946648, "grad_norm": 0.66015625, "learning_rate": 0.0016007954643595841, "loss": 0.2626, "step": 30660 }, { "epoch": 0.05436626036477629, "grad_norm": 0.5390625, "learning_rate": 0.0016007460099223961, "loss": 0.1403, "step": 30662 }, { "epoch": 0.05436980653008611, "grad_norm": 0.94140625, "learning_rate": 0.001600696553295236, "loss": 0.4077, "step": 30664 }, { "epoch": 0.05437335269539593, "grad_norm": 0.8125, "learning_rate": 0.00160064709447832, "loss": 0.2254, "step": 30666 }, { "epoch": 0.05437689886070574, "grad_norm": 0.5234375, "learning_rate": 0.0016005976334718646, "loss": 0.206, "step": 30668 }, { "epoch": 0.05438044502601556, "grad_norm": 0.2490234375, "learning_rate": 0.0016005481702760856, "loss": 0.2006, "step": 30670 }, { "epoch": 0.05438399119132537, "grad_norm": 0.392578125, "learning_rate": 0.0016004987048911996, "loss": 0.2185, "step": 30672 }, { "epoch": 0.054387537356635186, "grad_norm": 0.259765625, "learning_rate": 0.0016004492373174234, "loss": 0.1875, "step": 30674 }, { "epoch": 0.054391083521945, "grad_norm": 0.75, "learning_rate": 0.0016003997675549729, "loss": 0.1695, "step": 30676 }, { "epoch": 0.054394629687254815, "grad_norm": 0.30859375, "learning_rate": 0.0016003502956040642, "loss": 0.1778, "step": 30678 }, { "epoch": 0.05439817585256463, "grad_norm": 0.83203125, "learning_rate": 0.0016003008214649141, "loss": 0.197, "step": 30680 }, { "epoch": 0.054401722017874445, "grad_norm": 0.2412109375, "learning_rate": 0.0016002513451377387, "loss": 0.1892, "step": 30682 }, { "epoch": 0.05440526818318426, "grad_norm": 2.59375, "learning_rate": 0.0016002018666227548, "loss": 0.2213, "step": 30684 }, { "epoch": 0.054408814348494074, "grad_norm": 0.30859375, "learning_rate": 0.001600152385920178, "loss": 0.2284, "step": 30686 }, { "epoch": 0.054412360513803895, "grad_norm": 0.96875, "learning_rate": 0.0016001029030302254, "loss": 0.2105, "step": 30688 }, { "epoch": 0.05441590667911371, "grad_norm": 0.482421875, "learning_rate": 0.0016000534179531132, "loss": 0.1829, "step": 30690 }, { "epoch": 0.054419452844423524, "grad_norm": 0.205078125, "learning_rate": 0.0016000039306890582, "loss": 0.1809, "step": 30692 }, { "epoch": 0.05442299900973334, "grad_norm": 0.6015625, "learning_rate": 0.0015999544412382757, "loss": 0.2531, "step": 30694 }, { "epoch": 0.05442654517504315, "grad_norm": 1.1875, "learning_rate": 0.0015999049496009836, "loss": 0.3955, "step": 30696 }, { "epoch": 0.05443009134035297, "grad_norm": 0.4765625, "learning_rate": 0.0015998554557773972, "loss": 0.2357, "step": 30698 }, { "epoch": 0.05443363750566278, "grad_norm": 3.21875, "learning_rate": 0.0015998059597677338, "loss": 0.3937, "step": 30700 }, { "epoch": 0.0544371836709726, "grad_norm": 0.6484375, "learning_rate": 0.0015997564615722092, "loss": 0.2372, "step": 30702 }, { "epoch": 0.05444072983628241, "grad_norm": 0.3515625, "learning_rate": 0.00159970696119104, "loss": 0.2309, "step": 30704 }, { "epoch": 0.054444276001592226, "grad_norm": 0.4375, "learning_rate": 0.0015996574586244429, "loss": 0.2106, "step": 30706 }, { "epoch": 0.05444782216690204, "grad_norm": 0.60546875, "learning_rate": 0.0015996079538726342, "loss": 0.2356, "step": 30708 }, { "epoch": 0.054451368332211855, "grad_norm": 0.291015625, "learning_rate": 0.0015995584469358304, "loss": 0.1994, "step": 30710 }, { "epoch": 0.05445491449752168, "grad_norm": 0.54296875, "learning_rate": 0.0015995089378142485, "loss": 0.181, "step": 30712 }, { "epoch": 0.05445846066283149, "grad_norm": 0.80078125, "learning_rate": 0.0015994594265081048, "loss": 0.3128, "step": 30714 }, { "epoch": 0.054462006828141306, "grad_norm": 0.38671875, "learning_rate": 0.0015994099130176153, "loss": 0.2318, "step": 30716 }, { "epoch": 0.05446555299345112, "grad_norm": 0.330078125, "learning_rate": 0.0015993603973429968, "loss": 0.1595, "step": 30718 }, { "epoch": 0.054469099158760935, "grad_norm": 0.6484375, "learning_rate": 0.0015993108794844666, "loss": 0.1893, "step": 30720 }, { "epoch": 0.05447264532407075, "grad_norm": 0.31640625, "learning_rate": 0.0015992613594422403, "loss": 0.2067, "step": 30722 }, { "epoch": 0.054476191489380564, "grad_norm": 0.279296875, "learning_rate": 0.0015992118372165347, "loss": 0.1463, "step": 30724 }, { "epoch": 0.05447973765469038, "grad_norm": 0.80859375, "learning_rate": 0.0015991623128075664, "loss": 0.1859, "step": 30726 }, { "epoch": 0.05448328382000019, "grad_norm": 1.8359375, "learning_rate": 0.0015991127862155525, "loss": 0.1553, "step": 30728 }, { "epoch": 0.05448682998531001, "grad_norm": 0.38671875, "learning_rate": 0.001599063257440709, "loss": 0.2474, "step": 30730 }, { "epoch": 0.05449037615061982, "grad_norm": 0.46875, "learning_rate": 0.0015990137264832525, "loss": 0.1908, "step": 30732 }, { "epoch": 0.054493922315929644, "grad_norm": 0.68359375, "learning_rate": 0.0015989641933434001, "loss": 0.2176, "step": 30734 }, { "epoch": 0.05449746848123946, "grad_norm": 0.63671875, "learning_rate": 0.0015989146580213678, "loss": 0.2691, "step": 30736 }, { "epoch": 0.05450101464654927, "grad_norm": 0.259765625, "learning_rate": 0.001598865120517373, "loss": 0.2022, "step": 30738 }, { "epoch": 0.05450456081185909, "grad_norm": 0.384765625, "learning_rate": 0.0015988155808316317, "loss": 0.1979, "step": 30740 }, { "epoch": 0.0545081069771689, "grad_norm": 0.5, "learning_rate": 0.001598766038964361, "loss": 0.2762, "step": 30742 }, { "epoch": 0.054511653142478717, "grad_norm": 0.466796875, "learning_rate": 0.001598716494915777, "loss": 0.13, "step": 30744 }, { "epoch": 0.05451519930778853, "grad_norm": 1.015625, "learning_rate": 0.0015986669486860972, "loss": 0.2554, "step": 30746 }, { "epoch": 0.054518745473098346, "grad_norm": 0.734375, "learning_rate": 0.0015986174002755376, "loss": 0.4373, "step": 30748 }, { "epoch": 0.05452229163840816, "grad_norm": 1.8125, "learning_rate": 0.0015985678496843155, "loss": 0.3827, "step": 30750 }, { "epoch": 0.054525837803717975, "grad_norm": 0.80859375, "learning_rate": 0.0015985182969126465, "loss": 0.1969, "step": 30752 }, { "epoch": 0.05452938396902779, "grad_norm": 0.5859375, "learning_rate": 0.0015984687419607485, "loss": 0.3428, "step": 30754 }, { "epoch": 0.05453293013433761, "grad_norm": 0.50390625, "learning_rate": 0.0015984191848288374, "loss": 0.2079, "step": 30756 }, { "epoch": 0.054536476299647425, "grad_norm": 0.2099609375, "learning_rate": 0.001598369625517131, "loss": 0.1919, "step": 30758 }, { "epoch": 0.05454002246495724, "grad_norm": 0.4296875, "learning_rate": 0.0015983200640258446, "loss": 0.2519, "step": 30760 }, { "epoch": 0.054543568630267054, "grad_norm": 1.6484375, "learning_rate": 0.0015982705003551962, "loss": 0.184, "step": 30762 }, { "epoch": 0.05454711479557687, "grad_norm": 0.255859375, "learning_rate": 0.0015982209345054017, "loss": 0.1823, "step": 30764 }, { "epoch": 0.054550660960886684, "grad_norm": 0.466796875, "learning_rate": 0.0015981713664766784, "loss": 0.1766, "step": 30766 }, { "epoch": 0.0545542071261965, "grad_norm": 1.875, "learning_rate": 0.0015981217962692431, "loss": 0.1635, "step": 30768 }, { "epoch": 0.05455775329150631, "grad_norm": 0.5234375, "learning_rate": 0.0015980722238833124, "loss": 0.2603, "step": 30770 }, { "epoch": 0.05456129945681613, "grad_norm": 0.4140625, "learning_rate": 0.0015980226493191028, "loss": 0.2398, "step": 30772 }, { "epoch": 0.05456484562212594, "grad_norm": 0.54296875, "learning_rate": 0.0015979730725768316, "loss": 0.1806, "step": 30774 }, { "epoch": 0.054568391787435756, "grad_norm": 0.73828125, "learning_rate": 0.0015979234936567153, "loss": 0.1864, "step": 30776 }, { "epoch": 0.05457193795274557, "grad_norm": 0.419921875, "learning_rate": 0.0015978739125589712, "loss": 0.2021, "step": 30778 }, { "epoch": 0.05457548411805539, "grad_norm": 2.453125, "learning_rate": 0.0015978243292838156, "loss": 0.2341, "step": 30780 }, { "epoch": 0.05457903028336521, "grad_norm": 0.9140625, "learning_rate": 0.0015977747438314653, "loss": 0.2685, "step": 30782 }, { "epoch": 0.05458257644867502, "grad_norm": 0.2021484375, "learning_rate": 0.0015977251562021375, "loss": 0.138, "step": 30784 }, { "epoch": 0.054586122613984836, "grad_norm": 0.451171875, "learning_rate": 0.0015976755663960492, "loss": 0.1853, "step": 30786 }, { "epoch": 0.05458966877929465, "grad_norm": 0.1630859375, "learning_rate": 0.001597625974413417, "loss": 0.1989, "step": 30788 }, { "epoch": 0.054593214944604465, "grad_norm": 0.515625, "learning_rate": 0.0015975763802544578, "loss": 0.2289, "step": 30790 }, { "epoch": 0.05459676110991428, "grad_norm": 0.44140625, "learning_rate": 0.0015975267839193885, "loss": 0.2058, "step": 30792 }, { "epoch": 0.054600307275224094, "grad_norm": 0.7578125, "learning_rate": 0.0015974771854084262, "loss": 0.2445, "step": 30794 }, { "epoch": 0.05460385344053391, "grad_norm": 0.55859375, "learning_rate": 0.0015974275847217877, "loss": 0.1863, "step": 30796 }, { "epoch": 0.05460739960584372, "grad_norm": 0.59765625, "learning_rate": 0.0015973779818596898, "loss": 0.1561, "step": 30798 }, { "epoch": 0.05461094577115354, "grad_norm": 1.328125, "learning_rate": 0.0015973283768223492, "loss": 0.1938, "step": 30800 }, { "epoch": 0.05461449193646336, "grad_norm": 0.35546875, "learning_rate": 0.0015972787696099835, "loss": 0.2547, "step": 30802 }, { "epoch": 0.054618038101773174, "grad_norm": 0.2734375, "learning_rate": 0.0015972291602228091, "loss": 0.2128, "step": 30804 }, { "epoch": 0.05462158426708299, "grad_norm": 0.251953125, "learning_rate": 0.0015971795486610437, "loss": 0.1922, "step": 30806 }, { "epoch": 0.0546251304323928, "grad_norm": 0.5234375, "learning_rate": 0.0015971299349249034, "loss": 0.1627, "step": 30808 }, { "epoch": 0.05462867659770262, "grad_norm": 0.63671875, "learning_rate": 0.0015970803190146054, "loss": 0.1893, "step": 30810 }, { "epoch": 0.05463222276301243, "grad_norm": 0.39453125, "learning_rate": 0.0015970307009303669, "loss": 0.2252, "step": 30812 }, { "epoch": 0.05463576892832225, "grad_norm": 0.44140625, "learning_rate": 0.0015969810806724049, "loss": 0.1882, "step": 30814 }, { "epoch": 0.05463931509363206, "grad_norm": 0.359375, "learning_rate": 0.0015969314582409363, "loss": 0.1928, "step": 30816 }, { "epoch": 0.054642861258941876, "grad_norm": 0.353515625, "learning_rate": 0.001596881833636178, "loss": 0.1994, "step": 30818 }, { "epoch": 0.05464640742425169, "grad_norm": 0.5390625, "learning_rate": 0.0015968322068583474, "loss": 0.2247, "step": 30820 }, { "epoch": 0.054649953589561505, "grad_norm": 0.388671875, "learning_rate": 0.0015967825779076613, "loss": 0.1842, "step": 30822 }, { "epoch": 0.054653499754871326, "grad_norm": 0.296875, "learning_rate": 0.0015967329467843365, "loss": 0.13, "step": 30824 }, { "epoch": 0.05465704592018114, "grad_norm": 0.66015625, "learning_rate": 0.0015966833134885906, "loss": 0.2091, "step": 30826 }, { "epoch": 0.054660592085490955, "grad_norm": 0.59375, "learning_rate": 0.0015966336780206404, "loss": 0.1823, "step": 30828 }, { "epoch": 0.05466413825080077, "grad_norm": 6.0, "learning_rate": 0.0015965840403807028, "loss": 0.3909, "step": 30830 }, { "epoch": 0.054667684416110585, "grad_norm": 0.462890625, "learning_rate": 0.001596534400568995, "loss": 0.138, "step": 30832 }, { "epoch": 0.0546712305814204, "grad_norm": 0.2490234375, "learning_rate": 0.0015964847585857348, "loss": 0.162, "step": 30834 }, { "epoch": 0.054674776746730214, "grad_norm": 0.73046875, "learning_rate": 0.0015964351144311377, "loss": 0.1973, "step": 30836 }, { "epoch": 0.05467832291204003, "grad_norm": 0.66015625, "learning_rate": 0.0015963854681054226, "loss": 0.1741, "step": 30838 }, { "epoch": 0.05468186907734984, "grad_norm": 0.51953125, "learning_rate": 0.0015963358196088055, "loss": 0.1636, "step": 30840 }, { "epoch": 0.05468541524265966, "grad_norm": 0.279296875, "learning_rate": 0.0015962861689415035, "loss": 0.1693, "step": 30842 }, { "epoch": 0.05468896140796947, "grad_norm": 0.2041015625, "learning_rate": 0.0015962365161037344, "loss": 0.201, "step": 30844 }, { "epoch": 0.054692507573279286, "grad_norm": 1.234375, "learning_rate": 0.001596186861095715, "loss": 0.1767, "step": 30846 }, { "epoch": 0.05469605373858911, "grad_norm": 0.63671875, "learning_rate": 0.0015961372039176626, "loss": 0.2443, "step": 30848 }, { "epoch": 0.05469959990389892, "grad_norm": 1.0625, "learning_rate": 0.001596087544569794, "loss": 0.2177, "step": 30850 }, { "epoch": 0.05470314606920874, "grad_norm": 2.140625, "learning_rate": 0.0015960378830523268, "loss": 0.2076, "step": 30852 }, { "epoch": 0.05470669223451855, "grad_norm": 5.0, "learning_rate": 0.0015959882193654783, "loss": 0.2235, "step": 30854 }, { "epoch": 0.054710238399828366, "grad_norm": 0.82421875, "learning_rate": 0.0015959385535094651, "loss": 0.2175, "step": 30856 }, { "epoch": 0.05471378456513818, "grad_norm": 1.1953125, "learning_rate": 0.0015958888854845047, "loss": 0.2108, "step": 30858 }, { "epoch": 0.054717330730447995, "grad_norm": 0.5390625, "learning_rate": 0.0015958392152908144, "loss": 0.2801, "step": 30860 }, { "epoch": 0.05472087689575781, "grad_norm": 0.58984375, "learning_rate": 0.0015957895429286116, "loss": 0.2091, "step": 30862 }, { "epoch": 0.054724423061067624, "grad_norm": 0.201171875, "learning_rate": 0.001595739868398113, "loss": 0.1528, "step": 30864 }, { "epoch": 0.05472796922637744, "grad_norm": 0.15625, "learning_rate": 0.0015956901916995363, "loss": 0.1844, "step": 30866 }, { "epoch": 0.054731515391687253, "grad_norm": 1.1796875, "learning_rate": 0.0015956405128330986, "loss": 0.2183, "step": 30868 }, { "epoch": 0.054735061556997075, "grad_norm": 1.5234375, "learning_rate": 0.0015955908317990173, "loss": 0.2666, "step": 30870 }, { "epoch": 0.05473860772230689, "grad_norm": 0.177734375, "learning_rate": 0.0015955411485975097, "loss": 0.2277, "step": 30872 }, { "epoch": 0.054742153887616704, "grad_norm": 0.2578125, "learning_rate": 0.0015954914632287927, "loss": 0.1711, "step": 30874 }, { "epoch": 0.05474570005292652, "grad_norm": 0.50390625, "learning_rate": 0.0015954417756930842, "loss": 0.3657, "step": 30876 }, { "epoch": 0.05474924621823633, "grad_norm": 0.380859375, "learning_rate": 0.0015953920859906007, "loss": 0.2191, "step": 30878 }, { "epoch": 0.05475279238354615, "grad_norm": 0.28515625, "learning_rate": 0.0015953423941215603, "loss": 0.1717, "step": 30880 }, { "epoch": 0.05475633854885596, "grad_norm": 0.2001953125, "learning_rate": 0.0015952927000861797, "loss": 0.173, "step": 30882 }, { "epoch": 0.05475988471416578, "grad_norm": 0.703125, "learning_rate": 0.0015952430038846767, "loss": 0.1923, "step": 30884 }, { "epoch": 0.05476343087947559, "grad_norm": 0.310546875, "learning_rate": 0.0015951933055172688, "loss": 0.1547, "step": 30886 }, { "epoch": 0.054766977044785406, "grad_norm": 0.60546875, "learning_rate": 0.0015951436049841726, "loss": 0.2774, "step": 30888 }, { "epoch": 0.05477052321009522, "grad_norm": 0.359375, "learning_rate": 0.001595093902285606, "loss": 0.1818, "step": 30890 }, { "epoch": 0.05477406937540504, "grad_norm": 0.58984375, "learning_rate": 0.0015950441974217863, "loss": 0.2363, "step": 30892 }, { "epoch": 0.05477761554071486, "grad_norm": 0.58203125, "learning_rate": 0.0015949944903929304, "loss": 0.1686, "step": 30894 }, { "epoch": 0.05478116170602467, "grad_norm": 0.21875, "learning_rate": 0.0015949447811992568, "loss": 0.1653, "step": 30896 }, { "epoch": 0.054784707871334486, "grad_norm": 0.341796875, "learning_rate": 0.0015948950698409818, "loss": 0.1901, "step": 30898 }, { "epoch": 0.0547882540366443, "grad_norm": 0.28125, "learning_rate": 0.0015948453563183235, "loss": 0.2011, "step": 30900 }, { "epoch": 0.054791800201954115, "grad_norm": 0.29296875, "learning_rate": 0.001594795640631499, "loss": 0.1974, "step": 30902 }, { "epoch": 0.05479534636726393, "grad_norm": 0.58203125, "learning_rate": 0.0015947459227807258, "loss": 0.2192, "step": 30904 }, { "epoch": 0.054798892532573744, "grad_norm": 0.2216796875, "learning_rate": 0.0015946962027662212, "loss": 0.1855, "step": 30906 }, { "epoch": 0.05480243869788356, "grad_norm": 0.28515625, "learning_rate": 0.0015946464805882026, "loss": 0.1567, "step": 30908 }, { "epoch": 0.05480598486319337, "grad_norm": 0.48046875, "learning_rate": 0.001594596756246888, "loss": 0.3898, "step": 30910 }, { "epoch": 0.05480953102850319, "grad_norm": 0.56640625, "learning_rate": 0.0015945470297424943, "loss": 0.2634, "step": 30912 }, { "epoch": 0.054813077193813, "grad_norm": 0.310546875, "learning_rate": 0.0015944973010752388, "loss": 0.181, "step": 30914 }, { "epoch": 0.054816623359122824, "grad_norm": 1.0859375, "learning_rate": 0.00159444757024534, "loss": 0.1752, "step": 30916 }, { "epoch": 0.05482016952443264, "grad_norm": 0.45703125, "learning_rate": 0.0015943978372530143, "loss": 0.2169, "step": 30918 }, { "epoch": 0.05482371568974245, "grad_norm": 0.5546875, "learning_rate": 0.0015943481020984796, "loss": 0.3005, "step": 30920 }, { "epoch": 0.05482726185505227, "grad_norm": 0.5703125, "learning_rate": 0.001594298364781954, "loss": 0.4046, "step": 30922 }, { "epoch": 0.05483080802036208, "grad_norm": 0.1962890625, "learning_rate": 0.0015942486253036538, "loss": 0.1589, "step": 30924 }, { "epoch": 0.054834354185671896, "grad_norm": 0.22265625, "learning_rate": 0.0015941988836637975, "loss": 0.1734, "step": 30926 }, { "epoch": 0.05483790035098171, "grad_norm": 1.5390625, "learning_rate": 0.0015941491398626026, "loss": 0.2812, "step": 30928 }, { "epoch": 0.054841446516291525, "grad_norm": 0.25390625, "learning_rate": 0.0015940993939002862, "loss": 0.3145, "step": 30930 }, { "epoch": 0.05484499268160134, "grad_norm": 0.65234375, "learning_rate": 0.0015940496457770659, "loss": 0.18, "step": 30932 }, { "epoch": 0.054848538846911155, "grad_norm": 0.3046875, "learning_rate": 0.0015939998954931597, "loss": 0.1777, "step": 30934 }, { "epoch": 0.05485208501222097, "grad_norm": 5.46875, "learning_rate": 0.0015939501430487846, "loss": 0.2138, "step": 30936 }, { "epoch": 0.05485563117753079, "grad_norm": 0.6484375, "learning_rate": 0.0015939003884441587, "loss": 0.2471, "step": 30938 }, { "epoch": 0.054859177342840605, "grad_norm": 0.90234375, "learning_rate": 0.0015938506316794992, "loss": 0.1951, "step": 30940 }, { "epoch": 0.05486272350815042, "grad_norm": 1.1171875, "learning_rate": 0.001593800872755024, "loss": 0.2983, "step": 30942 }, { "epoch": 0.054866269673460234, "grad_norm": 0.2109375, "learning_rate": 0.0015937511116709507, "loss": 0.1538, "step": 30944 }, { "epoch": 0.05486981583877005, "grad_norm": 0.62109375, "learning_rate": 0.0015937013484274966, "loss": 0.2228, "step": 30946 }, { "epoch": 0.05487336200407986, "grad_norm": 0.42578125, "learning_rate": 0.0015936515830248799, "loss": 0.2227, "step": 30948 }, { "epoch": 0.05487690816938968, "grad_norm": 0.40234375, "learning_rate": 0.001593601815463318, "loss": 0.1814, "step": 30950 }, { "epoch": 0.05488045433469949, "grad_norm": 0.6015625, "learning_rate": 0.001593552045743028, "loss": 0.1935, "step": 30952 }, { "epoch": 0.05488400050000931, "grad_norm": 0.421875, "learning_rate": 0.0015935022738642287, "loss": 0.168, "step": 30954 }, { "epoch": 0.05488754666531912, "grad_norm": 0.234375, "learning_rate": 0.0015934524998271367, "loss": 0.1774, "step": 30956 }, { "epoch": 0.054891092830628936, "grad_norm": 0.50390625, "learning_rate": 0.0015934027236319702, "loss": 0.2093, "step": 30958 }, { "epoch": 0.05489463899593876, "grad_norm": 1.953125, "learning_rate": 0.0015933529452789464, "loss": 0.189, "step": 30960 }, { "epoch": 0.05489818516124857, "grad_norm": 0.349609375, "learning_rate": 0.0015933031647682838, "loss": 0.2227, "step": 30962 }, { "epoch": 0.05490173132655839, "grad_norm": 0.85546875, "learning_rate": 0.0015932533821001996, "loss": 0.2278, "step": 30964 }, { "epoch": 0.0549052774918682, "grad_norm": 0.3046875, "learning_rate": 0.0015932035972749117, "loss": 0.1872, "step": 30966 }, { "epoch": 0.054908823657178016, "grad_norm": 0.7734375, "learning_rate": 0.0015931538102926376, "loss": 0.2553, "step": 30968 }, { "epoch": 0.05491236982248783, "grad_norm": 0.2138671875, "learning_rate": 0.0015931040211535953, "loss": 0.1753, "step": 30970 }, { "epoch": 0.054915915987797645, "grad_norm": 0.3203125, "learning_rate": 0.0015930542298580024, "loss": 0.1487, "step": 30972 }, { "epoch": 0.05491946215310746, "grad_norm": 1.1953125, "learning_rate": 0.0015930044364060767, "loss": 0.2449, "step": 30974 }, { "epoch": 0.054923008318417274, "grad_norm": 0.189453125, "learning_rate": 0.0015929546407980359, "loss": 0.2014, "step": 30976 }, { "epoch": 0.05492655448372709, "grad_norm": 0.6640625, "learning_rate": 0.001592904843034098, "loss": 0.2501, "step": 30978 }, { "epoch": 0.0549301006490369, "grad_norm": 0.5078125, "learning_rate": 0.0015928550431144805, "loss": 0.1558, "step": 30980 }, { "epoch": 0.05493364681434672, "grad_norm": 0.421875, "learning_rate": 0.0015928052410394014, "loss": 0.2331, "step": 30982 }, { "epoch": 0.05493719297965654, "grad_norm": 1.0625, "learning_rate": 0.0015927554368090784, "loss": 0.1704, "step": 30984 }, { "epoch": 0.054940739144966354, "grad_norm": 0.32421875, "learning_rate": 0.0015927056304237293, "loss": 0.1964, "step": 30986 }, { "epoch": 0.05494428531027617, "grad_norm": 0.55078125, "learning_rate": 0.001592655821883572, "loss": 0.2062, "step": 30988 }, { "epoch": 0.05494783147558598, "grad_norm": 0.53515625, "learning_rate": 0.0015926060111888245, "loss": 0.1996, "step": 30990 }, { "epoch": 0.0549513776408958, "grad_norm": 0.29296875, "learning_rate": 0.0015925561983397042, "loss": 0.1684, "step": 30992 }, { "epoch": 0.05495492380620561, "grad_norm": 0.451171875, "learning_rate": 0.0015925063833364293, "loss": 0.1903, "step": 30994 }, { "epoch": 0.054958469971515427, "grad_norm": 0.251953125, "learning_rate": 0.0015924565661792173, "loss": 0.1503, "step": 30996 }, { "epoch": 0.05496201613682524, "grad_norm": 0.234375, "learning_rate": 0.0015924067468682866, "loss": 0.1574, "step": 30998 }, { "epoch": 0.054965562302135056, "grad_norm": 0.2177734375, "learning_rate": 0.0015923569254038547, "loss": 0.3123, "step": 31000 }, { "epoch": 0.05496910846744487, "grad_norm": 0.44921875, "learning_rate": 0.0015923071017861397, "loss": 0.1684, "step": 31002 }, { "epoch": 0.054972654632754685, "grad_norm": 0.30859375, "learning_rate": 0.0015922572760153598, "loss": 0.2031, "step": 31004 }, { "epoch": 0.054976200798064506, "grad_norm": 0.75390625, "learning_rate": 0.0015922074480917317, "loss": 0.2126, "step": 31006 }, { "epoch": 0.05497974696337432, "grad_norm": 1.390625, "learning_rate": 0.0015921576180154748, "loss": 0.2323, "step": 31008 }, { "epoch": 0.054983293128684135, "grad_norm": 0.3125, "learning_rate": 0.001592107785786806, "loss": 0.172, "step": 31010 }, { "epoch": 0.05498683929399395, "grad_norm": 0.6875, "learning_rate": 0.0015920579514059438, "loss": 0.3915, "step": 31012 }, { "epoch": 0.054990385459303764, "grad_norm": 0.19921875, "learning_rate": 0.0015920081148731056, "loss": 0.158, "step": 31014 }, { "epoch": 0.05499393162461358, "grad_norm": 0.55859375, "learning_rate": 0.00159195827618851, "loss": 0.2471, "step": 31016 }, { "epoch": 0.054997477789923394, "grad_norm": 0.5625, "learning_rate": 0.0015919084353523745, "loss": 0.3113, "step": 31018 }, { "epoch": 0.05500102395523321, "grad_norm": 0.173828125, "learning_rate": 0.0015918585923649173, "loss": 0.1557, "step": 31020 }, { "epoch": 0.05500457012054302, "grad_norm": 0.35546875, "learning_rate": 0.0015918087472263564, "loss": 0.2761, "step": 31022 }, { "epoch": 0.05500811628585284, "grad_norm": 0.43359375, "learning_rate": 0.0015917588999369097, "loss": 0.201, "step": 31024 }, { "epoch": 0.05501166245116265, "grad_norm": 0.412109375, "learning_rate": 0.001591709050496795, "loss": 0.223, "step": 31026 }, { "epoch": 0.05501520861647247, "grad_norm": 0.400390625, "learning_rate": 0.001591659198906231, "loss": 0.2484, "step": 31028 }, { "epoch": 0.05501875478178229, "grad_norm": 1.5390625, "learning_rate": 0.0015916093451654348, "loss": 0.2643, "step": 31030 }, { "epoch": 0.0550223009470921, "grad_norm": 0.1962890625, "learning_rate": 0.0015915594892746251, "loss": 0.1781, "step": 31032 }, { "epoch": 0.05502584711240192, "grad_norm": 0.51171875, "learning_rate": 0.0015915096312340195, "loss": 0.2634, "step": 31034 }, { "epoch": 0.05502939327771173, "grad_norm": 0.49609375, "learning_rate": 0.0015914597710438366, "loss": 0.2137, "step": 31036 }, { "epoch": 0.055032939443021546, "grad_norm": 0.24609375, "learning_rate": 0.001591409908704294, "loss": 0.2319, "step": 31038 }, { "epoch": 0.05503648560833136, "grad_norm": 0.92578125, "learning_rate": 0.00159136004421561, "loss": 0.2546, "step": 31040 }, { "epoch": 0.055040031773641175, "grad_norm": 1.6171875, "learning_rate": 0.0015913101775780023, "loss": 0.1825, "step": 31042 }, { "epoch": 0.05504357793895099, "grad_norm": 0.58203125, "learning_rate": 0.0015912603087916894, "loss": 0.2534, "step": 31044 }, { "epoch": 0.055047124104260804, "grad_norm": 0.28125, "learning_rate": 0.0015912104378568894, "loss": 0.1701, "step": 31046 }, { "epoch": 0.05505067026957062, "grad_norm": 1.1328125, "learning_rate": 0.00159116056477382, "loss": 0.2973, "step": 31048 }, { "epoch": 0.05505421643488043, "grad_norm": 0.1923828125, "learning_rate": 0.0015911106895427, "loss": 0.1551, "step": 31050 }, { "epoch": 0.055057762600190255, "grad_norm": 0.51953125, "learning_rate": 0.001591060812163747, "loss": 0.18, "step": 31052 }, { "epoch": 0.05506130876550007, "grad_norm": 1.421875, "learning_rate": 0.001591010932637179, "loss": 0.3355, "step": 31054 }, { "epoch": 0.055064854930809884, "grad_norm": 0.73046875, "learning_rate": 0.0015909610509632147, "loss": 0.2099, "step": 31056 }, { "epoch": 0.0550684010961197, "grad_norm": 0.2158203125, "learning_rate": 0.0015909111671420716, "loss": 0.2108, "step": 31058 }, { "epoch": 0.05507194726142951, "grad_norm": 0.42578125, "learning_rate": 0.0015908612811739685, "loss": 0.1775, "step": 31060 }, { "epoch": 0.05507549342673933, "grad_norm": 2.03125, "learning_rate": 0.001590811393059123, "loss": 0.212, "step": 31062 }, { "epoch": 0.05507903959204914, "grad_norm": 0.2578125, "learning_rate": 0.0015907615027977537, "loss": 0.1701, "step": 31064 }, { "epoch": 0.05508258575735896, "grad_norm": 0.2294921875, "learning_rate": 0.0015907116103900786, "loss": 0.1974, "step": 31066 }, { "epoch": 0.05508613192266877, "grad_norm": 0.46484375, "learning_rate": 0.0015906617158363162, "loss": 0.159, "step": 31068 }, { "epoch": 0.055089678087978586, "grad_norm": 3.109375, "learning_rate": 0.0015906118191366843, "loss": 0.3078, "step": 31070 }, { "epoch": 0.0550932242532884, "grad_norm": 0.2080078125, "learning_rate": 0.0015905619202914011, "loss": 0.1915, "step": 31072 }, { "epoch": 0.05509677041859822, "grad_norm": 0.66796875, "learning_rate": 0.0015905120193006855, "loss": 0.2231, "step": 31074 }, { "epoch": 0.055100316583908036, "grad_norm": 2.265625, "learning_rate": 0.001590462116164755, "loss": 0.1687, "step": 31076 }, { "epoch": 0.05510386274921785, "grad_norm": 1.8515625, "learning_rate": 0.001590412210883828, "loss": 0.217, "step": 31078 }, { "epoch": 0.055107408914527665, "grad_norm": 0.455078125, "learning_rate": 0.001590362303458123, "loss": 0.3625, "step": 31080 }, { "epoch": 0.05511095507983748, "grad_norm": 0.2216796875, "learning_rate": 0.0015903123938878579, "loss": 0.3165, "step": 31082 }, { "epoch": 0.055114501245147295, "grad_norm": 1.109375, "learning_rate": 0.0015902624821732515, "loss": 0.2021, "step": 31084 }, { "epoch": 0.05511804741045711, "grad_norm": 0.373046875, "learning_rate": 0.0015902125683145214, "loss": 0.1967, "step": 31086 }, { "epoch": 0.055121593575766924, "grad_norm": 0.1796875, "learning_rate": 0.0015901626523118866, "loss": 0.2346, "step": 31088 }, { "epoch": 0.05512513974107674, "grad_norm": 0.318359375, "learning_rate": 0.001590112734165565, "loss": 0.2031, "step": 31090 }, { "epoch": 0.05512868590638655, "grad_norm": 0.2734375, "learning_rate": 0.0015900628138757747, "loss": 0.1546, "step": 31092 }, { "epoch": 0.05513223207169637, "grad_norm": 0.7265625, "learning_rate": 0.0015900128914427348, "loss": 0.1991, "step": 31094 }, { "epoch": 0.05513577823700619, "grad_norm": 0.4140625, "learning_rate": 0.0015899629668666628, "loss": 0.2277, "step": 31096 }, { "epoch": 0.055139324402316, "grad_norm": 0.4921875, "learning_rate": 0.0015899130401477778, "loss": 0.1446, "step": 31098 }, { "epoch": 0.05514287056762582, "grad_norm": 0.4140625, "learning_rate": 0.0015898631112862972, "loss": 0.1982, "step": 31100 }, { "epoch": 0.05514641673293563, "grad_norm": 0.19140625, "learning_rate": 0.00158981318028244, "loss": 0.1425, "step": 31102 }, { "epoch": 0.05514996289824545, "grad_norm": 0.283203125, "learning_rate": 0.001589763247136425, "loss": 0.279, "step": 31104 }, { "epoch": 0.05515350906355526, "grad_norm": 0.5, "learning_rate": 0.0015897133118484696, "loss": 0.2613, "step": 31106 }, { "epoch": 0.055157055228865076, "grad_norm": 0.53125, "learning_rate": 0.0015896633744187925, "loss": 0.2096, "step": 31108 }, { "epoch": 0.05516060139417489, "grad_norm": 0.171875, "learning_rate": 0.0015896134348476125, "loss": 0.2069, "step": 31110 }, { "epoch": 0.055164147559484705, "grad_norm": 0.455078125, "learning_rate": 0.0015895634931351475, "loss": 0.1668, "step": 31112 }, { "epoch": 0.05516769372479452, "grad_norm": 0.33984375, "learning_rate": 0.0015895135492816164, "loss": 0.2508, "step": 31114 }, { "epoch": 0.055171239890104334, "grad_norm": 0.91015625, "learning_rate": 0.0015894636032872374, "loss": 0.1813, "step": 31116 }, { "epoch": 0.05517478605541415, "grad_norm": 0.3671875, "learning_rate": 0.0015894136551522287, "loss": 0.2015, "step": 31118 }, { "epoch": 0.05517833222072397, "grad_norm": 0.5859375, "learning_rate": 0.001589363704876809, "loss": 0.1669, "step": 31120 }, { "epoch": 0.055181878386033785, "grad_norm": 0.177734375, "learning_rate": 0.0015893137524611966, "loss": 0.1776, "step": 31122 }, { "epoch": 0.0551854245513436, "grad_norm": 0.2294921875, "learning_rate": 0.0015892637979056103, "loss": 0.1621, "step": 31124 }, { "epoch": 0.055188970716653414, "grad_norm": 0.44140625, "learning_rate": 0.0015892138412102687, "loss": 0.1849, "step": 31126 }, { "epoch": 0.05519251688196323, "grad_norm": 0.37890625, "learning_rate": 0.0015891638823753891, "loss": 0.3485, "step": 31128 }, { "epoch": 0.05519606304727304, "grad_norm": 0.330078125, "learning_rate": 0.0015891139214011913, "loss": 0.2004, "step": 31130 }, { "epoch": 0.05519960921258286, "grad_norm": 1.28125, "learning_rate": 0.0015890639582878932, "loss": 0.2628, "step": 31132 }, { "epoch": 0.05520315537789267, "grad_norm": 0.36328125, "learning_rate": 0.0015890139930357136, "loss": 0.2067, "step": 31134 }, { "epoch": 0.05520670154320249, "grad_norm": 0.34765625, "learning_rate": 0.0015889640256448708, "loss": 0.1887, "step": 31136 }, { "epoch": 0.0552102477085123, "grad_norm": 0.859375, "learning_rate": 0.0015889140561155832, "loss": 0.1316, "step": 31138 }, { "epoch": 0.055213793873822116, "grad_norm": 0.41015625, "learning_rate": 0.0015888640844480696, "loss": 0.1865, "step": 31140 }, { "epoch": 0.05521734003913194, "grad_norm": 0.322265625, "learning_rate": 0.0015888141106425485, "loss": 0.1891, "step": 31142 }, { "epoch": 0.05522088620444175, "grad_norm": 1.125, "learning_rate": 0.0015887641346992384, "loss": 0.2402, "step": 31144 }, { "epoch": 0.05522443236975157, "grad_norm": 0.6640625, "learning_rate": 0.0015887141566183578, "loss": 0.2016, "step": 31146 }, { "epoch": 0.05522797853506138, "grad_norm": 0.2333984375, "learning_rate": 0.0015886641764001257, "loss": 0.1633, "step": 31148 }, { "epoch": 0.055231524700371196, "grad_norm": 0.26953125, "learning_rate": 0.0015886141940447598, "loss": 0.1726, "step": 31150 }, { "epoch": 0.05523507086568101, "grad_norm": 0.546875, "learning_rate": 0.0015885642095524797, "loss": 0.2194, "step": 31152 }, { "epoch": 0.055238617030990825, "grad_norm": 0.23828125, "learning_rate": 0.0015885142229235033, "loss": 0.2042, "step": 31154 }, { "epoch": 0.05524216319630064, "grad_norm": 0.423828125, "learning_rate": 0.0015884642341580494, "loss": 0.1592, "step": 31156 }, { "epoch": 0.055245709361610454, "grad_norm": 0.56640625, "learning_rate": 0.0015884142432563367, "loss": 0.213, "step": 31158 }, { "epoch": 0.05524925552692027, "grad_norm": 0.68359375, "learning_rate": 0.001588364250218584, "loss": 0.1687, "step": 31160 }, { "epoch": 0.05525280169223008, "grad_norm": 0.8046875, "learning_rate": 0.0015883142550450096, "loss": 0.1816, "step": 31162 }, { "epoch": 0.055256347857539904, "grad_norm": 0.404296875, "learning_rate": 0.0015882642577358323, "loss": 0.2615, "step": 31164 }, { "epoch": 0.05525989402284972, "grad_norm": 0.57421875, "learning_rate": 0.0015882142582912709, "loss": 0.2276, "step": 31166 }, { "epoch": 0.055263440188159534, "grad_norm": 0.60546875, "learning_rate": 0.001588164256711544, "loss": 0.1463, "step": 31168 }, { "epoch": 0.05526698635346935, "grad_norm": 1.484375, "learning_rate": 0.0015881142529968698, "loss": 0.3188, "step": 31170 }, { "epoch": 0.05527053251877916, "grad_norm": 0.28125, "learning_rate": 0.0015880642471474678, "loss": 0.1544, "step": 31172 }, { "epoch": 0.05527407868408898, "grad_norm": 0.84765625, "learning_rate": 0.001588014239163556, "loss": 0.1668, "step": 31174 }, { "epoch": 0.05527762484939879, "grad_norm": 0.4140625, "learning_rate": 0.0015879642290453533, "loss": 0.1791, "step": 31176 }, { "epoch": 0.055281171014708606, "grad_norm": 0.47265625, "learning_rate": 0.0015879142167930787, "loss": 0.1708, "step": 31178 }, { "epoch": 0.05528471718001842, "grad_norm": 1.4765625, "learning_rate": 0.0015878642024069508, "loss": 0.216, "step": 31180 }, { "epoch": 0.055288263345328235, "grad_norm": 0.2353515625, "learning_rate": 0.001587814185887188, "loss": 0.2039, "step": 31182 }, { "epoch": 0.05529180951063805, "grad_norm": 0.5625, "learning_rate": 0.0015877641672340096, "loss": 0.2204, "step": 31184 }, { "epoch": 0.055295355675947865, "grad_norm": 0.1748046875, "learning_rate": 0.0015877141464476336, "loss": 0.271, "step": 31186 }, { "epoch": 0.055298901841257686, "grad_norm": 0.7265625, "learning_rate": 0.0015876641235282798, "loss": 0.246, "step": 31188 }, { "epoch": 0.0553024480065675, "grad_norm": 0.251953125, "learning_rate": 0.0015876140984761662, "loss": 0.1494, "step": 31190 }, { "epoch": 0.055305994171877315, "grad_norm": 0.283203125, "learning_rate": 0.0015875640712915118, "loss": 0.202, "step": 31192 }, { "epoch": 0.05530954033718713, "grad_norm": 0.314453125, "learning_rate": 0.0015875140419745351, "loss": 0.1742, "step": 31194 }, { "epoch": 0.055313086502496944, "grad_norm": 0.703125, "learning_rate": 0.0015874640105254553, "loss": 0.3514, "step": 31196 }, { "epoch": 0.05531663266780676, "grad_norm": 1.7265625, "learning_rate": 0.0015874139769444912, "loss": 0.2531, "step": 31198 }, { "epoch": 0.05532017883311657, "grad_norm": 0.6171875, "learning_rate": 0.0015873639412318614, "loss": 0.3206, "step": 31200 }, { "epoch": 0.05532372499842639, "grad_norm": 0.31640625, "learning_rate": 0.0015873139033877849, "loss": 0.2695, "step": 31202 }, { "epoch": 0.0553272711637362, "grad_norm": 0.412109375, "learning_rate": 0.0015872638634124804, "loss": 0.1735, "step": 31204 }, { "epoch": 0.05533081732904602, "grad_norm": 0.494140625, "learning_rate": 0.0015872138213061666, "loss": 0.2341, "step": 31206 }, { "epoch": 0.05533436349435583, "grad_norm": 0.248046875, "learning_rate": 0.0015871637770690627, "loss": 0.1346, "step": 31208 }, { "epoch": 0.05533790965966565, "grad_norm": 0.55859375, "learning_rate": 0.0015871137307013877, "loss": 0.2033, "step": 31210 }, { "epoch": 0.05534145582497547, "grad_norm": 0.6796875, "learning_rate": 0.0015870636822033597, "loss": 0.1796, "step": 31212 }, { "epoch": 0.05534500199028528, "grad_norm": 0.439453125, "learning_rate": 0.0015870136315751984, "loss": 0.2062, "step": 31214 }, { "epoch": 0.0553485481555951, "grad_norm": 0.35546875, "learning_rate": 0.001586963578817122, "loss": 0.1966, "step": 31216 }, { "epoch": 0.05535209432090491, "grad_norm": 1.0546875, "learning_rate": 0.0015869135239293503, "loss": 0.4616, "step": 31218 }, { "epoch": 0.055355640486214726, "grad_norm": 0.70703125, "learning_rate": 0.0015868634669121016, "loss": 0.2027, "step": 31220 }, { "epoch": 0.05535918665152454, "grad_norm": 0.287109375, "learning_rate": 0.0015868134077655946, "loss": 0.1818, "step": 31222 }, { "epoch": 0.055362732816834355, "grad_norm": 0.2373046875, "learning_rate": 0.0015867633464900486, "loss": 0.2626, "step": 31224 }, { "epoch": 0.05536627898214417, "grad_norm": 0.28515625, "learning_rate": 0.0015867132830856825, "loss": 0.206, "step": 31226 }, { "epoch": 0.055369825147453984, "grad_norm": 0.66796875, "learning_rate": 0.0015866632175527153, "loss": 0.1819, "step": 31228 }, { "epoch": 0.0553733713127638, "grad_norm": 0.63671875, "learning_rate": 0.0015866131498913658, "loss": 0.2969, "step": 31230 }, { "epoch": 0.05537691747807362, "grad_norm": 0.5390625, "learning_rate": 0.001586563080101853, "loss": 0.2063, "step": 31232 }, { "epoch": 0.055380463643383435, "grad_norm": 0.341796875, "learning_rate": 0.001586513008184396, "loss": 0.2209, "step": 31234 }, { "epoch": 0.05538400980869325, "grad_norm": 0.51953125, "learning_rate": 0.0015864629341392135, "loss": 0.2592, "step": 31236 }, { "epoch": 0.055387555974003064, "grad_norm": 0.265625, "learning_rate": 0.0015864128579665248, "loss": 0.1471, "step": 31238 }, { "epoch": 0.05539110213931288, "grad_norm": 0.76171875, "learning_rate": 0.001586362779666549, "loss": 0.2042, "step": 31240 }, { "epoch": 0.05539464830462269, "grad_norm": 0.4375, "learning_rate": 0.0015863126992395047, "loss": 0.2702, "step": 31242 }, { "epoch": 0.05539819446993251, "grad_norm": 4.0, "learning_rate": 0.0015862626166856112, "loss": 0.2453, "step": 31244 }, { "epoch": 0.05540174063524232, "grad_norm": 0.408203125, "learning_rate": 0.0015862125320050876, "loss": 0.288, "step": 31246 }, { "epoch": 0.055405286800552137, "grad_norm": 0.435546875, "learning_rate": 0.001586162445198153, "loss": 0.2731, "step": 31248 }, { "epoch": 0.05540883296586195, "grad_norm": 1.7265625, "learning_rate": 0.001586112356265026, "loss": 0.3648, "step": 31250 }, { "epoch": 0.055412379131171766, "grad_norm": 0.5625, "learning_rate": 0.0015860622652059259, "loss": 0.2104, "step": 31252 }, { "epoch": 0.05541592529648158, "grad_norm": 1.6953125, "learning_rate": 0.0015860121720210716, "loss": 0.204, "step": 31254 }, { "epoch": 0.0554194714617914, "grad_norm": 0.67578125, "learning_rate": 0.0015859620767106823, "loss": 0.1823, "step": 31256 }, { "epoch": 0.055423017627101216, "grad_norm": 0.42578125, "learning_rate": 0.0015859119792749777, "loss": 0.1714, "step": 31258 }, { "epoch": 0.05542656379241103, "grad_norm": 1.03125, "learning_rate": 0.001585861879714176, "loss": 0.2757, "step": 31260 }, { "epoch": 0.055430109957720845, "grad_norm": 0.60546875, "learning_rate": 0.0015858117780284966, "loss": 0.1968, "step": 31262 }, { "epoch": 0.05543365612303066, "grad_norm": 0.61328125, "learning_rate": 0.0015857616742181587, "loss": 0.192, "step": 31264 }, { "epoch": 0.055437202288340474, "grad_norm": 1.359375, "learning_rate": 0.0015857115682833818, "loss": 0.2854, "step": 31266 }, { "epoch": 0.05544074845365029, "grad_norm": 0.28125, "learning_rate": 0.001585661460224384, "loss": 0.1752, "step": 31268 }, { "epoch": 0.055444294618960104, "grad_norm": 0.345703125, "learning_rate": 0.0015856113500413859, "loss": 0.2035, "step": 31270 }, { "epoch": 0.05544784078426992, "grad_norm": 0.55078125, "learning_rate": 0.0015855612377346049, "loss": 0.2004, "step": 31272 }, { "epoch": 0.05545138694957973, "grad_norm": 1.5546875, "learning_rate": 0.0015855111233042617, "loss": 0.1769, "step": 31274 }, { "epoch": 0.05545493311488955, "grad_norm": 1.03125, "learning_rate": 0.0015854610067505745, "loss": 0.2485, "step": 31276 }, { "epoch": 0.05545847928019937, "grad_norm": 0.283203125, "learning_rate": 0.0015854108880737632, "loss": 0.244, "step": 31278 }, { "epoch": 0.05546202544550918, "grad_norm": 0.17578125, "learning_rate": 0.0015853607672740464, "loss": 0.1734, "step": 31280 }, { "epoch": 0.055465571610819, "grad_norm": 0.75, "learning_rate": 0.0015853106443516436, "loss": 0.2192, "step": 31282 }, { "epoch": 0.05546911777612881, "grad_norm": 0.51171875, "learning_rate": 0.001585260519306774, "loss": 0.2941, "step": 31284 }, { "epoch": 0.05547266394143863, "grad_norm": 0.32421875, "learning_rate": 0.0015852103921396568, "loss": 0.1955, "step": 31286 }, { "epoch": 0.05547621010674844, "grad_norm": 0.40234375, "learning_rate": 0.001585160262850511, "loss": 0.2549, "step": 31288 }, { "epoch": 0.055479756272058256, "grad_norm": 0.7421875, "learning_rate": 0.001585110131439556, "loss": 0.1936, "step": 31290 }, { "epoch": 0.05548330243736807, "grad_norm": 0.6171875, "learning_rate": 0.0015850599979070114, "loss": 0.3102, "step": 31292 }, { "epoch": 0.055486848602677885, "grad_norm": 0.64453125, "learning_rate": 0.0015850098622530963, "loss": 0.1962, "step": 31294 }, { "epoch": 0.0554903947679877, "grad_norm": 0.9609375, "learning_rate": 0.001584959724478029, "loss": 0.1734, "step": 31296 }, { "epoch": 0.055493940933297514, "grad_norm": 0.25390625, "learning_rate": 0.0015849095845820301, "loss": 0.1955, "step": 31298 }, { "epoch": 0.05549748709860733, "grad_norm": 0.2734375, "learning_rate": 0.0015848594425653183, "loss": 0.1338, "step": 31300 }, { "epoch": 0.05550103326391715, "grad_norm": 0.447265625, "learning_rate": 0.0015848092984281127, "loss": 0.2134, "step": 31302 }, { "epoch": 0.055504579429226965, "grad_norm": 0.59765625, "learning_rate": 0.001584759152170633, "loss": 0.1921, "step": 31304 }, { "epoch": 0.05550812559453678, "grad_norm": 0.79296875, "learning_rate": 0.0015847090037930983, "loss": 0.1311, "step": 31306 }, { "epoch": 0.055511671759846594, "grad_norm": 1.8046875, "learning_rate": 0.0015846588532957282, "loss": 0.2029, "step": 31308 }, { "epoch": 0.05551521792515641, "grad_norm": 0.470703125, "learning_rate": 0.0015846087006787415, "loss": 0.1818, "step": 31310 }, { "epoch": 0.05551876409046622, "grad_norm": 0.400390625, "learning_rate": 0.001584558545942358, "loss": 0.1529, "step": 31312 }, { "epoch": 0.05552231025577604, "grad_norm": 0.478515625, "learning_rate": 0.001584508389086797, "loss": 0.1835, "step": 31314 }, { "epoch": 0.05552585642108585, "grad_norm": 0.3046875, "learning_rate": 0.0015844582301122773, "loss": 0.2339, "step": 31316 }, { "epoch": 0.05552940258639567, "grad_norm": 0.49609375, "learning_rate": 0.001584408069019019, "loss": 0.1589, "step": 31318 }, { "epoch": 0.05553294875170548, "grad_norm": 2.265625, "learning_rate": 0.0015843579058072413, "loss": 0.2302, "step": 31320 }, { "epoch": 0.055536494917015296, "grad_norm": 0.455078125, "learning_rate": 0.0015843077404771632, "loss": 0.1914, "step": 31322 }, { "epoch": 0.05554004108232512, "grad_norm": 1.25, "learning_rate": 0.0015842575730290046, "loss": 0.1813, "step": 31324 }, { "epoch": 0.05554358724763493, "grad_norm": 0.80078125, "learning_rate": 0.0015842074034629846, "loss": 0.2245, "step": 31326 }, { "epoch": 0.055547133412944746, "grad_norm": 0.412109375, "learning_rate": 0.0015841572317793226, "loss": 0.1937, "step": 31328 }, { "epoch": 0.05555067957825456, "grad_norm": 0.3359375, "learning_rate": 0.0015841070579782383, "loss": 0.2615, "step": 31330 }, { "epoch": 0.055554225743564375, "grad_norm": 0.45703125, "learning_rate": 0.0015840568820599508, "loss": 0.1818, "step": 31332 }, { "epoch": 0.05555777190887419, "grad_norm": 0.263671875, "learning_rate": 0.0015840067040246795, "loss": 0.2092, "step": 31334 }, { "epoch": 0.055561318074184005, "grad_norm": 0.388671875, "learning_rate": 0.0015839565238726441, "loss": 0.1339, "step": 31336 }, { "epoch": 0.05556486423949382, "grad_norm": 0.74609375, "learning_rate": 0.0015839063416040642, "loss": 0.2457, "step": 31338 }, { "epoch": 0.055568410404803634, "grad_norm": 0.353515625, "learning_rate": 0.001583856157219159, "loss": 0.2162, "step": 31340 }, { "epoch": 0.05557195657011345, "grad_norm": 0.353515625, "learning_rate": 0.001583805970718148, "loss": 0.1731, "step": 31342 }, { "epoch": 0.05557550273542326, "grad_norm": 0.80078125, "learning_rate": 0.0015837557821012506, "loss": 0.3627, "step": 31344 }, { "epoch": 0.055579048900733084, "grad_norm": 0.62890625, "learning_rate": 0.0015837055913686861, "loss": 0.1894, "step": 31346 }, { "epoch": 0.0555825950660429, "grad_norm": 0.5625, "learning_rate": 0.001583655398520675, "loss": 0.1778, "step": 31348 }, { "epoch": 0.05558614123135271, "grad_norm": 0.478515625, "learning_rate": 0.0015836052035574357, "loss": 0.2885, "step": 31350 }, { "epoch": 0.05558968739666253, "grad_norm": 0.259765625, "learning_rate": 0.0015835550064791882, "loss": 0.1742, "step": 31352 }, { "epoch": 0.05559323356197234, "grad_norm": 0.95703125, "learning_rate": 0.0015835048072861519, "loss": 0.2766, "step": 31354 }, { "epoch": 0.05559677972728216, "grad_norm": 0.353515625, "learning_rate": 0.0015834546059785466, "loss": 0.1784, "step": 31356 }, { "epoch": 0.05560032589259197, "grad_norm": 0.330078125, "learning_rate": 0.0015834044025565914, "loss": 0.3342, "step": 31358 }, { "epoch": 0.055603872057901786, "grad_norm": 0.64453125, "learning_rate": 0.0015833541970205066, "loss": 0.1974, "step": 31360 }, { "epoch": 0.0556074182232116, "grad_norm": 0.50390625, "learning_rate": 0.001583303989370511, "loss": 0.1654, "step": 31362 }, { "epoch": 0.055610964388521415, "grad_norm": 1.953125, "learning_rate": 0.0015832537796068248, "loss": 0.2265, "step": 31364 }, { "epoch": 0.05561451055383123, "grad_norm": 0.65625, "learning_rate": 0.001583203567729667, "loss": 0.1779, "step": 31366 }, { "epoch": 0.055618056719141044, "grad_norm": 0.48828125, "learning_rate": 0.0015831533537392574, "loss": 0.2579, "step": 31368 }, { "epoch": 0.055621602884450866, "grad_norm": 2.53125, "learning_rate": 0.001583103137635816, "loss": 0.3657, "step": 31370 }, { "epoch": 0.05562514904976068, "grad_norm": 0.8828125, "learning_rate": 0.0015830529194195615, "loss": 0.2351, "step": 31372 }, { "epoch": 0.055628695215070495, "grad_norm": 1.734375, "learning_rate": 0.0015830026990907144, "loss": 0.2034, "step": 31374 }, { "epoch": 0.05563224138038031, "grad_norm": 0.291015625, "learning_rate": 0.0015829524766494943, "loss": 0.1787, "step": 31376 }, { "epoch": 0.055635787545690124, "grad_norm": 0.921875, "learning_rate": 0.0015829022520961204, "loss": 0.2, "step": 31378 }, { "epoch": 0.05563933371099994, "grad_norm": 0.494140625, "learning_rate": 0.0015828520254308129, "loss": 0.1661, "step": 31380 }, { "epoch": 0.05564287987630975, "grad_norm": 1.546875, "learning_rate": 0.001582801796653791, "loss": 0.2007, "step": 31382 }, { "epoch": 0.05564642604161957, "grad_norm": 0.37109375, "learning_rate": 0.0015827515657652743, "loss": 0.2123, "step": 31384 }, { "epoch": 0.05564997220692938, "grad_norm": 0.439453125, "learning_rate": 0.0015827013327654824, "loss": 0.181, "step": 31386 }, { "epoch": 0.0556535183722392, "grad_norm": 0.62890625, "learning_rate": 0.0015826510976546357, "loss": 0.1792, "step": 31388 }, { "epoch": 0.05565706453754901, "grad_norm": 0.349609375, "learning_rate": 0.0015826008604329532, "loss": 0.1785, "step": 31390 }, { "epoch": 0.05566061070285883, "grad_norm": 0.875, "learning_rate": 0.0015825506211006551, "loss": 0.2025, "step": 31392 }, { "epoch": 0.05566415686816865, "grad_norm": 1.4921875, "learning_rate": 0.0015825003796579606, "loss": 0.1991, "step": 31394 }, { "epoch": 0.05566770303347846, "grad_norm": 1.4140625, "learning_rate": 0.00158245013610509, "loss": 0.2841, "step": 31396 }, { "epoch": 0.05567124919878828, "grad_norm": 0.85546875, "learning_rate": 0.0015823998904422622, "loss": 0.1833, "step": 31398 }, { "epoch": 0.05567479536409809, "grad_norm": 0.384765625, "learning_rate": 0.0015823496426696979, "loss": 0.2043, "step": 31400 }, { "epoch": 0.055678341529407906, "grad_norm": 0.59375, "learning_rate": 0.0015822993927876166, "loss": 0.1873, "step": 31402 }, { "epoch": 0.05568188769471772, "grad_norm": 2.03125, "learning_rate": 0.001582249140796238, "loss": 0.3987, "step": 31404 }, { "epoch": 0.055685433860027535, "grad_norm": 0.490234375, "learning_rate": 0.0015821988866957812, "loss": 0.1563, "step": 31406 }, { "epoch": 0.05568898002533735, "grad_norm": 0.2578125, "learning_rate": 0.0015821486304864669, "loss": 0.1952, "step": 31408 }, { "epoch": 0.055692526190647164, "grad_norm": 1.375, "learning_rate": 0.0015820983721685146, "loss": 0.1765, "step": 31410 }, { "epoch": 0.05569607235595698, "grad_norm": 1.453125, "learning_rate": 0.001582048111742144, "loss": 0.1445, "step": 31412 }, { "epoch": 0.0556996185212668, "grad_norm": 0.27734375, "learning_rate": 0.0015819978492075748, "loss": 0.1763, "step": 31414 }, { "epoch": 0.055703164686576614, "grad_norm": 1.3125, "learning_rate": 0.001581947584565027, "loss": 0.2401, "step": 31416 }, { "epoch": 0.05570671085188643, "grad_norm": 0.3984375, "learning_rate": 0.0015818973178147208, "loss": 0.229, "step": 31418 }, { "epoch": 0.055710257017196244, "grad_norm": 0.5234375, "learning_rate": 0.0015818470489568754, "loss": 0.2063, "step": 31420 }, { "epoch": 0.05571380318250606, "grad_norm": 0.3125, "learning_rate": 0.001581796777991711, "loss": 0.1924, "step": 31422 }, { "epoch": 0.05571734934781587, "grad_norm": 1.3359375, "learning_rate": 0.0015817465049194474, "loss": 0.2552, "step": 31424 }, { "epoch": 0.05572089551312569, "grad_norm": 0.51171875, "learning_rate": 0.001581696229740304, "loss": 0.1498, "step": 31426 }, { "epoch": 0.0557244416784355, "grad_norm": 0.515625, "learning_rate": 0.0015816459524545016, "loss": 0.2681, "step": 31428 }, { "epoch": 0.055727987843745316, "grad_norm": 0.8125, "learning_rate": 0.0015815956730622594, "loss": 0.4454, "step": 31430 }, { "epoch": 0.05573153400905513, "grad_norm": 2.359375, "learning_rate": 0.0015815453915637972, "loss": 0.3548, "step": 31432 }, { "epoch": 0.055735080174364945, "grad_norm": 1.1328125, "learning_rate": 0.0015814951079593355, "loss": 0.2253, "step": 31434 }, { "epoch": 0.05573862633967476, "grad_norm": 1.2890625, "learning_rate": 0.0015814448222490939, "loss": 0.2625, "step": 31436 }, { "epoch": 0.05574217250498458, "grad_norm": 0.30859375, "learning_rate": 0.0015813945344332922, "loss": 0.314, "step": 31438 }, { "epoch": 0.055745718670294396, "grad_norm": 0.62890625, "learning_rate": 0.0015813442445121505, "loss": 0.2143, "step": 31440 }, { "epoch": 0.05574926483560421, "grad_norm": 0.55078125, "learning_rate": 0.0015812939524858884, "loss": 0.1446, "step": 31442 }, { "epoch": 0.055752811000914025, "grad_norm": 0.421875, "learning_rate": 0.0015812436583547264, "loss": 0.2614, "step": 31444 }, { "epoch": 0.05575635716622384, "grad_norm": 0.423828125, "learning_rate": 0.0015811933621188842, "loss": 0.2264, "step": 31446 }, { "epoch": 0.055759903331533654, "grad_norm": 0.380859375, "learning_rate": 0.0015811430637785815, "loss": 0.1803, "step": 31448 }, { "epoch": 0.05576344949684347, "grad_norm": 0.60546875, "learning_rate": 0.0015810927633340386, "loss": 0.2521, "step": 31450 }, { "epoch": 0.05576699566215328, "grad_norm": 0.451171875, "learning_rate": 0.0015810424607854755, "loss": 0.2851, "step": 31452 }, { "epoch": 0.0557705418274631, "grad_norm": 0.81640625, "learning_rate": 0.001580992156133112, "loss": 0.197, "step": 31454 }, { "epoch": 0.05577408799277291, "grad_norm": 0.578125, "learning_rate": 0.0015809418493771684, "loss": 0.2149, "step": 31456 }, { "epoch": 0.05577763415808273, "grad_norm": 2.5, "learning_rate": 0.0015808915405178643, "loss": 0.3473, "step": 31458 }, { "epoch": 0.05578118032339255, "grad_norm": 0.326171875, "learning_rate": 0.00158084122955542, "loss": 0.2581, "step": 31460 }, { "epoch": 0.05578472648870236, "grad_norm": 0.388671875, "learning_rate": 0.0015807909164900556, "loss": 0.342, "step": 31462 }, { "epoch": 0.05578827265401218, "grad_norm": 0.38671875, "learning_rate": 0.0015807406013219904, "loss": 0.1603, "step": 31464 }, { "epoch": 0.05579181881932199, "grad_norm": 0.52734375, "learning_rate": 0.001580690284051446, "loss": 0.2562, "step": 31466 }, { "epoch": 0.05579536498463181, "grad_norm": 1.96875, "learning_rate": 0.001580639964678641, "loss": 0.4193, "step": 31468 }, { "epoch": 0.05579891114994162, "grad_norm": 0.486328125, "learning_rate": 0.001580589643203796, "loss": 0.217, "step": 31470 }, { "epoch": 0.055802457315251436, "grad_norm": 0.416015625, "learning_rate": 0.001580539319627131, "loss": 0.3901, "step": 31472 }, { "epoch": 0.05580600348056125, "grad_norm": 0.625, "learning_rate": 0.0015804889939488663, "loss": 0.2214, "step": 31474 }, { "epoch": 0.055809549645871065, "grad_norm": 0.443359375, "learning_rate": 0.0015804386661692218, "loss": 0.1907, "step": 31476 }, { "epoch": 0.05581309581118088, "grad_norm": 0.341796875, "learning_rate": 0.0015803883362884177, "loss": 0.2111, "step": 31478 }, { "epoch": 0.055816641976490694, "grad_norm": 1.5390625, "learning_rate": 0.0015803380043066739, "loss": 0.2539, "step": 31480 }, { "epoch": 0.055820188141800516, "grad_norm": 0.984375, "learning_rate": 0.0015802876702242109, "loss": 0.1664, "step": 31482 }, { "epoch": 0.05582373430711033, "grad_norm": 0.275390625, "learning_rate": 0.0015802373340412486, "loss": 0.2057, "step": 31484 }, { "epoch": 0.055827280472420145, "grad_norm": 0.35546875, "learning_rate": 0.0015801869957580068, "loss": 0.2017, "step": 31486 }, { "epoch": 0.05583082663772996, "grad_norm": 0.30078125, "learning_rate": 0.0015801366553747063, "loss": 0.1755, "step": 31488 }, { "epoch": 0.055834372803039774, "grad_norm": 0.38671875, "learning_rate": 0.0015800863128915667, "loss": 0.2311, "step": 31490 }, { "epoch": 0.05583791896834959, "grad_norm": 0.62109375, "learning_rate": 0.0015800359683088088, "loss": 0.1608, "step": 31492 }, { "epoch": 0.0558414651336594, "grad_norm": 0.404296875, "learning_rate": 0.0015799856216266524, "loss": 0.1902, "step": 31494 }, { "epoch": 0.05584501129896922, "grad_norm": 0.421875, "learning_rate": 0.0015799352728453175, "loss": 0.181, "step": 31496 }, { "epoch": 0.05584855746427903, "grad_norm": 0.41796875, "learning_rate": 0.0015798849219650247, "loss": 0.1978, "step": 31498 }, { "epoch": 0.055852103629588847, "grad_norm": 0.2412109375, "learning_rate": 0.001579834568985994, "loss": 0.1685, "step": 31500 }, { "epoch": 0.05585564979489866, "grad_norm": 0.298828125, "learning_rate": 0.001579784213908445, "loss": 0.1578, "step": 31502 }, { "epoch": 0.055859195960208476, "grad_norm": 0.34765625, "learning_rate": 0.0015797338567325994, "loss": 0.1546, "step": 31504 }, { "epoch": 0.0558627421255183, "grad_norm": 0.2490234375, "learning_rate": 0.001579683497458676, "loss": 0.1491, "step": 31506 }, { "epoch": 0.05586628829082811, "grad_norm": 0.486328125, "learning_rate": 0.0015796331360868959, "loss": 0.4946, "step": 31508 }, { "epoch": 0.055869834456137926, "grad_norm": 0.2578125, "learning_rate": 0.001579582772617479, "loss": 0.2212, "step": 31510 }, { "epoch": 0.05587338062144774, "grad_norm": 0.546875, "learning_rate": 0.0015795324070506456, "loss": 0.1957, "step": 31512 }, { "epoch": 0.055876926786757555, "grad_norm": 0.5546875, "learning_rate": 0.001579482039386616, "loss": 0.3397, "step": 31514 }, { "epoch": 0.05588047295206737, "grad_norm": 0.6640625, "learning_rate": 0.0015794316696256107, "loss": 0.1913, "step": 31516 }, { "epoch": 0.055884019117377184, "grad_norm": 0.486328125, "learning_rate": 0.0015793812977678494, "loss": 0.2293, "step": 31518 }, { "epoch": 0.055887565282687, "grad_norm": 0.353515625, "learning_rate": 0.0015793309238135527, "loss": 0.1675, "step": 31520 }, { "epoch": 0.055891111447996814, "grad_norm": 0.875, "learning_rate": 0.0015792805477629413, "loss": 0.1526, "step": 31522 }, { "epoch": 0.05589465761330663, "grad_norm": 0.375, "learning_rate": 0.0015792301696162352, "loss": 0.1256, "step": 31524 }, { "epoch": 0.05589820377861644, "grad_norm": 0.98828125, "learning_rate": 0.0015791797893736543, "loss": 0.1994, "step": 31526 }, { "epoch": 0.055901749943926264, "grad_norm": 0.390625, "learning_rate": 0.0015791294070354196, "loss": 0.1543, "step": 31528 }, { "epoch": 0.05590529610923608, "grad_norm": 0.35546875, "learning_rate": 0.0015790790226017514, "loss": 0.1686, "step": 31530 }, { "epoch": 0.05590884227454589, "grad_norm": 0.279296875, "learning_rate": 0.0015790286360728696, "loss": 0.1553, "step": 31532 }, { "epoch": 0.05591238843985571, "grad_norm": 0.22265625, "learning_rate": 0.0015789782474489949, "loss": 0.155, "step": 31534 }, { "epoch": 0.05591593460516552, "grad_norm": 0.78515625, "learning_rate": 0.0015789278567303476, "loss": 0.2033, "step": 31536 }, { "epoch": 0.05591948077047534, "grad_norm": 1.4140625, "learning_rate": 0.0015788774639171478, "loss": 0.1975, "step": 31538 }, { "epoch": 0.05592302693578515, "grad_norm": 2.453125, "learning_rate": 0.0015788270690096163, "loss": 0.3192, "step": 31540 }, { "epoch": 0.055926573101094966, "grad_norm": 0.3359375, "learning_rate": 0.0015787766720079732, "loss": 0.2119, "step": 31542 }, { "epoch": 0.05593011926640478, "grad_norm": 0.283203125, "learning_rate": 0.0015787262729124393, "loss": 0.1727, "step": 31544 }, { "epoch": 0.055933665431714595, "grad_norm": 0.6328125, "learning_rate": 0.0015786758717232343, "loss": 0.2437, "step": 31546 }, { "epoch": 0.05593721159702441, "grad_norm": 0.546875, "learning_rate": 0.0015786254684405797, "loss": 0.3225, "step": 31548 }, { "epoch": 0.05594075776233423, "grad_norm": 0.279296875, "learning_rate": 0.0015785750630646952, "loss": 0.3449, "step": 31550 }, { "epoch": 0.055944303927644046, "grad_norm": 0.296875, "learning_rate": 0.0015785246555958012, "loss": 0.1501, "step": 31552 }, { "epoch": 0.05594785009295386, "grad_norm": 0.37890625, "learning_rate": 0.001578474246034118, "loss": 0.2831, "step": 31554 }, { "epoch": 0.055951396258263675, "grad_norm": 1.3515625, "learning_rate": 0.0015784238343798667, "loss": 0.2639, "step": 31556 }, { "epoch": 0.05595494242357349, "grad_norm": 0.36328125, "learning_rate": 0.0015783734206332675, "loss": 0.1718, "step": 31558 }, { "epoch": 0.055958488588883304, "grad_norm": 0.353515625, "learning_rate": 0.001578323004794541, "loss": 0.2069, "step": 31560 }, { "epoch": 0.05596203475419312, "grad_norm": 0.58203125, "learning_rate": 0.001578272586863907, "loss": 0.2093, "step": 31562 }, { "epoch": 0.05596558091950293, "grad_norm": 0.48046875, "learning_rate": 0.0015782221668415868, "loss": 0.1728, "step": 31564 }, { "epoch": 0.05596912708481275, "grad_norm": 0.26953125, "learning_rate": 0.0015781717447278007, "loss": 0.1473, "step": 31566 }, { "epoch": 0.05597267325012256, "grad_norm": 0.466796875, "learning_rate": 0.0015781213205227692, "loss": 0.2306, "step": 31568 }, { "epoch": 0.05597621941543238, "grad_norm": 0.392578125, "learning_rate": 0.0015780708942267126, "loss": 0.1683, "step": 31570 }, { "epoch": 0.05597976558074219, "grad_norm": 0.23046875, "learning_rate": 0.0015780204658398516, "loss": 0.1924, "step": 31572 }, { "epoch": 0.05598331174605201, "grad_norm": 0.15625, "learning_rate": 0.001577970035362407, "loss": 0.1573, "step": 31574 }, { "epoch": 0.05598685791136183, "grad_norm": 0.259765625, "learning_rate": 0.0015779196027945986, "loss": 0.206, "step": 31576 }, { "epoch": 0.05599040407667164, "grad_norm": 0.345703125, "learning_rate": 0.0015778691681366478, "loss": 0.2891, "step": 31578 }, { "epoch": 0.055993950241981456, "grad_norm": 0.294921875, "learning_rate": 0.0015778187313887748, "loss": 0.128, "step": 31580 }, { "epoch": 0.05599749640729127, "grad_norm": 0.349609375, "learning_rate": 0.0015777682925512003, "loss": 0.1987, "step": 31582 }, { "epoch": 0.056001042572601085, "grad_norm": 0.89453125, "learning_rate": 0.0015777178516241447, "loss": 0.2538, "step": 31584 }, { "epoch": 0.0560045887379109, "grad_norm": 0.89453125, "learning_rate": 0.0015776674086078286, "loss": 0.2965, "step": 31586 }, { "epoch": 0.056008134903220715, "grad_norm": 0.259765625, "learning_rate": 0.001577616963502473, "loss": 0.2687, "step": 31588 }, { "epoch": 0.05601168106853053, "grad_norm": 0.376953125, "learning_rate": 0.0015775665163082983, "loss": 0.2303, "step": 31590 }, { "epoch": 0.056015227233840344, "grad_norm": 0.90234375, "learning_rate": 0.0015775160670255248, "loss": 0.3005, "step": 31592 }, { "epoch": 0.05601877339915016, "grad_norm": 0.4921875, "learning_rate": 0.0015774656156543733, "loss": 0.2338, "step": 31594 }, { "epoch": 0.05602231956445998, "grad_norm": 0.361328125, "learning_rate": 0.0015774151621950647, "loss": 0.2035, "step": 31596 }, { "epoch": 0.056025865729769794, "grad_norm": 0.65625, "learning_rate": 0.0015773647066478196, "loss": 0.2276, "step": 31598 }, { "epoch": 0.05602941189507961, "grad_norm": 0.27734375, "learning_rate": 0.0015773142490128583, "loss": 0.1807, "step": 31600 }, { "epoch": 0.05603295806038942, "grad_norm": 1.2109375, "learning_rate": 0.001577263789290402, "loss": 0.1833, "step": 31602 }, { "epoch": 0.05603650422569924, "grad_norm": 7.46875, "learning_rate": 0.001577213327480671, "loss": 0.1888, "step": 31604 }, { "epoch": 0.05604005039100905, "grad_norm": 0.703125, "learning_rate": 0.0015771628635838863, "loss": 0.3437, "step": 31606 }, { "epoch": 0.05604359655631887, "grad_norm": 0.6015625, "learning_rate": 0.0015771123976002681, "loss": 0.1537, "step": 31608 }, { "epoch": 0.05604714272162868, "grad_norm": 0.8203125, "learning_rate": 0.0015770619295300376, "loss": 0.2145, "step": 31610 }, { "epoch": 0.056050688886938496, "grad_norm": 2.59375, "learning_rate": 0.001577011459373415, "loss": 0.2339, "step": 31612 }, { "epoch": 0.05605423505224831, "grad_norm": 0.5546875, "learning_rate": 0.0015769609871306217, "loss": 0.1846, "step": 31614 }, { "epoch": 0.056057781217558125, "grad_norm": 0.318359375, "learning_rate": 0.001576910512801878, "loss": 0.1614, "step": 31616 }, { "epoch": 0.05606132738286795, "grad_norm": 0.2578125, "learning_rate": 0.0015768600363874048, "loss": 0.1297, "step": 31618 }, { "epoch": 0.05606487354817776, "grad_norm": 1.2890625, "learning_rate": 0.0015768095578874224, "loss": 0.2745, "step": 31620 }, { "epoch": 0.056068419713487576, "grad_norm": 0.375, "learning_rate": 0.0015767590773021526, "loss": 0.2105, "step": 31622 }, { "epoch": 0.05607196587879739, "grad_norm": 0.3046875, "learning_rate": 0.0015767085946318154, "loss": 0.1507, "step": 31624 }, { "epoch": 0.056075512044107205, "grad_norm": 1.6328125, "learning_rate": 0.0015766581098766315, "loss": 0.1731, "step": 31626 }, { "epoch": 0.05607905820941702, "grad_norm": 0.255859375, "learning_rate": 0.0015766076230368218, "loss": 0.1255, "step": 31628 }, { "epoch": 0.056082604374726834, "grad_norm": 0.2314453125, "learning_rate": 0.0015765571341126072, "loss": 0.1965, "step": 31630 }, { "epoch": 0.05608615054003665, "grad_norm": 0.443359375, "learning_rate": 0.001576506643104209, "loss": 0.1852, "step": 31632 }, { "epoch": 0.05608969670534646, "grad_norm": 0.345703125, "learning_rate": 0.001576456150011847, "loss": 0.2156, "step": 31634 }, { "epoch": 0.05609324287065628, "grad_norm": 0.421875, "learning_rate": 0.0015764056548357428, "loss": 0.1837, "step": 31636 }, { "epoch": 0.05609678903596609, "grad_norm": 0.23828125, "learning_rate": 0.001576355157576117, "loss": 0.1918, "step": 31638 }, { "epoch": 0.05610033520127591, "grad_norm": 3.15625, "learning_rate": 0.0015763046582331901, "loss": 0.3827, "step": 31640 }, { "epoch": 0.05610388136658573, "grad_norm": 0.310546875, "learning_rate": 0.0015762541568071838, "loss": 0.2107, "step": 31642 }, { "epoch": 0.05610742753189554, "grad_norm": 0.359375, "learning_rate": 0.0015762036532983182, "loss": 0.183, "step": 31644 }, { "epoch": 0.05611097369720536, "grad_norm": 0.9296875, "learning_rate": 0.0015761531477068147, "loss": 0.2281, "step": 31646 }, { "epoch": 0.05611451986251517, "grad_norm": 0.640625, "learning_rate": 0.0015761026400328935, "loss": 0.261, "step": 31648 }, { "epoch": 0.05611806602782499, "grad_norm": 0.34765625, "learning_rate": 0.0015760521302767762, "loss": 0.182, "step": 31650 }, { "epoch": 0.0561216121931348, "grad_norm": 0.423828125, "learning_rate": 0.0015760016184386831, "loss": 0.2255, "step": 31652 }, { "epoch": 0.056125158358444616, "grad_norm": 0.50390625, "learning_rate": 0.0015759511045188353, "loss": 0.1869, "step": 31654 }, { "epoch": 0.05612870452375443, "grad_norm": 0.328125, "learning_rate": 0.0015759005885174545, "loss": 0.2303, "step": 31656 }, { "epoch": 0.056132250689064245, "grad_norm": 1.359375, "learning_rate": 0.0015758500704347602, "loss": 0.2089, "step": 31658 }, { "epoch": 0.05613579685437406, "grad_norm": 0.8984375, "learning_rate": 0.0015757995502709747, "loss": 0.1791, "step": 31660 }, { "epoch": 0.056139343019683874, "grad_norm": 0.2080078125, "learning_rate": 0.0015757490280263182, "loss": 0.1313, "step": 31662 }, { "epoch": 0.056142889184993695, "grad_norm": 0.423828125, "learning_rate": 0.0015756985037010118, "loss": 0.1574, "step": 31664 }, { "epoch": 0.05614643535030351, "grad_norm": 0.462890625, "learning_rate": 0.0015756479772952765, "loss": 0.2021, "step": 31666 }, { "epoch": 0.056149981515613324, "grad_norm": 0.78125, "learning_rate": 0.0015755974488093332, "loss": 0.1713, "step": 31668 }, { "epoch": 0.05615352768092314, "grad_norm": 0.16015625, "learning_rate": 0.0015755469182434027, "loss": 0.2109, "step": 31670 }, { "epoch": 0.056157073846232954, "grad_norm": 1.421875, "learning_rate": 0.0015754963855977066, "loss": 0.2644, "step": 31672 }, { "epoch": 0.05616062001154277, "grad_norm": 0.93359375, "learning_rate": 0.001575445850872465, "loss": 0.172, "step": 31674 }, { "epoch": 0.05616416617685258, "grad_norm": 0.57421875, "learning_rate": 0.0015753953140678998, "loss": 0.1923, "step": 31676 }, { "epoch": 0.0561677123421624, "grad_norm": 0.251953125, "learning_rate": 0.0015753447751842313, "loss": 0.2004, "step": 31678 }, { "epoch": 0.05617125850747221, "grad_norm": 0.5234375, "learning_rate": 0.0015752942342216816, "loss": 0.166, "step": 31680 }, { "epoch": 0.056174804672782026, "grad_norm": 0.380859375, "learning_rate": 0.0015752436911804703, "loss": 0.1743, "step": 31682 }, { "epoch": 0.05617835083809184, "grad_norm": 0.296875, "learning_rate": 0.0015751931460608196, "loss": 0.2723, "step": 31684 }, { "epoch": 0.05618189700340166, "grad_norm": 0.37890625, "learning_rate": 0.0015751425988629498, "loss": 0.2252, "step": 31686 }, { "epoch": 0.05618544316871148, "grad_norm": 0.2109375, "learning_rate": 0.0015750920495870822, "loss": 0.2169, "step": 31688 }, { "epoch": 0.05618898933402129, "grad_norm": 0.416015625, "learning_rate": 0.001575041498233438, "loss": 0.1383, "step": 31690 }, { "epoch": 0.056192535499331106, "grad_norm": 0.5390625, "learning_rate": 0.0015749909448022385, "loss": 0.1735, "step": 31692 }, { "epoch": 0.05619608166464092, "grad_norm": 4.15625, "learning_rate": 0.0015749403892937042, "loss": 0.3588, "step": 31694 }, { "epoch": 0.056199627829950735, "grad_norm": 0.25390625, "learning_rate": 0.0015748898317080567, "loss": 0.2356, "step": 31696 }, { "epoch": 0.05620317399526055, "grad_norm": 0.2890625, "learning_rate": 0.001574839272045517, "loss": 0.1871, "step": 31698 }, { "epoch": 0.056206720160570364, "grad_norm": 0.76171875, "learning_rate": 0.0015747887103063059, "loss": 0.144, "step": 31700 }, { "epoch": 0.05621026632588018, "grad_norm": 0.296875, "learning_rate": 0.0015747381464906447, "loss": 0.2034, "step": 31702 }, { "epoch": 0.05621381249118999, "grad_norm": 0.55078125, "learning_rate": 0.001574687580598755, "loss": 0.259, "step": 31704 }, { "epoch": 0.05621735865649981, "grad_norm": 0.39453125, "learning_rate": 0.0015746370126308571, "loss": 0.196, "step": 31706 }, { "epoch": 0.05622090482180962, "grad_norm": 0.6875, "learning_rate": 0.001574586442587173, "loss": 0.1685, "step": 31708 }, { "epoch": 0.056224450987119444, "grad_norm": 0.322265625, "learning_rate": 0.0015745358704679232, "loss": 0.181, "step": 31710 }, { "epoch": 0.05622799715242926, "grad_norm": 0.365234375, "learning_rate": 0.0015744852962733295, "loss": 0.206, "step": 31712 }, { "epoch": 0.05623154331773907, "grad_norm": 0.89453125, "learning_rate": 0.0015744347200036123, "loss": 0.1821, "step": 31714 }, { "epoch": 0.05623508948304889, "grad_norm": 0.423828125, "learning_rate": 0.0015743841416589936, "loss": 0.2014, "step": 31716 }, { "epoch": 0.0562386356483587, "grad_norm": 0.322265625, "learning_rate": 0.0015743335612396939, "loss": 0.1839, "step": 31718 }, { "epoch": 0.05624218181366852, "grad_norm": 0.29296875, "learning_rate": 0.0015742829787459352, "loss": 0.258, "step": 31720 }, { "epoch": 0.05624572797897833, "grad_norm": 1.015625, "learning_rate": 0.0015742323941779375, "loss": 0.2354, "step": 31722 }, { "epoch": 0.056249274144288146, "grad_norm": 1.3828125, "learning_rate": 0.0015741818075359236, "loss": 0.2884, "step": 31724 }, { "epoch": 0.05625282030959796, "grad_norm": 1.125, "learning_rate": 0.0015741312188201132, "loss": 0.2646, "step": 31726 }, { "epoch": 0.056256366474907775, "grad_norm": 0.7734375, "learning_rate": 0.0015740806280307284, "loss": 0.2466, "step": 31728 }, { "epoch": 0.05625991264021759, "grad_norm": 0.49609375, "learning_rate": 0.0015740300351679906, "loss": 0.1922, "step": 31730 }, { "epoch": 0.05626345880552741, "grad_norm": 0.380859375, "learning_rate": 0.0015739794402321208, "loss": 0.2046, "step": 31732 }, { "epoch": 0.056267004970837226, "grad_norm": 3.671875, "learning_rate": 0.00157392884322334, "loss": 0.3482, "step": 31734 }, { "epoch": 0.05627055113614704, "grad_norm": 0.251953125, "learning_rate": 0.0015738782441418698, "loss": 0.2074, "step": 31736 }, { "epoch": 0.056274097301456855, "grad_norm": 0.2294921875, "learning_rate": 0.0015738276429879314, "loss": 0.1833, "step": 31738 }, { "epoch": 0.05627764346676667, "grad_norm": 0.5625, "learning_rate": 0.0015737770397617461, "loss": 0.193, "step": 31740 }, { "epoch": 0.056281189632076484, "grad_norm": 0.625, "learning_rate": 0.0015737264344635354, "loss": 0.1712, "step": 31742 }, { "epoch": 0.0562847357973863, "grad_norm": 0.375, "learning_rate": 0.00157367582709352, "loss": 0.1809, "step": 31744 }, { "epoch": 0.05628828196269611, "grad_norm": 0.369140625, "learning_rate": 0.001573625217651922, "loss": 0.1505, "step": 31746 }, { "epoch": 0.05629182812800593, "grad_norm": 0.6875, "learning_rate": 0.0015735746061389627, "loss": 0.2225, "step": 31748 }, { "epoch": 0.05629537429331574, "grad_norm": 0.2109375, "learning_rate": 0.001573523992554863, "loss": 0.2404, "step": 31750 }, { "epoch": 0.056298920458625556, "grad_norm": 0.37890625, "learning_rate": 0.0015734733768998442, "loss": 0.1931, "step": 31752 }, { "epoch": 0.05630246662393538, "grad_norm": 0.2255859375, "learning_rate": 0.001573422759174128, "loss": 0.1842, "step": 31754 }, { "epoch": 0.05630601278924519, "grad_norm": 0.263671875, "learning_rate": 0.0015733721393779356, "loss": 0.1661, "step": 31756 }, { "epoch": 0.05630955895455501, "grad_norm": 0.443359375, "learning_rate": 0.0015733215175114887, "loss": 0.164, "step": 31758 }, { "epoch": 0.05631310511986482, "grad_norm": 0.9453125, "learning_rate": 0.0015732708935750077, "loss": 0.2351, "step": 31760 }, { "epoch": 0.056316651285174636, "grad_norm": 0.39453125, "learning_rate": 0.0015732202675687154, "loss": 0.1819, "step": 31762 }, { "epoch": 0.05632019745048445, "grad_norm": 0.423828125, "learning_rate": 0.0015731696394928323, "loss": 0.1952, "step": 31764 }, { "epoch": 0.056323743615794265, "grad_norm": 1.171875, "learning_rate": 0.00157311900934758, "loss": 0.3291, "step": 31766 }, { "epoch": 0.05632728978110408, "grad_norm": 4.125, "learning_rate": 0.0015730683771331799, "loss": 0.2503, "step": 31768 }, { "epoch": 0.056330835946413894, "grad_norm": 0.68359375, "learning_rate": 0.0015730177428498538, "loss": 0.1594, "step": 31770 }, { "epoch": 0.05633438211172371, "grad_norm": 1.34375, "learning_rate": 0.001572967106497823, "loss": 0.2025, "step": 31772 }, { "epoch": 0.056337928277033524, "grad_norm": 0.515625, "learning_rate": 0.0015729164680773082, "loss": 0.1814, "step": 31774 }, { "epoch": 0.05634147444234334, "grad_norm": 0.68359375, "learning_rate": 0.0015728658275885319, "loss": 0.2987, "step": 31776 }, { "epoch": 0.05634502060765316, "grad_norm": 0.5546875, "learning_rate": 0.0015728151850317152, "loss": 0.1991, "step": 31778 }, { "epoch": 0.056348566772962974, "grad_norm": 0.5390625, "learning_rate": 0.0015727645404070793, "loss": 0.1644, "step": 31780 }, { "epoch": 0.05635211293827279, "grad_norm": 2.359375, "learning_rate": 0.001572713893714846, "loss": 0.2354, "step": 31782 }, { "epoch": 0.0563556591035826, "grad_norm": 0.318359375, "learning_rate": 0.0015726632449552362, "loss": 0.2181, "step": 31784 }, { "epoch": 0.05635920526889242, "grad_norm": 0.5234375, "learning_rate": 0.0015726125941284727, "loss": 0.2244, "step": 31786 }, { "epoch": 0.05636275143420223, "grad_norm": 0.56640625, "learning_rate": 0.0015725619412347755, "loss": 0.1776, "step": 31788 }, { "epoch": 0.05636629759951205, "grad_norm": 0.52734375, "learning_rate": 0.0015725112862743679, "loss": 0.2227, "step": 31790 }, { "epoch": 0.05636984376482186, "grad_norm": 1.5859375, "learning_rate": 0.0015724606292474696, "loss": 0.2322, "step": 31792 }, { "epoch": 0.056373389930131676, "grad_norm": 0.8125, "learning_rate": 0.0015724099701543033, "loss": 0.2024, "step": 31794 }, { "epoch": 0.05637693609544149, "grad_norm": 0.349609375, "learning_rate": 0.0015723593089950899, "loss": 0.1994, "step": 31796 }, { "epoch": 0.056380482260751305, "grad_norm": 2.21875, "learning_rate": 0.0015723086457700515, "loss": 0.2888, "step": 31798 }, { "epoch": 0.05638402842606113, "grad_norm": 0.96484375, "learning_rate": 0.0015722579804794093, "loss": 0.2439, "step": 31800 }, { "epoch": 0.05638757459137094, "grad_norm": 0.6015625, "learning_rate": 0.001572207313123385, "loss": 0.2162, "step": 31802 }, { "epoch": 0.056391120756680756, "grad_norm": 1.6484375, "learning_rate": 0.0015721566437022002, "loss": 0.1872, "step": 31804 }, { "epoch": 0.05639466692199057, "grad_norm": 0.4453125, "learning_rate": 0.0015721059722160766, "loss": 0.2099, "step": 31806 }, { "epoch": 0.056398213087300385, "grad_norm": 0.34375, "learning_rate": 0.0015720552986652357, "loss": 0.1453, "step": 31808 }, { "epoch": 0.0564017592526102, "grad_norm": 0.439453125, "learning_rate": 0.0015720046230498993, "loss": 0.3344, "step": 31810 }, { "epoch": 0.056405305417920014, "grad_norm": 0.8828125, "learning_rate": 0.0015719539453702884, "loss": 0.2228, "step": 31812 }, { "epoch": 0.05640885158322983, "grad_norm": 2.734375, "learning_rate": 0.0015719032656266254, "loss": 0.3393, "step": 31814 }, { "epoch": 0.05641239774853964, "grad_norm": 0.40625, "learning_rate": 0.0015718525838191316, "loss": 0.1845, "step": 31816 }, { "epoch": 0.05641594391384946, "grad_norm": 0.33203125, "learning_rate": 0.0015718018999480285, "loss": 0.1682, "step": 31818 }, { "epoch": 0.05641949007915927, "grad_norm": 0.54296875, "learning_rate": 0.0015717512140135384, "loss": 0.3934, "step": 31820 }, { "epoch": 0.056423036244469094, "grad_norm": 0.9296875, "learning_rate": 0.0015717005260158818, "loss": 0.2013, "step": 31822 }, { "epoch": 0.05642658240977891, "grad_norm": 5.9375, "learning_rate": 0.0015716498359552818, "loss": 0.3728, "step": 31824 }, { "epoch": 0.05643012857508872, "grad_norm": 0.5703125, "learning_rate": 0.0015715991438319588, "loss": 0.1906, "step": 31826 }, { "epoch": 0.05643367474039854, "grad_norm": 0.6875, "learning_rate": 0.0015715484496461357, "loss": 0.3512, "step": 31828 }, { "epoch": 0.05643722090570835, "grad_norm": 0.421875, "learning_rate": 0.001571497753398033, "loss": 0.2177, "step": 31830 }, { "epoch": 0.056440767071018166, "grad_norm": 0.37109375, "learning_rate": 0.0015714470550878734, "loss": 0.1614, "step": 31832 }, { "epoch": 0.05644431323632798, "grad_norm": 0.921875, "learning_rate": 0.0015713963547158783, "loss": 0.1625, "step": 31834 }, { "epoch": 0.056447859401637795, "grad_norm": 2.140625, "learning_rate": 0.0015713456522822688, "loss": 0.2384, "step": 31836 }, { "epoch": 0.05645140556694761, "grad_norm": 0.5390625, "learning_rate": 0.0015712949477872677, "loss": 0.206, "step": 31838 }, { "epoch": 0.056454951732257425, "grad_norm": 0.46875, "learning_rate": 0.0015712442412310961, "loss": 0.3, "step": 31840 }, { "epoch": 0.05645849789756724, "grad_norm": 0.30078125, "learning_rate": 0.001571193532613976, "loss": 0.1562, "step": 31842 }, { "epoch": 0.056462044062877054, "grad_norm": 0.83203125, "learning_rate": 0.001571142821936129, "loss": 0.1549, "step": 31844 }, { "epoch": 0.056465590228186875, "grad_norm": 0.37890625, "learning_rate": 0.001571092109197777, "loss": 0.1394, "step": 31846 }, { "epoch": 0.05646913639349669, "grad_norm": 0.71484375, "learning_rate": 0.0015710413943991418, "loss": 0.2299, "step": 31848 }, { "epoch": 0.056472682558806504, "grad_norm": 0.228515625, "learning_rate": 0.001570990677540445, "loss": 0.3401, "step": 31850 }, { "epoch": 0.05647622872411632, "grad_norm": 0.640625, "learning_rate": 0.0015709399586219088, "loss": 0.1858, "step": 31852 }, { "epoch": 0.05647977488942613, "grad_norm": 0.392578125, "learning_rate": 0.0015708892376437547, "loss": 0.1818, "step": 31854 }, { "epoch": 0.05648332105473595, "grad_norm": 3.609375, "learning_rate": 0.0015708385146062048, "loss": 0.2723, "step": 31856 }, { "epoch": 0.05648686722004576, "grad_norm": 0.66015625, "learning_rate": 0.0015707877895094803, "loss": 0.231, "step": 31858 }, { "epoch": 0.05649041338535558, "grad_norm": 0.48046875, "learning_rate": 0.0015707370623538038, "loss": 0.1811, "step": 31860 }, { "epoch": 0.05649395955066539, "grad_norm": 0.4453125, "learning_rate": 0.0015706863331393965, "loss": 0.2217, "step": 31862 }, { "epoch": 0.056497505715975206, "grad_norm": 0.54296875, "learning_rate": 0.0015706356018664806, "loss": 0.182, "step": 31864 }, { "epoch": 0.05650105188128502, "grad_norm": 0.400390625, "learning_rate": 0.0015705848685352783, "loss": 0.2094, "step": 31866 }, { "epoch": 0.05650459804659484, "grad_norm": 1.328125, "learning_rate": 0.0015705341331460109, "loss": 0.3011, "step": 31868 }, { "epoch": 0.05650814421190466, "grad_norm": 0.7109375, "learning_rate": 0.0015704833956989006, "loss": 0.1441, "step": 31870 }, { "epoch": 0.05651169037721447, "grad_norm": 0.466796875, "learning_rate": 0.0015704326561941694, "loss": 0.137, "step": 31872 }, { "epoch": 0.056515236542524286, "grad_norm": 0.53515625, "learning_rate": 0.0015703819146320385, "loss": 0.4438, "step": 31874 }, { "epoch": 0.0565187827078341, "grad_norm": 0.2490234375, "learning_rate": 0.0015703311710127308, "loss": 0.1593, "step": 31876 }, { "epoch": 0.056522328873143915, "grad_norm": 0.3125, "learning_rate": 0.0015702804253364675, "loss": 0.1592, "step": 31878 }, { "epoch": 0.05652587503845373, "grad_norm": 0.86328125, "learning_rate": 0.001570229677603471, "loss": 0.2546, "step": 31880 }, { "epoch": 0.056529421203763544, "grad_norm": 0.76953125, "learning_rate": 0.0015701789278139628, "loss": 0.1914, "step": 31882 }, { "epoch": 0.05653296736907336, "grad_norm": 0.439453125, "learning_rate": 0.0015701281759681652, "loss": 0.1811, "step": 31884 }, { "epoch": 0.05653651353438317, "grad_norm": 0.2216796875, "learning_rate": 0.0015700774220663003, "loss": 0.1303, "step": 31886 }, { "epoch": 0.05654005969969299, "grad_norm": 0.466796875, "learning_rate": 0.0015700266661085895, "loss": 0.2166, "step": 31888 }, { "epoch": 0.05654360586500281, "grad_norm": 0.578125, "learning_rate": 0.0015699759080952552, "loss": 0.1777, "step": 31890 }, { "epoch": 0.056547152030312624, "grad_norm": 1.2265625, "learning_rate": 0.0015699251480265192, "loss": 0.2194, "step": 31892 }, { "epoch": 0.05655069819562244, "grad_norm": 1.6796875, "learning_rate": 0.0015698743859026035, "loss": 0.2464, "step": 31894 }, { "epoch": 0.05655424436093225, "grad_norm": 2.140625, "learning_rate": 0.0015698236217237306, "loss": 0.2988, "step": 31896 }, { "epoch": 0.05655779052624207, "grad_norm": 0.416015625, "learning_rate": 0.0015697728554901213, "loss": 0.2077, "step": 31898 }, { "epoch": 0.05656133669155188, "grad_norm": 1.703125, "learning_rate": 0.001569722087201999, "loss": 0.3094, "step": 31900 }, { "epoch": 0.0565648828568617, "grad_norm": 0.8125, "learning_rate": 0.001569671316859585, "loss": 0.2131, "step": 31902 }, { "epoch": 0.05656842902217151, "grad_norm": 2.359375, "learning_rate": 0.0015696205444631016, "loss": 0.2686, "step": 31904 }, { "epoch": 0.056571975187481326, "grad_norm": 0.302734375, "learning_rate": 0.0015695697700127706, "loss": 0.1976, "step": 31906 }, { "epoch": 0.05657552135279114, "grad_norm": 0.3828125, "learning_rate": 0.001569518993508814, "loss": 0.1556, "step": 31908 }, { "epoch": 0.056579067518100955, "grad_norm": 0.201171875, "learning_rate": 0.0015694682149514543, "loss": 0.1451, "step": 31910 }, { "epoch": 0.05658261368341077, "grad_norm": 0.388671875, "learning_rate": 0.001569417434340913, "loss": 0.2178, "step": 31912 }, { "epoch": 0.05658615984872059, "grad_norm": 0.5859375, "learning_rate": 0.001569366651677413, "loss": 0.2007, "step": 31914 }, { "epoch": 0.056589706014030405, "grad_norm": 0.482421875, "learning_rate": 0.0015693158669611755, "loss": 0.2089, "step": 31916 }, { "epoch": 0.05659325217934022, "grad_norm": 0.66015625, "learning_rate": 0.0015692650801924235, "loss": 0.183, "step": 31918 }, { "epoch": 0.056596798344650034, "grad_norm": 0.41796875, "learning_rate": 0.001569214291371378, "loss": 0.193, "step": 31920 }, { "epoch": 0.05660034450995985, "grad_norm": 0.26171875, "learning_rate": 0.0015691635004982621, "loss": 0.1504, "step": 31922 }, { "epoch": 0.056603890675269664, "grad_norm": 0.53125, "learning_rate": 0.0015691127075732976, "loss": 0.1574, "step": 31924 }, { "epoch": 0.05660743684057948, "grad_norm": 0.36328125, "learning_rate": 0.0015690619125967068, "loss": 0.2717, "step": 31926 }, { "epoch": 0.05661098300588929, "grad_norm": 0.52734375, "learning_rate": 0.0015690111155687112, "loss": 0.1626, "step": 31928 }, { "epoch": 0.05661452917119911, "grad_norm": 0.85546875, "learning_rate": 0.0015689603164895337, "loss": 0.4407, "step": 31930 }, { "epoch": 0.05661807533650892, "grad_norm": 2.046875, "learning_rate": 0.001568909515359396, "loss": 0.1987, "step": 31932 }, { "epoch": 0.056621621501818736, "grad_norm": 0.30859375, "learning_rate": 0.0015688587121785208, "loss": 0.1756, "step": 31934 }, { "epoch": 0.05662516766712856, "grad_norm": 0.62890625, "learning_rate": 0.0015688079069471293, "loss": 0.1342, "step": 31936 }, { "epoch": 0.05662871383243837, "grad_norm": 1.0234375, "learning_rate": 0.001568757099665445, "loss": 0.2281, "step": 31938 }, { "epoch": 0.05663225999774819, "grad_norm": 0.349609375, "learning_rate": 0.001568706290333689, "loss": 0.1942, "step": 31940 }, { "epoch": 0.056635806163058, "grad_norm": 1.703125, "learning_rate": 0.001568655478952084, "loss": 0.1968, "step": 31942 }, { "epoch": 0.056639352328367816, "grad_norm": 4.46875, "learning_rate": 0.0015686046655208523, "loss": 0.4224, "step": 31944 }, { "epoch": 0.05664289849367763, "grad_norm": 0.2734375, "learning_rate": 0.001568553850040216, "loss": 0.1908, "step": 31946 }, { "epoch": 0.056646444658987445, "grad_norm": 0.83203125, "learning_rate": 0.0015685030325103973, "loss": 0.2404, "step": 31948 }, { "epoch": 0.05664999082429726, "grad_norm": 1.46875, "learning_rate": 0.0015684522129316186, "loss": 0.2682, "step": 31950 }, { "epoch": 0.056653536989607074, "grad_norm": 0.498046875, "learning_rate": 0.0015684013913041016, "loss": 0.1954, "step": 31952 }, { "epoch": 0.05665708315491689, "grad_norm": 0.2451171875, "learning_rate": 0.0015683505676280694, "loss": 0.24, "step": 31954 }, { "epoch": 0.0566606293202267, "grad_norm": 0.5625, "learning_rate": 0.0015682997419037437, "loss": 0.1908, "step": 31956 }, { "epoch": 0.056664175485536525, "grad_norm": 0.8828125, "learning_rate": 0.001568248914131347, "loss": 0.2108, "step": 31958 }, { "epoch": 0.05666772165084634, "grad_norm": 0.5546875, "learning_rate": 0.0015681980843111015, "loss": 0.2204, "step": 31960 }, { "epoch": 0.056671267816156154, "grad_norm": 0.294921875, "learning_rate": 0.0015681472524432297, "loss": 0.1546, "step": 31962 }, { "epoch": 0.05667481398146597, "grad_norm": 0.5390625, "learning_rate": 0.0015680964185279533, "loss": 0.2263, "step": 31964 }, { "epoch": 0.05667836014677578, "grad_norm": 0.271484375, "learning_rate": 0.0015680455825654955, "loss": 0.2358, "step": 31966 }, { "epoch": 0.0566819063120856, "grad_norm": 0.51953125, "learning_rate": 0.001567994744556078, "loss": 0.2097, "step": 31968 }, { "epoch": 0.05668545247739541, "grad_norm": 0.265625, "learning_rate": 0.0015679439044999236, "loss": 0.189, "step": 31970 }, { "epoch": 0.05668899864270523, "grad_norm": 0.94921875, "learning_rate": 0.0015678930623972541, "loss": 0.2378, "step": 31972 }, { "epoch": 0.05669254480801504, "grad_norm": 1.25, "learning_rate": 0.0015678422182482923, "loss": 0.1974, "step": 31974 }, { "epoch": 0.056696090973324856, "grad_norm": 0.390625, "learning_rate": 0.0015677913720532602, "loss": 0.1609, "step": 31976 }, { "epoch": 0.05669963713863467, "grad_norm": 1.8359375, "learning_rate": 0.0015677405238123806, "loss": 0.1428, "step": 31978 }, { "epoch": 0.056703183303944485, "grad_norm": 0.9453125, "learning_rate": 0.0015676896735258754, "loss": 0.1898, "step": 31980 }, { "epoch": 0.056706729469254306, "grad_norm": 0.494140625, "learning_rate": 0.0015676388211939673, "loss": 0.1567, "step": 31982 }, { "epoch": 0.05671027563456412, "grad_norm": 0.6328125, "learning_rate": 0.0015675879668168786, "loss": 0.152, "step": 31984 }, { "epoch": 0.056713821799873936, "grad_norm": 1.203125, "learning_rate": 0.001567537110394832, "loss": 0.2965, "step": 31986 }, { "epoch": 0.05671736796518375, "grad_norm": 1.515625, "learning_rate": 0.0015674862519280495, "loss": 0.1973, "step": 31988 }, { "epoch": 0.056720914130493565, "grad_norm": 0.37109375, "learning_rate": 0.0015674353914167537, "loss": 0.1426, "step": 31990 }, { "epoch": 0.05672446029580338, "grad_norm": 0.6640625, "learning_rate": 0.0015673845288611667, "loss": 0.1841, "step": 31992 }, { "epoch": 0.056728006461113194, "grad_norm": 0.2578125, "learning_rate": 0.0015673336642615116, "loss": 0.1729, "step": 31994 }, { "epoch": 0.05673155262642301, "grad_norm": 0.25390625, "learning_rate": 0.0015672827976180106, "loss": 0.1624, "step": 31996 }, { "epoch": 0.05673509879173282, "grad_norm": 0.435546875, "learning_rate": 0.001567231928930886, "loss": 0.2301, "step": 31998 }, { "epoch": 0.05673864495704264, "grad_norm": 1.4453125, "learning_rate": 0.0015671810582003605, "loss": 0.1902, "step": 32000 }, { "epoch": 0.05674219112235245, "grad_norm": 0.265625, "learning_rate": 0.001567130185426656, "loss": 0.1788, "step": 32002 }, { "epoch": 0.05674573728766227, "grad_norm": 0.56640625, "learning_rate": 0.0015670793106099955, "loss": 0.2429, "step": 32004 }, { "epoch": 0.05674928345297209, "grad_norm": 0.57421875, "learning_rate": 0.0015670284337506014, "loss": 0.1885, "step": 32006 }, { "epoch": 0.0567528296182819, "grad_norm": 0.73046875, "learning_rate": 0.0015669775548486965, "loss": 0.2028, "step": 32008 }, { "epoch": 0.05675637578359172, "grad_norm": 0.52734375, "learning_rate": 0.0015669266739045027, "loss": 0.2217, "step": 32010 }, { "epoch": 0.05675992194890153, "grad_norm": 0.349609375, "learning_rate": 0.0015668757909182431, "loss": 0.2279, "step": 32012 }, { "epoch": 0.056763468114211346, "grad_norm": 0.5078125, "learning_rate": 0.00156682490589014, "loss": 0.2151, "step": 32014 }, { "epoch": 0.05676701427952116, "grad_norm": 0.625, "learning_rate": 0.0015667740188204157, "loss": 0.1602, "step": 32016 }, { "epoch": 0.056770560444830975, "grad_norm": 0.328125, "learning_rate": 0.0015667231297092931, "loss": 0.2891, "step": 32018 }, { "epoch": 0.05677410661014079, "grad_norm": 0.79296875, "learning_rate": 0.0015666722385569948, "loss": 0.2027, "step": 32020 }, { "epoch": 0.056777652775450604, "grad_norm": 0.7890625, "learning_rate": 0.001566621345363743, "loss": 0.1645, "step": 32022 }, { "epoch": 0.05678119894076042, "grad_norm": 0.228515625, "learning_rate": 0.0015665704501297607, "loss": 0.1633, "step": 32024 }, { "epoch": 0.05678474510607024, "grad_norm": 0.423828125, "learning_rate": 0.0015665195528552695, "loss": 0.1464, "step": 32026 }, { "epoch": 0.056788291271380055, "grad_norm": 0.310546875, "learning_rate": 0.0015664686535404938, "loss": 0.2002, "step": 32028 }, { "epoch": 0.05679183743668987, "grad_norm": 0.9375, "learning_rate": 0.0015664177521856543, "loss": 0.2033, "step": 32030 }, { "epoch": 0.056795383601999684, "grad_norm": 1.46875, "learning_rate": 0.0015663668487909752, "loss": 0.2873, "step": 32032 }, { "epoch": 0.0567989297673095, "grad_norm": 0.71484375, "learning_rate": 0.001566315943356678, "loss": 0.2676, "step": 32034 }, { "epoch": 0.05680247593261931, "grad_norm": 1.15625, "learning_rate": 0.0015662650358829857, "loss": 0.1922, "step": 32036 }, { "epoch": 0.05680602209792913, "grad_norm": 0.404296875, "learning_rate": 0.001566214126370121, "loss": 0.2233, "step": 32038 }, { "epoch": 0.05680956826323894, "grad_norm": 0.5703125, "learning_rate": 0.001566163214818307, "loss": 0.1665, "step": 32040 }, { "epoch": 0.05681311442854876, "grad_norm": 0.3671875, "learning_rate": 0.001566112301227765, "loss": 0.2876, "step": 32042 }, { "epoch": 0.05681666059385857, "grad_norm": 0.36328125, "learning_rate": 0.0015660613855987195, "loss": 0.1457, "step": 32044 }, { "epoch": 0.056820206759168386, "grad_norm": 0.244140625, "learning_rate": 0.0015660104679313916, "loss": 0.188, "step": 32046 }, { "epoch": 0.0568237529244782, "grad_norm": 0.8828125, "learning_rate": 0.001565959548226005, "loss": 0.2686, "step": 32048 }, { "epoch": 0.05682729908978802, "grad_norm": 0.6484375, "learning_rate": 0.0015659086264827816, "loss": 0.18, "step": 32050 }, { "epoch": 0.05683084525509784, "grad_norm": 0.4296875, "learning_rate": 0.0015658577027019447, "loss": 0.2925, "step": 32052 }, { "epoch": 0.05683439142040765, "grad_norm": 0.396484375, "learning_rate": 0.0015658067768837165, "loss": 0.2012, "step": 32054 }, { "epoch": 0.056837937585717466, "grad_norm": 0.408203125, "learning_rate": 0.0015657558490283206, "loss": 0.1614, "step": 32056 }, { "epoch": 0.05684148375102728, "grad_norm": 0.8828125, "learning_rate": 0.0015657049191359787, "loss": 0.2312, "step": 32058 }, { "epoch": 0.056845029916337095, "grad_norm": 0.470703125, "learning_rate": 0.0015656539872069143, "loss": 0.4211, "step": 32060 }, { "epoch": 0.05684857608164691, "grad_norm": 1.9921875, "learning_rate": 0.0015656030532413497, "loss": 0.5052, "step": 32062 }, { "epoch": 0.056852122246956724, "grad_norm": 0.93359375, "learning_rate": 0.0015655521172395078, "loss": 0.2941, "step": 32064 }, { "epoch": 0.05685566841226654, "grad_norm": 0.6640625, "learning_rate": 0.0015655011792016112, "loss": 0.1685, "step": 32066 }, { "epoch": 0.05685921457757635, "grad_norm": 0.171875, "learning_rate": 0.0015654502391278832, "loss": 0.1551, "step": 32068 }, { "epoch": 0.05686276074288617, "grad_norm": 1.7890625, "learning_rate": 0.001565399297018546, "loss": 0.1756, "step": 32070 }, { "epoch": 0.05686630690819599, "grad_norm": 0.326171875, "learning_rate": 0.0015653483528738224, "loss": 0.1792, "step": 32072 }, { "epoch": 0.056869853073505804, "grad_norm": 1.1953125, "learning_rate": 0.001565297406693936, "loss": 0.2405, "step": 32074 }, { "epoch": 0.05687339923881562, "grad_norm": 0.291015625, "learning_rate": 0.0015652464584791081, "loss": 0.2502, "step": 32076 }, { "epoch": 0.05687694540412543, "grad_norm": 0.51171875, "learning_rate": 0.0015651955082295632, "loss": 0.1705, "step": 32078 }, { "epoch": 0.05688049156943525, "grad_norm": 0.423828125, "learning_rate": 0.0015651445559455232, "loss": 0.1881, "step": 32080 }, { "epoch": 0.05688403773474506, "grad_norm": 0.26171875, "learning_rate": 0.001565093601627211, "loss": 0.1477, "step": 32082 }, { "epoch": 0.056887583900054876, "grad_norm": 1.5390625, "learning_rate": 0.0015650426452748498, "loss": 0.233, "step": 32084 }, { "epoch": 0.05689113006536469, "grad_norm": 4.375, "learning_rate": 0.0015649916868886618, "loss": 0.2199, "step": 32086 }, { "epoch": 0.056894676230674505, "grad_norm": 0.283203125, "learning_rate": 0.0015649407264688706, "loss": 0.1826, "step": 32088 }, { "epoch": 0.05689822239598432, "grad_norm": 0.30859375, "learning_rate": 0.0015648897640156987, "loss": 0.2291, "step": 32090 }, { "epoch": 0.056901768561294135, "grad_norm": 0.240234375, "learning_rate": 0.001564838799529369, "loss": 0.1866, "step": 32092 }, { "epoch": 0.056905314726603956, "grad_norm": 0.484375, "learning_rate": 0.0015647878330101042, "loss": 0.2349, "step": 32094 }, { "epoch": 0.05690886089191377, "grad_norm": 0.59375, "learning_rate": 0.0015647368644581274, "loss": 0.1454, "step": 32096 }, { "epoch": 0.056912407057223585, "grad_norm": 0.31640625, "learning_rate": 0.0015646858938736618, "loss": 0.1743, "step": 32098 }, { "epoch": 0.0569159532225334, "grad_norm": 0.953125, "learning_rate": 0.0015646349212569298, "loss": 0.2142, "step": 32100 }, { "epoch": 0.056919499387843214, "grad_norm": 0.470703125, "learning_rate": 0.0015645839466081545, "loss": 0.1674, "step": 32102 }, { "epoch": 0.05692304555315303, "grad_norm": 0.62109375, "learning_rate": 0.001564532969927559, "loss": 0.2256, "step": 32104 }, { "epoch": 0.05692659171846284, "grad_norm": 0.90234375, "learning_rate": 0.001564481991215366, "loss": 0.2044, "step": 32106 }, { "epoch": 0.05693013788377266, "grad_norm": 0.6328125, "learning_rate": 0.0015644310104717988, "loss": 0.2192, "step": 32108 }, { "epoch": 0.05693368404908247, "grad_norm": 0.291015625, "learning_rate": 0.0015643800276970799, "loss": 0.2244, "step": 32110 }, { "epoch": 0.05693723021439229, "grad_norm": 0.271484375, "learning_rate": 0.0015643290428914327, "loss": 0.2352, "step": 32112 }, { "epoch": 0.0569407763797021, "grad_norm": 1.578125, "learning_rate": 0.00156427805605508, "loss": 0.1942, "step": 32114 }, { "epoch": 0.056944322545011916, "grad_norm": 0.65625, "learning_rate": 0.0015642270671882446, "loss": 0.1818, "step": 32116 }, { "epoch": 0.05694786871032174, "grad_norm": 0.466796875, "learning_rate": 0.0015641760762911496, "loss": 0.1862, "step": 32118 }, { "epoch": 0.05695141487563155, "grad_norm": 8.1875, "learning_rate": 0.0015641250833640184, "loss": 0.3622, "step": 32120 }, { "epoch": 0.05695496104094137, "grad_norm": 0.57421875, "learning_rate": 0.0015640740884070735, "loss": 0.2184, "step": 32122 }, { "epoch": 0.05695850720625118, "grad_norm": 0.52734375, "learning_rate": 0.001564023091420538, "loss": 0.189, "step": 32124 }, { "epoch": 0.056962053371560996, "grad_norm": 1.6328125, "learning_rate": 0.001563972092404635, "loss": 0.2492, "step": 32126 }, { "epoch": 0.05696559953687081, "grad_norm": 11.5, "learning_rate": 0.0015639210913595877, "loss": 0.3734, "step": 32128 }, { "epoch": 0.056969145702180625, "grad_norm": 0.875, "learning_rate": 0.001563870088285619, "loss": 0.1814, "step": 32130 }, { "epoch": 0.05697269186749044, "grad_norm": 0.52734375, "learning_rate": 0.001563819083182952, "loss": 0.1948, "step": 32132 }, { "epoch": 0.056976238032800254, "grad_norm": 0.82421875, "learning_rate": 0.0015637680760518101, "loss": 0.2031, "step": 32134 }, { "epoch": 0.05697978419811007, "grad_norm": 0.59375, "learning_rate": 0.0015637170668924153, "loss": 0.1938, "step": 32136 }, { "epoch": 0.05698333036341988, "grad_norm": 0.609375, "learning_rate": 0.0015636660557049918, "loss": 0.2081, "step": 32138 }, { "epoch": 0.056986876528729705, "grad_norm": 1.328125, "learning_rate": 0.0015636150424897621, "loss": 0.3748, "step": 32140 }, { "epoch": 0.05699042269403952, "grad_norm": 1.515625, "learning_rate": 0.0015635640272469496, "loss": 0.3033, "step": 32142 }, { "epoch": 0.056993968859349334, "grad_norm": 0.73046875, "learning_rate": 0.0015635130099767773, "loss": 0.2421, "step": 32144 }, { "epoch": 0.05699751502465915, "grad_norm": 0.205078125, "learning_rate": 0.0015634619906794687, "loss": 0.1306, "step": 32146 }, { "epoch": 0.05700106118996896, "grad_norm": 2.078125, "learning_rate": 0.0015634109693552466, "loss": 0.4345, "step": 32148 }, { "epoch": 0.05700460735527878, "grad_norm": 0.4296875, "learning_rate": 0.0015633599460043337, "loss": 0.1538, "step": 32150 }, { "epoch": 0.05700815352058859, "grad_norm": 0.9609375, "learning_rate": 0.0015633089206269534, "loss": 0.2521, "step": 32152 }, { "epoch": 0.05701169968589841, "grad_norm": 0.3515625, "learning_rate": 0.001563257893223329, "loss": 0.1935, "step": 32154 }, { "epoch": 0.05701524585120822, "grad_norm": 0.259765625, "learning_rate": 0.001563206863793684, "loss": 0.2224, "step": 32156 }, { "epoch": 0.057018792016518036, "grad_norm": 0.5078125, "learning_rate": 0.0015631558323382412, "loss": 0.2041, "step": 32158 }, { "epoch": 0.05702233818182785, "grad_norm": 0.80078125, "learning_rate": 0.001563104798857224, "loss": 0.1875, "step": 32160 }, { "epoch": 0.05702588434713767, "grad_norm": 0.2138671875, "learning_rate": 0.001563053763350855, "loss": 0.2855, "step": 32162 }, { "epoch": 0.057029430512447486, "grad_norm": 0.423828125, "learning_rate": 0.001563002725819358, "loss": 0.2074, "step": 32164 }, { "epoch": 0.0570329766777573, "grad_norm": 0.55078125, "learning_rate": 0.001562951686262956, "loss": 0.2322, "step": 32166 }, { "epoch": 0.057036522843067115, "grad_norm": 0.6328125, "learning_rate": 0.0015629006446818722, "loss": 0.1924, "step": 32168 }, { "epoch": 0.05704006900837693, "grad_norm": 2.796875, "learning_rate": 0.0015628496010763298, "loss": 0.2221, "step": 32170 }, { "epoch": 0.057043615173686744, "grad_norm": 0.7734375, "learning_rate": 0.0015627985554465523, "loss": 0.2148, "step": 32172 }, { "epoch": 0.05704716133899656, "grad_norm": 1.1640625, "learning_rate": 0.0015627475077927627, "loss": 0.2548, "step": 32174 }, { "epoch": 0.057050707504306374, "grad_norm": 1.890625, "learning_rate": 0.0015626964581151844, "loss": 0.157, "step": 32176 }, { "epoch": 0.05705425366961619, "grad_norm": 0.349609375, "learning_rate": 0.00156264540641404, "loss": 0.1892, "step": 32178 }, { "epoch": 0.057057799834926, "grad_norm": 0.86328125, "learning_rate": 0.001562594352689554, "loss": 0.1976, "step": 32180 }, { "epoch": 0.05706134600023582, "grad_norm": 0.9921875, "learning_rate": 0.0015625432969419487, "loss": 0.1798, "step": 32182 }, { "epoch": 0.05706489216554563, "grad_norm": 1.6953125, "learning_rate": 0.0015624922391714478, "loss": 0.2785, "step": 32184 }, { "epoch": 0.05706843833085545, "grad_norm": 0.73828125, "learning_rate": 0.0015624411793782747, "loss": 0.2852, "step": 32186 }, { "epoch": 0.05707198449616527, "grad_norm": 0.67578125, "learning_rate": 0.001562390117562652, "loss": 0.227, "step": 32188 }, { "epoch": 0.05707553066147508, "grad_norm": 0.609375, "learning_rate": 0.0015623390537248037, "loss": 0.1537, "step": 32190 }, { "epoch": 0.0570790768267849, "grad_norm": 1.4375, "learning_rate": 0.0015622879878649531, "loss": 0.2325, "step": 32192 }, { "epoch": 0.05708262299209471, "grad_norm": 0.341796875, "learning_rate": 0.001562236919983323, "loss": 0.1795, "step": 32194 }, { "epoch": 0.057086169157404526, "grad_norm": 0.62890625, "learning_rate": 0.0015621858500801376, "loss": 0.1908, "step": 32196 }, { "epoch": 0.05708971532271434, "grad_norm": 1.5078125, "learning_rate": 0.0015621347781556195, "loss": 0.253, "step": 32198 }, { "epoch": 0.057093261488024155, "grad_norm": 0.96875, "learning_rate": 0.0015620837042099925, "loss": 0.1902, "step": 32200 }, { "epoch": 0.05709680765333397, "grad_norm": 0.68359375, "learning_rate": 0.0015620326282434795, "loss": 0.232, "step": 32202 }, { "epoch": 0.057100353818643784, "grad_norm": 0.330078125, "learning_rate": 0.0015619815502563043, "loss": 0.1615, "step": 32204 }, { "epoch": 0.0571038999839536, "grad_norm": 0.3515625, "learning_rate": 0.0015619304702486903, "loss": 0.1635, "step": 32206 }, { "epoch": 0.05710744614926342, "grad_norm": 0.39453125, "learning_rate": 0.0015618793882208606, "loss": 0.1833, "step": 32208 }, { "epoch": 0.057110992314573235, "grad_norm": 7.15625, "learning_rate": 0.0015618283041730389, "loss": 0.4443, "step": 32210 }, { "epoch": 0.05711453847988305, "grad_norm": 0.62890625, "learning_rate": 0.001561777218105448, "loss": 0.1918, "step": 32212 }, { "epoch": 0.057118084645192864, "grad_norm": 0.44921875, "learning_rate": 0.0015617261300183123, "loss": 0.208, "step": 32214 }, { "epoch": 0.05712163081050268, "grad_norm": 0.41796875, "learning_rate": 0.0015616750399118544, "loss": 0.1831, "step": 32216 }, { "epoch": 0.05712517697581249, "grad_norm": 1.328125, "learning_rate": 0.001561623947786298, "loss": 0.1804, "step": 32218 }, { "epoch": 0.05712872314112231, "grad_norm": 0.2177734375, "learning_rate": 0.0015615728536418668, "loss": 0.1726, "step": 32220 }, { "epoch": 0.05713226930643212, "grad_norm": 0.349609375, "learning_rate": 0.001561521757478784, "loss": 0.1702, "step": 32222 }, { "epoch": 0.05713581547174194, "grad_norm": 0.2412109375, "learning_rate": 0.0015614706592972732, "loss": 0.1738, "step": 32224 }, { "epoch": 0.05713936163705175, "grad_norm": 0.2158203125, "learning_rate": 0.0015614195590975574, "loss": 0.1705, "step": 32226 }, { "epoch": 0.057142907802361566, "grad_norm": 0.37109375, "learning_rate": 0.0015613684568798612, "loss": 0.2368, "step": 32228 }, { "epoch": 0.05714645396767139, "grad_norm": 0.294921875, "learning_rate": 0.0015613173526444067, "loss": 0.2326, "step": 32230 }, { "epoch": 0.0571500001329812, "grad_norm": 0.37890625, "learning_rate": 0.0015612662463914185, "loss": 0.1535, "step": 32232 }, { "epoch": 0.057153546298291016, "grad_norm": 0.376953125, "learning_rate": 0.0015612151381211193, "loss": 0.2127, "step": 32234 }, { "epoch": 0.05715709246360083, "grad_norm": 1.21875, "learning_rate": 0.0015611640278337334, "loss": 0.2155, "step": 32236 }, { "epoch": 0.057160638628910646, "grad_norm": 0.2734375, "learning_rate": 0.0015611129155294835, "loss": 0.182, "step": 32238 }, { "epoch": 0.05716418479422046, "grad_norm": 0.296875, "learning_rate": 0.0015610618012085935, "loss": 0.1956, "step": 32240 }, { "epoch": 0.057167730959530275, "grad_norm": 0.36328125, "learning_rate": 0.0015610106848712875, "loss": 0.171, "step": 32242 }, { "epoch": 0.05717127712484009, "grad_norm": 0.25, "learning_rate": 0.0015609595665177882, "loss": 0.1812, "step": 32244 }, { "epoch": 0.057174823290149904, "grad_norm": 0.53515625, "learning_rate": 0.0015609084461483196, "loss": 0.193, "step": 32246 }, { "epoch": 0.05717836945545972, "grad_norm": 0.6328125, "learning_rate": 0.0015608573237631052, "loss": 0.2598, "step": 32248 }, { "epoch": 0.05718191562076953, "grad_norm": 0.4765625, "learning_rate": 0.0015608061993623687, "loss": 0.1934, "step": 32250 }, { "epoch": 0.05718546178607935, "grad_norm": 0.3046875, "learning_rate": 0.001560755072946333, "loss": 0.2219, "step": 32252 }, { "epoch": 0.05718900795138917, "grad_norm": 0.212890625, "learning_rate": 0.0015607039445152229, "loss": 0.1541, "step": 32254 }, { "epoch": 0.05719255411669898, "grad_norm": 0.78125, "learning_rate": 0.001560652814069261, "loss": 0.2698, "step": 32256 }, { "epoch": 0.0571961002820088, "grad_norm": 0.50390625, "learning_rate": 0.0015606016816086714, "loss": 0.1809, "step": 32258 }, { "epoch": 0.05719964644731861, "grad_norm": 1.984375, "learning_rate": 0.0015605505471336775, "loss": 0.1961, "step": 32260 }, { "epoch": 0.05720319261262843, "grad_norm": 0.453125, "learning_rate": 0.0015604994106445031, "loss": 0.2081, "step": 32262 }, { "epoch": 0.05720673877793824, "grad_norm": 0.63671875, "learning_rate": 0.001560448272141372, "loss": 0.2378, "step": 32264 }, { "epoch": 0.057210284943248056, "grad_norm": 0.671875, "learning_rate": 0.0015603971316245073, "loss": 0.2678, "step": 32266 }, { "epoch": 0.05721383110855787, "grad_norm": 0.51953125, "learning_rate": 0.0015603459890941329, "loss": 0.2462, "step": 32268 }, { "epoch": 0.057217377273867685, "grad_norm": 0.314453125, "learning_rate": 0.0015602948445504728, "loss": 0.155, "step": 32270 }, { "epoch": 0.0572209234391775, "grad_norm": 0.271484375, "learning_rate": 0.0015602436979937505, "loss": 0.1951, "step": 32272 }, { "epoch": 0.057224469604487314, "grad_norm": 0.453125, "learning_rate": 0.0015601925494241893, "loss": 0.222, "step": 32274 }, { "epoch": 0.057228015769797136, "grad_norm": 0.2041015625, "learning_rate": 0.0015601413988420132, "loss": 0.1967, "step": 32276 }, { "epoch": 0.05723156193510695, "grad_norm": 0.64453125, "learning_rate": 0.0015600902462474461, "loss": 0.1306, "step": 32278 }, { "epoch": 0.057235108100416765, "grad_norm": 1.484375, "learning_rate": 0.0015600390916407118, "loss": 0.2058, "step": 32280 }, { "epoch": 0.05723865426572658, "grad_norm": 0.6328125, "learning_rate": 0.0015599879350220334, "loss": 0.1973, "step": 32282 }, { "epoch": 0.057242200431036394, "grad_norm": 1.0, "learning_rate": 0.0015599367763916346, "loss": 0.1665, "step": 32284 }, { "epoch": 0.05724574659634621, "grad_norm": 0.412109375, "learning_rate": 0.00155988561574974, "loss": 0.1601, "step": 32286 }, { "epoch": 0.05724929276165602, "grad_norm": 0.48046875, "learning_rate": 0.0015598344530965724, "loss": 0.1869, "step": 32288 }, { "epoch": 0.05725283892696584, "grad_norm": 0.56640625, "learning_rate": 0.0015597832884323567, "loss": 0.1652, "step": 32290 }, { "epoch": 0.05725638509227565, "grad_norm": 1.375, "learning_rate": 0.0015597321217573153, "loss": 0.2419, "step": 32292 }, { "epoch": 0.05725993125758547, "grad_norm": 0.5234375, "learning_rate": 0.0015596809530716732, "loss": 0.2808, "step": 32294 }, { "epoch": 0.05726347742289528, "grad_norm": 0.7890625, "learning_rate": 0.001559629782375653, "loss": 0.1761, "step": 32296 }, { "epoch": 0.0572670235882051, "grad_norm": 0.5703125, "learning_rate": 0.0015595786096694794, "loss": 0.1904, "step": 32298 }, { "epoch": 0.05727056975351492, "grad_norm": 0.2197265625, "learning_rate": 0.001559527434953376, "loss": 0.1479, "step": 32300 }, { "epoch": 0.05727411591882473, "grad_norm": 0.2578125, "learning_rate": 0.0015594762582275667, "loss": 0.2044, "step": 32302 }, { "epoch": 0.05727766208413455, "grad_norm": 0.2119140625, "learning_rate": 0.001559425079492275, "loss": 0.1646, "step": 32304 }, { "epoch": 0.05728120824944436, "grad_norm": 0.796875, "learning_rate": 0.0015593738987477249, "loss": 0.1767, "step": 32306 }, { "epoch": 0.057284754414754176, "grad_norm": 0.6328125, "learning_rate": 0.00155932271599414, "loss": 0.2523, "step": 32308 }, { "epoch": 0.05728830058006399, "grad_norm": 0.4609375, "learning_rate": 0.0015592715312317447, "loss": 0.1737, "step": 32310 }, { "epoch": 0.057291846745373805, "grad_norm": 0.77734375, "learning_rate": 0.001559220344460762, "loss": 0.228, "step": 32312 }, { "epoch": 0.05729539291068362, "grad_norm": 1.59375, "learning_rate": 0.0015591691556814165, "loss": 0.2514, "step": 32314 }, { "epoch": 0.057298939075993434, "grad_norm": 0.455078125, "learning_rate": 0.001559117964893932, "loss": 0.1679, "step": 32316 }, { "epoch": 0.05730248524130325, "grad_norm": 2.1875, "learning_rate": 0.001559066772098532, "loss": 0.4687, "step": 32318 }, { "epoch": 0.05730603140661306, "grad_norm": 0.306640625, "learning_rate": 0.0015590155772954406, "loss": 0.1545, "step": 32320 }, { "epoch": 0.057309577571922884, "grad_norm": 0.79296875, "learning_rate": 0.0015589643804848816, "loss": 0.2366, "step": 32322 }, { "epoch": 0.0573131237372327, "grad_norm": 0.240234375, "learning_rate": 0.0015589131816670795, "loss": 0.1679, "step": 32324 }, { "epoch": 0.057316669902542514, "grad_norm": 2.1875, "learning_rate": 0.001558861980842257, "loss": 0.2072, "step": 32326 }, { "epoch": 0.05732021606785233, "grad_norm": 1.0546875, "learning_rate": 0.0015588107780106393, "loss": 0.1914, "step": 32328 }, { "epoch": 0.05732376223316214, "grad_norm": 0.453125, "learning_rate": 0.0015587595731724492, "loss": 0.1454, "step": 32330 }, { "epoch": 0.05732730839847196, "grad_norm": 0.365234375, "learning_rate": 0.001558708366327912, "loss": 0.1816, "step": 32332 }, { "epoch": 0.05733085456378177, "grad_norm": 2.25, "learning_rate": 0.0015586571574772504, "loss": 0.2226, "step": 32334 }, { "epoch": 0.057334400729091586, "grad_norm": 0.392578125, "learning_rate": 0.0015586059466206888, "loss": 0.164, "step": 32336 }, { "epoch": 0.0573379468944014, "grad_norm": 1.7890625, "learning_rate": 0.001558554733758451, "loss": 0.2791, "step": 32338 }, { "epoch": 0.057341493059711215, "grad_norm": 12.875, "learning_rate": 0.0015585035188907617, "loss": 0.2768, "step": 32340 }, { "epoch": 0.05734503922502103, "grad_norm": 2.53125, "learning_rate": 0.0015584523020178438, "loss": 0.289, "step": 32342 }, { "epoch": 0.05734858539033085, "grad_norm": 2.375, "learning_rate": 0.001558401083139922, "loss": 0.2455, "step": 32344 }, { "epoch": 0.057352131555640666, "grad_norm": 0.53125, "learning_rate": 0.0015583498622572202, "loss": 0.1701, "step": 32346 }, { "epoch": 0.05735567772095048, "grad_norm": 0.3671875, "learning_rate": 0.0015582986393699622, "loss": 0.2135, "step": 32348 }, { "epoch": 0.057359223886260295, "grad_norm": 1.921875, "learning_rate": 0.0015582474144783722, "loss": 0.1853, "step": 32350 }, { "epoch": 0.05736277005157011, "grad_norm": 0.328125, "learning_rate": 0.0015581961875826744, "loss": 0.1776, "step": 32352 }, { "epoch": 0.057366316216879924, "grad_norm": 0.419921875, "learning_rate": 0.0015581449586830928, "loss": 0.2334, "step": 32354 }, { "epoch": 0.05736986238218974, "grad_norm": 0.5078125, "learning_rate": 0.0015580937277798511, "loss": 0.22, "step": 32356 }, { "epoch": 0.05737340854749955, "grad_norm": 0.400390625, "learning_rate": 0.0015580424948731732, "loss": 0.1895, "step": 32358 }, { "epoch": 0.05737695471280937, "grad_norm": 2.046875, "learning_rate": 0.0015579912599632836, "loss": 0.2682, "step": 32360 }, { "epoch": 0.05738050087811918, "grad_norm": 0.41015625, "learning_rate": 0.0015579400230504065, "loss": 0.2234, "step": 32362 }, { "epoch": 0.057384047043429, "grad_norm": 0.51953125, "learning_rate": 0.0015578887841347655, "loss": 0.1737, "step": 32364 }, { "epoch": 0.05738759320873882, "grad_norm": 0.4296875, "learning_rate": 0.001557837543216585, "loss": 0.2105, "step": 32366 }, { "epoch": 0.05739113937404863, "grad_norm": 0.703125, "learning_rate": 0.001557786300296089, "loss": 0.1848, "step": 32368 }, { "epoch": 0.05739468553935845, "grad_norm": 0.34765625, "learning_rate": 0.0015577350553735018, "loss": 0.1639, "step": 32370 }, { "epoch": 0.05739823170466826, "grad_norm": 0.482421875, "learning_rate": 0.0015576838084490473, "loss": 0.1814, "step": 32372 }, { "epoch": 0.05740177786997808, "grad_norm": 0.349609375, "learning_rate": 0.0015576325595229497, "loss": 0.2088, "step": 32374 }, { "epoch": 0.05740532403528789, "grad_norm": 0.88671875, "learning_rate": 0.001557581308595433, "loss": 0.2096, "step": 32376 }, { "epoch": 0.057408870200597706, "grad_norm": 0.5390625, "learning_rate": 0.0015575300556667213, "loss": 0.2658, "step": 32378 }, { "epoch": 0.05741241636590752, "grad_norm": 0.380859375, "learning_rate": 0.0015574788007370393, "loss": 0.2267, "step": 32380 }, { "epoch": 0.057415962531217335, "grad_norm": 0.388671875, "learning_rate": 0.0015574275438066105, "loss": 0.1544, "step": 32382 }, { "epoch": 0.05741950869652715, "grad_norm": 0.95703125, "learning_rate": 0.0015573762848756592, "loss": 0.2101, "step": 32384 }, { "epoch": 0.057423054861836964, "grad_norm": 2.578125, "learning_rate": 0.0015573250239444096, "loss": 0.2404, "step": 32386 }, { "epoch": 0.05742660102714678, "grad_norm": 0.2431640625, "learning_rate": 0.0015572737610130866, "loss": 0.1938, "step": 32388 }, { "epoch": 0.0574301471924566, "grad_norm": 0.384765625, "learning_rate": 0.0015572224960819132, "loss": 0.2129, "step": 32390 }, { "epoch": 0.057433693357766415, "grad_norm": 0.55078125, "learning_rate": 0.0015571712291511147, "loss": 0.1626, "step": 32392 }, { "epoch": 0.05743723952307623, "grad_norm": 0.32421875, "learning_rate": 0.0015571199602209148, "loss": 0.1362, "step": 32394 }, { "epoch": 0.057440785688386044, "grad_norm": 0.31640625, "learning_rate": 0.0015570686892915373, "loss": 0.1614, "step": 32396 }, { "epoch": 0.05744433185369586, "grad_norm": 0.392578125, "learning_rate": 0.0015570174163632069, "loss": 0.17, "step": 32398 }, { "epoch": 0.05744787801900567, "grad_norm": 0.86328125, "learning_rate": 0.001556966141436148, "loss": 0.1699, "step": 32400 }, { "epoch": 0.05745142418431549, "grad_norm": 0.6328125, "learning_rate": 0.0015569148645105844, "loss": 0.2534, "step": 32402 }, { "epoch": 0.0574549703496253, "grad_norm": 0.28125, "learning_rate": 0.0015568635855867411, "loss": 0.1865, "step": 32404 }, { "epoch": 0.057458516514935117, "grad_norm": 0.498046875, "learning_rate": 0.0015568123046648412, "loss": 0.1932, "step": 32406 }, { "epoch": 0.05746206268024493, "grad_norm": 0.421875, "learning_rate": 0.0015567610217451099, "loss": 0.4279, "step": 32408 }, { "epoch": 0.057465608845554746, "grad_norm": 0.328125, "learning_rate": 0.0015567097368277712, "loss": 0.2004, "step": 32410 }, { "epoch": 0.05746915501086457, "grad_norm": 0.625, "learning_rate": 0.0015566584499130494, "loss": 0.2572, "step": 32412 }, { "epoch": 0.05747270117617438, "grad_norm": 0.703125, "learning_rate": 0.001556607161001169, "loss": 0.1913, "step": 32414 }, { "epoch": 0.057476247341484196, "grad_norm": 0.9140625, "learning_rate": 0.0015565558700923537, "loss": 0.3234, "step": 32416 }, { "epoch": 0.05747979350679401, "grad_norm": 2.0625, "learning_rate": 0.0015565045771868287, "loss": 0.3797, "step": 32418 }, { "epoch": 0.057483339672103825, "grad_norm": 0.3828125, "learning_rate": 0.0015564532822848174, "loss": 0.172, "step": 32420 }, { "epoch": 0.05748688583741364, "grad_norm": 0.71875, "learning_rate": 0.0015564019853865447, "loss": 0.1727, "step": 32422 }, { "epoch": 0.057490432002723454, "grad_norm": 0.421875, "learning_rate": 0.001556350686492235, "loss": 0.1625, "step": 32424 }, { "epoch": 0.05749397816803327, "grad_norm": 0.72265625, "learning_rate": 0.0015562993856021124, "loss": 0.1577, "step": 32426 }, { "epoch": 0.057497524333343084, "grad_norm": 0.640625, "learning_rate": 0.0015562480827164013, "loss": 0.1856, "step": 32428 }, { "epoch": 0.0575010704986529, "grad_norm": 0.796875, "learning_rate": 0.001556196777835326, "loss": 0.1747, "step": 32430 }, { "epoch": 0.05750461666396271, "grad_norm": 2.8125, "learning_rate": 0.0015561454709591112, "loss": 0.1919, "step": 32432 }, { "epoch": 0.057508162829272534, "grad_norm": 0.232421875, "learning_rate": 0.001556094162087981, "loss": 0.1671, "step": 32434 }, { "epoch": 0.05751170899458235, "grad_norm": 0.296875, "learning_rate": 0.00155604285122216, "loss": 0.1457, "step": 32436 }, { "epoch": 0.05751525515989216, "grad_norm": 1.8203125, "learning_rate": 0.001555991538361872, "loss": 0.4397, "step": 32438 }, { "epoch": 0.05751880132520198, "grad_norm": 2.1875, "learning_rate": 0.0015559402235073424, "loss": 0.1941, "step": 32440 }, { "epoch": 0.05752234749051179, "grad_norm": 0.47265625, "learning_rate": 0.001555888906658795, "loss": 0.2278, "step": 32442 }, { "epoch": 0.05752589365582161, "grad_norm": 0.5703125, "learning_rate": 0.001555837587816454, "loss": 0.1521, "step": 32444 }, { "epoch": 0.05752943982113142, "grad_norm": 0.50390625, "learning_rate": 0.0015557862669805444, "loss": 0.208, "step": 32446 }, { "epoch": 0.057532985986441236, "grad_norm": 0.25, "learning_rate": 0.0015557349441512906, "loss": 0.2124, "step": 32448 }, { "epoch": 0.05753653215175105, "grad_norm": 1.96875, "learning_rate": 0.0015556836193289167, "loss": 0.2536, "step": 32450 }, { "epoch": 0.057540078317060865, "grad_norm": 0.859375, "learning_rate": 0.0015556322925136476, "loss": 0.2857, "step": 32452 }, { "epoch": 0.05754362448237068, "grad_norm": 0.251953125, "learning_rate": 0.001555580963705707, "loss": 0.1598, "step": 32454 }, { "epoch": 0.057547170647680494, "grad_norm": 0.8828125, "learning_rate": 0.00155552963290532, "loss": 0.2007, "step": 32456 }, { "epoch": 0.057550716812990316, "grad_norm": 0.76171875, "learning_rate": 0.0015554783001127115, "loss": 0.2275, "step": 32458 }, { "epoch": 0.05755426297830013, "grad_norm": 0.38671875, "learning_rate": 0.001555426965328105, "loss": 0.2242, "step": 32460 }, { "epoch": 0.057557809143609945, "grad_norm": 1.4375, "learning_rate": 0.0015553756285517255, "loss": 0.2393, "step": 32462 }, { "epoch": 0.05756135530891976, "grad_norm": 0.76171875, "learning_rate": 0.0015553242897837975, "loss": 0.1729, "step": 32464 }, { "epoch": 0.057564901474229574, "grad_norm": 0.71484375, "learning_rate": 0.001555272949024546, "loss": 0.1808, "step": 32466 }, { "epoch": 0.05756844763953939, "grad_norm": 0.68359375, "learning_rate": 0.0015552216062741946, "loss": 0.1677, "step": 32468 }, { "epoch": 0.0575719938048492, "grad_norm": 1.8359375, "learning_rate": 0.0015551702615329687, "loss": 0.4286, "step": 32470 }, { "epoch": 0.05757553997015902, "grad_norm": 0.2021484375, "learning_rate": 0.001555118914801092, "loss": 0.1659, "step": 32472 }, { "epoch": 0.05757908613546883, "grad_norm": 0.5625, "learning_rate": 0.00155506756607879, "loss": 0.1743, "step": 32474 }, { "epoch": 0.05758263230077865, "grad_norm": 0.28125, "learning_rate": 0.0015550162153662866, "loss": 0.1767, "step": 32476 }, { "epoch": 0.05758617846608846, "grad_norm": 1.1015625, "learning_rate": 0.0015549648626638068, "loss": 0.3092, "step": 32478 }, { "epoch": 0.05758972463139828, "grad_norm": 0.90234375, "learning_rate": 0.0015549135079715743, "loss": 0.3511, "step": 32480 }, { "epoch": 0.0575932707967081, "grad_norm": 0.3515625, "learning_rate": 0.0015548621512898148, "loss": 0.1741, "step": 32482 }, { "epoch": 0.05759681696201791, "grad_norm": 0.52734375, "learning_rate": 0.0015548107926187528, "loss": 0.199, "step": 32484 }, { "epoch": 0.057600363127327726, "grad_norm": 0.73828125, "learning_rate": 0.001554759431958612, "loss": 0.24, "step": 32486 }, { "epoch": 0.05760390929263754, "grad_norm": 1.390625, "learning_rate": 0.0015547080693096179, "loss": 0.2288, "step": 32488 }, { "epoch": 0.057607455457947356, "grad_norm": 0.32421875, "learning_rate": 0.0015546567046719945, "loss": 0.1782, "step": 32490 }, { "epoch": 0.05761100162325717, "grad_norm": 0.66015625, "learning_rate": 0.0015546053380459674, "loss": 0.2162, "step": 32492 }, { "epoch": 0.057614547788566985, "grad_norm": 3.453125, "learning_rate": 0.0015545539694317599, "loss": 0.238, "step": 32494 }, { "epoch": 0.0576180939538768, "grad_norm": 1.0625, "learning_rate": 0.0015545025988295978, "loss": 0.2908, "step": 32496 }, { "epoch": 0.057621640119186614, "grad_norm": 2.734375, "learning_rate": 0.0015544512262397055, "loss": 0.2277, "step": 32498 }, { "epoch": 0.05762518628449643, "grad_norm": 0.419921875, "learning_rate": 0.0015543998516623072, "loss": 0.189, "step": 32500 }, { "epoch": 0.05762873244980625, "grad_norm": 0.498046875, "learning_rate": 0.0015543484750976282, "loss": 0.1353, "step": 32502 }, { "epoch": 0.057632278615116064, "grad_norm": 0.427734375, "learning_rate": 0.0015542970965458926, "loss": 0.2177, "step": 32504 }, { "epoch": 0.05763582478042588, "grad_norm": 1.6640625, "learning_rate": 0.0015542457160073254, "loss": 0.2283, "step": 32506 }, { "epoch": 0.05763937094573569, "grad_norm": 0.5234375, "learning_rate": 0.0015541943334821512, "loss": 0.1681, "step": 32508 }, { "epoch": 0.05764291711104551, "grad_norm": 0.494140625, "learning_rate": 0.001554142948970595, "loss": 0.1898, "step": 32510 }, { "epoch": 0.05764646327635532, "grad_norm": 1.2265625, "learning_rate": 0.0015540915624728815, "loss": 0.2549, "step": 32512 }, { "epoch": 0.05765000944166514, "grad_norm": 0.90625, "learning_rate": 0.0015540401739892347, "loss": 0.1459, "step": 32514 }, { "epoch": 0.05765355560697495, "grad_norm": 0.89453125, "learning_rate": 0.0015539887835198806, "loss": 0.1449, "step": 32516 }, { "epoch": 0.057657101772284766, "grad_norm": 2.625, "learning_rate": 0.0015539373910650428, "loss": 0.2313, "step": 32518 }, { "epoch": 0.05766064793759458, "grad_norm": 0.72265625, "learning_rate": 0.0015538859966249467, "loss": 0.2128, "step": 32520 }, { "epoch": 0.057664194102904395, "grad_norm": 0.7421875, "learning_rate": 0.001553834600199817, "loss": 0.1874, "step": 32522 }, { "epoch": 0.05766774026821421, "grad_norm": 0.291015625, "learning_rate": 0.0015537832017898786, "loss": 0.1988, "step": 32524 }, { "epoch": 0.05767128643352403, "grad_norm": 2.546875, "learning_rate": 0.0015537318013953555, "loss": 0.2306, "step": 32526 }, { "epoch": 0.057674832598833846, "grad_norm": 1.2421875, "learning_rate": 0.0015536803990164733, "loss": 0.1674, "step": 32528 }, { "epoch": 0.05767837876414366, "grad_norm": 5.25, "learning_rate": 0.0015536289946534562, "loss": 0.2624, "step": 32530 }, { "epoch": 0.057681924929453475, "grad_norm": 1.2421875, "learning_rate": 0.00155357758830653, "loss": 0.3909, "step": 32532 }, { "epoch": 0.05768547109476329, "grad_norm": 0.302734375, "learning_rate": 0.0015535261799759187, "loss": 0.1338, "step": 32534 }, { "epoch": 0.057689017260073104, "grad_norm": 1.6015625, "learning_rate": 0.001553474769661847, "loss": 0.2787, "step": 32536 }, { "epoch": 0.05769256342538292, "grad_norm": 0.9609375, "learning_rate": 0.0015534233573645405, "loss": 0.1968, "step": 32538 }, { "epoch": 0.05769610959069273, "grad_norm": 0.349609375, "learning_rate": 0.0015533719430842234, "loss": 0.1498, "step": 32540 }, { "epoch": 0.05769965575600255, "grad_norm": 0.640625, "learning_rate": 0.001553320526821121, "loss": 0.3432, "step": 32542 }, { "epoch": 0.05770320192131236, "grad_norm": 0.400390625, "learning_rate": 0.0015532691085754578, "loss": 0.2025, "step": 32544 }, { "epoch": 0.05770674808662218, "grad_norm": 0.267578125, "learning_rate": 0.0015532176883474584, "loss": 0.1761, "step": 32546 }, { "epoch": 0.057710294251932, "grad_norm": 1.0703125, "learning_rate": 0.0015531662661373484, "loss": 0.2267, "step": 32548 }, { "epoch": 0.05771384041724181, "grad_norm": 0.37109375, "learning_rate": 0.0015531148419453526, "loss": 0.2532, "step": 32550 }, { "epoch": 0.05771738658255163, "grad_norm": 0.4140625, "learning_rate": 0.0015530634157716956, "loss": 0.2195, "step": 32552 }, { "epoch": 0.05772093274786144, "grad_norm": 0.392578125, "learning_rate": 0.001553011987616602, "loss": 0.1793, "step": 32554 }, { "epoch": 0.05772447891317126, "grad_norm": 0.3515625, "learning_rate": 0.0015529605574802975, "loss": 0.2428, "step": 32556 }, { "epoch": 0.05772802507848107, "grad_norm": 0.236328125, "learning_rate": 0.0015529091253630065, "loss": 0.3177, "step": 32558 }, { "epoch": 0.057731571243790886, "grad_norm": 0.4375, "learning_rate": 0.001552857691264954, "loss": 0.1529, "step": 32560 }, { "epoch": 0.0577351174091007, "grad_norm": 0.427734375, "learning_rate": 0.0015528062551863654, "loss": 0.2156, "step": 32562 }, { "epoch": 0.057738663574410515, "grad_norm": 1.015625, "learning_rate": 0.001552754817127465, "loss": 0.1672, "step": 32564 }, { "epoch": 0.05774220973972033, "grad_norm": 0.359375, "learning_rate": 0.0015527033770884777, "loss": 0.1862, "step": 32566 }, { "epoch": 0.057745755905030144, "grad_norm": 0.310546875, "learning_rate": 0.0015526519350696293, "loss": 0.1316, "step": 32568 }, { "epoch": 0.057749302070339965, "grad_norm": 0.95703125, "learning_rate": 0.001552600491071144, "loss": 0.2426, "step": 32570 }, { "epoch": 0.05775284823564978, "grad_norm": 1.6875, "learning_rate": 0.0015525490450932471, "loss": 0.1743, "step": 32572 }, { "epoch": 0.057756394400959594, "grad_norm": 11.25, "learning_rate": 0.0015524975971361636, "loss": 0.2521, "step": 32574 }, { "epoch": 0.05775994056626941, "grad_norm": 0.40234375, "learning_rate": 0.0015524461472001185, "loss": 0.1604, "step": 32576 }, { "epoch": 0.057763486731579224, "grad_norm": 0.97265625, "learning_rate": 0.0015523946952853368, "loss": 0.2201, "step": 32578 }, { "epoch": 0.05776703289688904, "grad_norm": 0.50390625, "learning_rate": 0.0015523432413920432, "loss": 0.1897, "step": 32580 }, { "epoch": 0.05777057906219885, "grad_norm": 2.28125, "learning_rate": 0.001552291785520463, "loss": 0.309, "step": 32582 }, { "epoch": 0.05777412522750867, "grad_norm": 0.66015625, "learning_rate": 0.0015522403276708218, "loss": 0.1743, "step": 32584 }, { "epoch": 0.05777767139281848, "grad_norm": 1.34375, "learning_rate": 0.0015521888678433435, "loss": 0.3351, "step": 32586 }, { "epoch": 0.057781217558128296, "grad_norm": 0.546875, "learning_rate": 0.001552137406038254, "loss": 0.1991, "step": 32588 }, { "epoch": 0.05778476372343811, "grad_norm": 0.349609375, "learning_rate": 0.0015520859422557779, "loss": 0.1572, "step": 32590 }, { "epoch": 0.057788309888747925, "grad_norm": 0.474609375, "learning_rate": 0.0015520344764961408, "loss": 0.1692, "step": 32592 }, { "epoch": 0.05779185605405775, "grad_norm": 0.94140625, "learning_rate": 0.0015519830087595671, "loss": 0.1908, "step": 32594 }, { "epoch": 0.05779540221936756, "grad_norm": 0.373046875, "learning_rate": 0.0015519315390462828, "loss": 0.1788, "step": 32596 }, { "epoch": 0.057798948384677376, "grad_norm": 0.76953125, "learning_rate": 0.0015518800673565124, "loss": 0.1666, "step": 32598 }, { "epoch": 0.05780249454998719, "grad_norm": 0.73046875, "learning_rate": 0.0015518285936904806, "loss": 0.1553, "step": 32600 }, { "epoch": 0.057806040715297005, "grad_norm": 0.53515625, "learning_rate": 0.0015517771180484133, "loss": 0.1775, "step": 32602 }, { "epoch": 0.05780958688060682, "grad_norm": 0.84765625, "learning_rate": 0.001551725640430535, "loss": 0.1692, "step": 32604 }, { "epoch": 0.057813133045916634, "grad_norm": 1.0078125, "learning_rate": 0.0015516741608370716, "loss": 0.2226, "step": 32606 }, { "epoch": 0.05781667921122645, "grad_norm": 0.30859375, "learning_rate": 0.0015516226792682474, "loss": 0.2335, "step": 32608 }, { "epoch": 0.05782022537653626, "grad_norm": 0.3359375, "learning_rate": 0.0015515711957242884, "loss": 0.1768, "step": 32610 }, { "epoch": 0.05782377154184608, "grad_norm": 0.36328125, "learning_rate": 0.001551519710205419, "loss": 0.2005, "step": 32612 }, { "epoch": 0.05782731770715589, "grad_norm": 0.50390625, "learning_rate": 0.0015514682227118646, "loss": 0.1533, "step": 32614 }, { "epoch": 0.057830863872465714, "grad_norm": 0.55078125, "learning_rate": 0.0015514167332438505, "loss": 0.2213, "step": 32616 }, { "epoch": 0.05783441003777553, "grad_norm": 0.6953125, "learning_rate": 0.0015513652418016018, "loss": 0.1994, "step": 32618 }, { "epoch": 0.05783795620308534, "grad_norm": 0.28125, "learning_rate": 0.0015513137483853436, "loss": 0.1338, "step": 32620 }, { "epoch": 0.05784150236839516, "grad_norm": 0.53515625, "learning_rate": 0.0015512622529953012, "loss": 0.1632, "step": 32622 }, { "epoch": 0.05784504853370497, "grad_norm": 0.275390625, "learning_rate": 0.0015512107556317001, "loss": 0.1956, "step": 32624 }, { "epoch": 0.05784859469901479, "grad_norm": 0.27734375, "learning_rate": 0.0015511592562947654, "loss": 0.2028, "step": 32626 }, { "epoch": 0.0578521408643246, "grad_norm": 0.7265625, "learning_rate": 0.0015511077549847216, "loss": 0.1665, "step": 32628 }, { "epoch": 0.057855687029634416, "grad_norm": 1.0078125, "learning_rate": 0.0015510562517017951, "loss": 0.3109, "step": 32630 }, { "epoch": 0.05785923319494423, "grad_norm": 0.490234375, "learning_rate": 0.0015510047464462103, "loss": 0.1832, "step": 32632 }, { "epoch": 0.057862779360254045, "grad_norm": 0.6171875, "learning_rate": 0.0015509532392181924, "loss": 0.2036, "step": 32634 }, { "epoch": 0.05786632552556386, "grad_norm": 0.37890625, "learning_rate": 0.0015509017300179674, "loss": 0.2014, "step": 32636 }, { "epoch": 0.05786987169087368, "grad_norm": 2.265625, "learning_rate": 0.0015508502188457602, "loss": 0.1755, "step": 32638 }, { "epoch": 0.057873417856183496, "grad_norm": 1.109375, "learning_rate": 0.0015507987057017958, "loss": 0.1658, "step": 32640 }, { "epoch": 0.05787696402149331, "grad_norm": 0.5859375, "learning_rate": 0.0015507471905862997, "loss": 0.1419, "step": 32642 }, { "epoch": 0.057880510186803125, "grad_norm": 0.7890625, "learning_rate": 0.0015506956734994972, "loss": 0.175, "step": 32644 }, { "epoch": 0.05788405635211294, "grad_norm": 0.8359375, "learning_rate": 0.0015506441544416137, "loss": 0.2017, "step": 32646 }, { "epoch": 0.057887602517422754, "grad_norm": 0.36328125, "learning_rate": 0.0015505926334128744, "loss": 0.1334, "step": 32648 }, { "epoch": 0.05789114868273257, "grad_norm": 0.275390625, "learning_rate": 0.0015505411104135048, "loss": 0.2382, "step": 32650 }, { "epoch": 0.05789469484804238, "grad_norm": 0.6328125, "learning_rate": 0.00155048958544373, "loss": 0.2341, "step": 32652 }, { "epoch": 0.0578982410133522, "grad_norm": 0.85546875, "learning_rate": 0.0015504380585037755, "loss": 0.1722, "step": 32654 }, { "epoch": 0.05790178717866201, "grad_norm": 0.95703125, "learning_rate": 0.0015503865295938665, "loss": 0.2059, "step": 32656 }, { "epoch": 0.057905333343971827, "grad_norm": 0.7109375, "learning_rate": 0.0015503349987142283, "loss": 0.214, "step": 32658 }, { "epoch": 0.05790887950928164, "grad_norm": 2.5, "learning_rate": 0.0015502834658650867, "loss": 0.2269, "step": 32660 }, { "epoch": 0.05791242567459146, "grad_norm": 0.83203125, "learning_rate": 0.0015502319310466666, "loss": 0.1995, "step": 32662 }, { "epoch": 0.05791597183990128, "grad_norm": 0.2734375, "learning_rate": 0.0015501803942591932, "loss": 0.2188, "step": 32664 }, { "epoch": 0.05791951800521109, "grad_norm": 0.61328125, "learning_rate": 0.0015501288555028925, "loss": 0.2223, "step": 32666 }, { "epoch": 0.057923064170520906, "grad_norm": 0.484375, "learning_rate": 0.0015500773147779903, "loss": 0.1822, "step": 32668 }, { "epoch": 0.05792661033583072, "grad_norm": 0.8125, "learning_rate": 0.0015500257720847108, "loss": 0.1621, "step": 32670 }, { "epoch": 0.057930156501140535, "grad_norm": 0.72265625, "learning_rate": 0.0015499742274232798, "loss": 0.2553, "step": 32672 }, { "epoch": 0.05793370266645035, "grad_norm": 0.478515625, "learning_rate": 0.001549922680793923, "loss": 0.3143, "step": 32674 }, { "epoch": 0.057937248831760164, "grad_norm": 0.76171875, "learning_rate": 0.0015498711321968658, "loss": 0.2183, "step": 32676 }, { "epoch": 0.05794079499706998, "grad_norm": 0.6484375, "learning_rate": 0.0015498195816323337, "loss": 0.1813, "step": 32678 }, { "epoch": 0.057944341162379794, "grad_norm": 0.51171875, "learning_rate": 0.001549768029100552, "loss": 0.2066, "step": 32680 }, { "epoch": 0.05794788732768961, "grad_norm": 0.7890625, "learning_rate": 0.001549716474601746, "loss": 0.201, "step": 32682 }, { "epoch": 0.05795143349299943, "grad_norm": 0.150390625, "learning_rate": 0.0015496649181361418, "loss": 0.1413, "step": 32684 }, { "epoch": 0.057954979658309244, "grad_norm": 0.45703125, "learning_rate": 0.0015496133597039641, "loss": 0.234, "step": 32686 }, { "epoch": 0.05795852582361906, "grad_norm": 2.28125, "learning_rate": 0.0015495617993054388, "loss": 0.2642, "step": 32688 }, { "epoch": 0.05796207198892887, "grad_norm": 0.349609375, "learning_rate": 0.0015495102369407915, "loss": 0.3398, "step": 32690 }, { "epoch": 0.05796561815423869, "grad_norm": 0.2177734375, "learning_rate": 0.0015494586726102473, "loss": 0.1918, "step": 32692 }, { "epoch": 0.0579691643195485, "grad_norm": 0.75, "learning_rate": 0.0015494071063140318, "loss": 0.2178, "step": 32694 }, { "epoch": 0.05797271048485832, "grad_norm": 0.390625, "learning_rate": 0.001549355538052371, "loss": 0.1898, "step": 32696 }, { "epoch": 0.05797625665016813, "grad_norm": 0.38671875, "learning_rate": 0.00154930396782549, "loss": 0.2675, "step": 32698 }, { "epoch": 0.057979802815477946, "grad_norm": 0.34765625, "learning_rate": 0.0015492523956336145, "loss": 0.1765, "step": 32700 }, { "epoch": 0.05798334898078776, "grad_norm": 0.3671875, "learning_rate": 0.0015492008214769697, "loss": 0.1783, "step": 32702 }, { "epoch": 0.057986895146097575, "grad_norm": 0.7890625, "learning_rate": 0.0015491492453557818, "loss": 0.2519, "step": 32704 }, { "epoch": 0.0579904413114074, "grad_norm": 0.36328125, "learning_rate": 0.0015490976672702754, "loss": 0.1926, "step": 32706 }, { "epoch": 0.05799398747671721, "grad_norm": 0.66015625, "learning_rate": 0.0015490460872206774, "loss": 0.1503, "step": 32708 }, { "epoch": 0.057997533642027026, "grad_norm": 0.333984375, "learning_rate": 0.0015489945052072124, "loss": 0.2192, "step": 32710 }, { "epoch": 0.05800107980733684, "grad_norm": 0.91015625, "learning_rate": 0.001548942921230106, "loss": 0.2445, "step": 32712 }, { "epoch": 0.058004625972646655, "grad_norm": 0.62109375, "learning_rate": 0.0015488913352895842, "loss": 0.2324, "step": 32714 }, { "epoch": 0.05800817213795647, "grad_norm": 0.328125, "learning_rate": 0.0015488397473858726, "loss": 0.1257, "step": 32716 }, { "epoch": 0.058011718303266284, "grad_norm": 0.6484375, "learning_rate": 0.0015487881575191966, "loss": 0.2182, "step": 32718 }, { "epoch": 0.0580152644685761, "grad_norm": 0.3359375, "learning_rate": 0.001548736565689782, "loss": 0.1354, "step": 32720 }, { "epoch": 0.05801881063388591, "grad_norm": 0.310546875, "learning_rate": 0.0015486849718978537, "loss": 0.1892, "step": 32722 }, { "epoch": 0.05802235679919573, "grad_norm": 1.234375, "learning_rate": 0.0015486333761436384, "loss": 0.2164, "step": 32724 }, { "epoch": 0.05802590296450554, "grad_norm": 0.59375, "learning_rate": 0.0015485817784273612, "loss": 0.2139, "step": 32726 }, { "epoch": 0.05802944912981536, "grad_norm": 0.4296875, "learning_rate": 0.0015485301787492483, "loss": 0.1753, "step": 32728 }, { "epoch": 0.05803299529512518, "grad_norm": 0.59375, "learning_rate": 0.0015484785771095245, "loss": 0.1488, "step": 32730 }, { "epoch": 0.05803654146043499, "grad_norm": 0.490234375, "learning_rate": 0.001548426973508416, "loss": 0.2039, "step": 32732 }, { "epoch": 0.05804008762574481, "grad_norm": 0.29296875, "learning_rate": 0.0015483753679461482, "loss": 0.2055, "step": 32734 }, { "epoch": 0.05804363379105462, "grad_norm": 0.26171875, "learning_rate": 0.0015483237604229474, "loss": 0.1246, "step": 32736 }, { "epoch": 0.058047179956364436, "grad_norm": 0.734375, "learning_rate": 0.0015482721509390385, "loss": 0.2483, "step": 32738 }, { "epoch": 0.05805072612167425, "grad_norm": 0.5625, "learning_rate": 0.0015482205394946478, "loss": 0.1687, "step": 32740 }, { "epoch": 0.058054272286984065, "grad_norm": 0.91796875, "learning_rate": 0.0015481689260900008, "loss": 0.2611, "step": 32742 }, { "epoch": 0.05805781845229388, "grad_norm": 0.21875, "learning_rate": 0.0015481173107253232, "loss": 0.129, "step": 32744 }, { "epoch": 0.058061364617603695, "grad_norm": 0.62109375, "learning_rate": 0.001548065693400841, "loss": 0.1614, "step": 32746 }, { "epoch": 0.05806491078291351, "grad_norm": 0.7421875, "learning_rate": 0.0015480140741167795, "loss": 0.1468, "step": 32748 }, { "epoch": 0.058068456948223324, "grad_norm": 0.287109375, "learning_rate": 0.0015479624528733646, "loss": 0.1847, "step": 32750 }, { "epoch": 0.058072003113533145, "grad_norm": 1.1640625, "learning_rate": 0.0015479108296708225, "loss": 0.2115, "step": 32752 }, { "epoch": 0.05807554927884296, "grad_norm": 0.8828125, "learning_rate": 0.0015478592045093781, "loss": 0.1759, "step": 32754 }, { "epoch": 0.058079095444152774, "grad_norm": 0.490234375, "learning_rate": 0.0015478075773892582, "loss": 0.2047, "step": 32756 }, { "epoch": 0.05808264160946259, "grad_norm": 1.9140625, "learning_rate": 0.0015477559483106878, "loss": 0.2186, "step": 32758 }, { "epoch": 0.0580861877747724, "grad_norm": 1.15625, "learning_rate": 0.001547704317273893, "loss": 0.1579, "step": 32760 }, { "epoch": 0.05808973394008222, "grad_norm": 0.4296875, "learning_rate": 0.0015476526842790996, "loss": 0.1826, "step": 32762 }, { "epoch": 0.05809328010539203, "grad_norm": 0.322265625, "learning_rate": 0.0015476010493265333, "loss": 0.125, "step": 32764 }, { "epoch": 0.05809682627070185, "grad_norm": 0.6640625, "learning_rate": 0.0015475494124164203, "loss": 0.1696, "step": 32766 }, { "epoch": 0.05810037243601166, "grad_norm": 0.322265625, "learning_rate": 0.0015474977735489856, "loss": 0.1495, "step": 32768 }, { "epoch": 0.058103918601321476, "grad_norm": 0.3203125, "learning_rate": 0.001547446132724456, "loss": 0.1782, "step": 32770 }, { "epoch": 0.05810746476663129, "grad_norm": 0.62109375, "learning_rate": 0.0015473944899430567, "loss": 0.3256, "step": 32772 }, { "epoch": 0.05811101093194111, "grad_norm": 0.251953125, "learning_rate": 0.001547342845205014, "loss": 0.2066, "step": 32774 }, { "epoch": 0.05811455709725093, "grad_norm": 2.171875, "learning_rate": 0.0015472911985105534, "loss": 0.477, "step": 32776 }, { "epoch": 0.05811810326256074, "grad_norm": 0.6953125, "learning_rate": 0.001547239549859901, "loss": 0.1861, "step": 32778 }, { "epoch": 0.058121649427870556, "grad_norm": 0.375, "learning_rate": 0.0015471878992532825, "loss": 0.1913, "step": 32780 }, { "epoch": 0.05812519559318037, "grad_norm": 0.76953125, "learning_rate": 0.001547136246690924, "loss": 0.3667, "step": 32782 }, { "epoch": 0.058128741758490185, "grad_norm": 0.30078125, "learning_rate": 0.0015470845921730513, "loss": 0.2809, "step": 32784 }, { "epoch": 0.0581322879238, "grad_norm": 1.4453125, "learning_rate": 0.0015470329356998902, "loss": 0.2582, "step": 32786 }, { "epoch": 0.058135834089109814, "grad_norm": 0.44921875, "learning_rate": 0.0015469812772716666, "loss": 0.1488, "step": 32788 }, { "epoch": 0.05813938025441963, "grad_norm": 0.234375, "learning_rate": 0.0015469296168886067, "loss": 0.1734, "step": 32790 }, { "epoch": 0.05814292641972944, "grad_norm": 0.91015625, "learning_rate": 0.001546877954550936, "loss": 0.3811, "step": 32792 }, { "epoch": 0.05814647258503926, "grad_norm": 1.359375, "learning_rate": 0.001546826290258881, "loss": 0.3681, "step": 32794 }, { "epoch": 0.05815001875034907, "grad_norm": 2.1875, "learning_rate": 0.0015467746240126672, "loss": 0.316, "step": 32796 }, { "epoch": 0.058153564915658894, "grad_norm": 0.84375, "learning_rate": 0.0015467229558125209, "loss": 0.2074, "step": 32798 }, { "epoch": 0.05815711108096871, "grad_norm": 0.65234375, "learning_rate": 0.0015466712856586676, "loss": 0.1452, "step": 32800 }, { "epoch": 0.05816065724627852, "grad_norm": 1.1171875, "learning_rate": 0.0015466196135513338, "loss": 0.1868, "step": 32802 }, { "epoch": 0.05816420341158834, "grad_norm": 1.6484375, "learning_rate": 0.001546567939490745, "loss": 0.3062, "step": 32804 }, { "epoch": 0.05816774957689815, "grad_norm": 0.33203125, "learning_rate": 0.0015465162634771277, "loss": 0.1659, "step": 32806 }, { "epoch": 0.05817129574220797, "grad_norm": 0.82421875, "learning_rate": 0.0015464645855107076, "loss": 0.1711, "step": 32808 }, { "epoch": 0.05817484190751778, "grad_norm": 0.2734375, "learning_rate": 0.0015464129055917104, "loss": 0.197, "step": 32810 }, { "epoch": 0.058178388072827596, "grad_norm": 0.59375, "learning_rate": 0.0015463612237203628, "loss": 0.1863, "step": 32812 }, { "epoch": 0.05818193423813741, "grad_norm": 2.375, "learning_rate": 0.0015463095398968907, "loss": 0.239, "step": 32814 }, { "epoch": 0.058185480403447225, "grad_norm": 0.55078125, "learning_rate": 0.0015462578541215193, "loss": 0.1599, "step": 32816 }, { "epoch": 0.05818902656875704, "grad_norm": 0.251953125, "learning_rate": 0.0015462061663944757, "loss": 0.1489, "step": 32818 }, { "epoch": 0.05819257273406686, "grad_norm": 0.3046875, "learning_rate": 0.0015461544767159852, "loss": 0.1743, "step": 32820 }, { "epoch": 0.058196118899376675, "grad_norm": 0.330078125, "learning_rate": 0.0015461027850862746, "loss": 0.1592, "step": 32822 }, { "epoch": 0.05819966506468649, "grad_norm": 0.64453125, "learning_rate": 0.0015460510915055693, "loss": 0.2169, "step": 32824 }, { "epoch": 0.058203211229996304, "grad_norm": 0.357421875, "learning_rate": 0.0015459993959740955, "loss": 0.1771, "step": 32826 }, { "epoch": 0.05820675739530612, "grad_norm": 1.78125, "learning_rate": 0.0015459476984920797, "loss": 0.2825, "step": 32828 }, { "epoch": 0.058210303560615934, "grad_norm": 0.64453125, "learning_rate": 0.0015458959990597473, "loss": 0.1675, "step": 32830 }, { "epoch": 0.05821384972592575, "grad_norm": 0.546875, "learning_rate": 0.0015458442976773253, "loss": 0.2112, "step": 32832 }, { "epoch": 0.05821739589123556, "grad_norm": 0.8203125, "learning_rate": 0.0015457925943450387, "loss": 0.2091, "step": 32834 }, { "epoch": 0.05822094205654538, "grad_norm": 0.5390625, "learning_rate": 0.0015457408890631149, "loss": 0.1629, "step": 32836 }, { "epoch": 0.05822448822185519, "grad_norm": 2.125, "learning_rate": 0.001545689181831779, "loss": 0.2125, "step": 32838 }, { "epoch": 0.058228034387165006, "grad_norm": 0.265625, "learning_rate": 0.0015456374726512576, "loss": 0.1783, "step": 32840 }, { "epoch": 0.05823158055247483, "grad_norm": 0.224609375, "learning_rate": 0.0015455857615217768, "loss": 0.145, "step": 32842 }, { "epoch": 0.05823512671778464, "grad_norm": 0.75, "learning_rate": 0.0015455340484435626, "loss": 0.1641, "step": 32844 }, { "epoch": 0.05823867288309446, "grad_norm": 0.546875, "learning_rate": 0.0015454823334168414, "loss": 0.2008, "step": 32846 }, { "epoch": 0.05824221904840427, "grad_norm": 0.302734375, "learning_rate": 0.0015454306164418392, "loss": 0.2207, "step": 32848 }, { "epoch": 0.058245765213714086, "grad_norm": 0.76171875, "learning_rate": 0.001545378897518782, "loss": 0.2048, "step": 32850 }, { "epoch": 0.0582493113790239, "grad_norm": 0.1865234375, "learning_rate": 0.0015453271766478964, "loss": 0.1611, "step": 32852 }, { "epoch": 0.058252857544333715, "grad_norm": 1.4296875, "learning_rate": 0.0015452754538294083, "loss": 0.3008, "step": 32854 }, { "epoch": 0.05825640370964353, "grad_norm": 0.302734375, "learning_rate": 0.0015452237290635442, "loss": 0.1433, "step": 32856 }, { "epoch": 0.058259949874953344, "grad_norm": 0.71484375, "learning_rate": 0.0015451720023505302, "loss": 0.215, "step": 32858 }, { "epoch": 0.05826349604026316, "grad_norm": 2.96875, "learning_rate": 0.0015451202736905922, "loss": 0.2807, "step": 32860 }, { "epoch": 0.05826704220557297, "grad_norm": 1.109375, "learning_rate": 0.0015450685430839566, "loss": 0.3874, "step": 32862 }, { "epoch": 0.05827058837088279, "grad_norm": 1.0625, "learning_rate": 0.00154501681053085, "loss": 0.2067, "step": 32864 }, { "epoch": 0.05827413453619261, "grad_norm": 1.078125, "learning_rate": 0.001544965076031498, "loss": 0.3986, "step": 32866 }, { "epoch": 0.058277680701502424, "grad_norm": 0.58203125, "learning_rate": 0.0015449133395861276, "loss": 0.2626, "step": 32868 }, { "epoch": 0.05828122686681224, "grad_norm": 0.56640625, "learning_rate": 0.0015448616011949645, "loss": 0.2164, "step": 32870 }, { "epoch": 0.05828477303212205, "grad_norm": 0.64453125, "learning_rate": 0.0015448098608582355, "loss": 0.193, "step": 32872 }, { "epoch": 0.05828831919743187, "grad_norm": 0.2412109375, "learning_rate": 0.001544758118576166, "loss": 0.1891, "step": 32874 }, { "epoch": 0.05829186536274168, "grad_norm": 0.51171875, "learning_rate": 0.001544706374348983, "loss": 0.3071, "step": 32876 }, { "epoch": 0.0582954115280515, "grad_norm": 0.2392578125, "learning_rate": 0.0015446546281769132, "loss": 0.1614, "step": 32878 }, { "epoch": 0.05829895769336131, "grad_norm": 1.09375, "learning_rate": 0.0015446028800601816, "loss": 0.2111, "step": 32880 }, { "epoch": 0.058302503858671126, "grad_norm": 0.3828125, "learning_rate": 0.0015445511299990156, "loss": 0.23, "step": 32882 }, { "epoch": 0.05830605002398094, "grad_norm": 0.45703125, "learning_rate": 0.0015444993779936411, "loss": 0.1871, "step": 32884 }, { "epoch": 0.058309596189290755, "grad_norm": 2.5, "learning_rate": 0.0015444476240442844, "loss": 0.3988, "step": 32886 }, { "epoch": 0.058313142354600576, "grad_norm": 0.515625, "learning_rate": 0.0015443958681511723, "loss": 0.2062, "step": 32888 }, { "epoch": 0.05831668851991039, "grad_norm": 4.46875, "learning_rate": 0.0015443441103145303, "loss": 0.335, "step": 32890 }, { "epoch": 0.058320234685220206, "grad_norm": 0.482421875, "learning_rate": 0.0015442923505345858, "loss": 0.1858, "step": 32892 }, { "epoch": 0.05832378085053002, "grad_norm": 0.275390625, "learning_rate": 0.0015442405888115643, "loss": 0.1822, "step": 32894 }, { "epoch": 0.058327327015839835, "grad_norm": 0.9609375, "learning_rate": 0.0015441888251456926, "loss": 0.2514, "step": 32896 }, { "epoch": 0.05833087318114965, "grad_norm": 1.296875, "learning_rate": 0.0015441370595371971, "loss": 0.2094, "step": 32898 }, { "epoch": 0.058334419346459464, "grad_norm": 0.275390625, "learning_rate": 0.0015440852919863042, "loss": 0.2043, "step": 32900 }, { "epoch": 0.05833796551176928, "grad_norm": 0.5234375, "learning_rate": 0.0015440335224932399, "loss": 0.1949, "step": 32902 }, { "epoch": 0.05834151167707909, "grad_norm": 0.421875, "learning_rate": 0.001543981751058231, "loss": 0.1664, "step": 32904 }, { "epoch": 0.05834505784238891, "grad_norm": 0.51953125, "learning_rate": 0.0015439299776815038, "loss": 0.1594, "step": 32906 }, { "epoch": 0.05834860400769872, "grad_norm": 1.5078125, "learning_rate": 0.0015438782023632846, "loss": 0.3101, "step": 32908 }, { "epoch": 0.05835215017300854, "grad_norm": 0.427734375, "learning_rate": 0.0015438264251038001, "loss": 0.1774, "step": 32910 }, { "epoch": 0.05835569633831836, "grad_norm": 0.2314453125, "learning_rate": 0.0015437746459032765, "loss": 0.2407, "step": 32912 }, { "epoch": 0.05835924250362817, "grad_norm": 0.59375, "learning_rate": 0.0015437228647619406, "loss": 0.1494, "step": 32914 }, { "epoch": 0.05836278866893799, "grad_norm": 0.5, "learning_rate": 0.0015436710816800186, "loss": 0.2114, "step": 32916 }, { "epoch": 0.0583663348342478, "grad_norm": 0.435546875, "learning_rate": 0.001543619296657737, "loss": 0.2357, "step": 32918 }, { "epoch": 0.058369880999557616, "grad_norm": 0.435546875, "learning_rate": 0.0015435675096953222, "loss": 0.2111, "step": 32920 }, { "epoch": 0.05837342716486743, "grad_norm": 0.6484375, "learning_rate": 0.0015435157207930009, "loss": 0.2047, "step": 32922 }, { "epoch": 0.058376973330177245, "grad_norm": 0.609375, "learning_rate": 0.0015434639299509991, "loss": 0.1667, "step": 32924 }, { "epoch": 0.05838051949548706, "grad_norm": 1.5703125, "learning_rate": 0.0015434121371695443, "loss": 0.2829, "step": 32926 }, { "epoch": 0.058384065660796874, "grad_norm": 0.28515625, "learning_rate": 0.0015433603424488617, "loss": 0.1744, "step": 32928 }, { "epoch": 0.05838761182610669, "grad_norm": 0.3203125, "learning_rate": 0.001543308545789179, "loss": 0.192, "step": 32930 }, { "epoch": 0.058391157991416504, "grad_norm": 0.296875, "learning_rate": 0.0015432567471907218, "loss": 0.2123, "step": 32932 }, { "epoch": 0.058394704156726325, "grad_norm": 0.85546875, "learning_rate": 0.0015432049466537175, "loss": 0.2079, "step": 32934 }, { "epoch": 0.05839825032203614, "grad_norm": 0.365234375, "learning_rate": 0.001543153144178392, "loss": 0.2296, "step": 32936 }, { "epoch": 0.058401796487345954, "grad_norm": 1.3125, "learning_rate": 0.0015431013397649723, "loss": 0.1824, "step": 32938 }, { "epoch": 0.05840534265265577, "grad_norm": 0.373046875, "learning_rate": 0.001543049533413684, "loss": 0.1573, "step": 32940 }, { "epoch": 0.05840888881796558, "grad_norm": 1.171875, "learning_rate": 0.001542997725124755, "loss": 0.2017, "step": 32942 }, { "epoch": 0.0584124349832754, "grad_norm": 0.21875, "learning_rate": 0.0015429459148984114, "loss": 0.1683, "step": 32944 }, { "epoch": 0.05841598114858521, "grad_norm": 0.21484375, "learning_rate": 0.0015428941027348794, "loss": 0.1634, "step": 32946 }, { "epoch": 0.05841952731389503, "grad_norm": 0.38671875, "learning_rate": 0.0015428422886343855, "loss": 0.1652, "step": 32948 }, { "epoch": 0.05842307347920484, "grad_norm": 0.240234375, "learning_rate": 0.0015427904725971572, "loss": 0.2046, "step": 32950 }, { "epoch": 0.058426619644514656, "grad_norm": 0.2451171875, "learning_rate": 0.0015427386546234205, "loss": 0.148, "step": 32952 }, { "epoch": 0.05843016580982447, "grad_norm": 0.431640625, "learning_rate": 0.001542686834713402, "loss": 0.2354, "step": 32954 }, { "epoch": 0.05843371197513429, "grad_norm": 0.28515625, "learning_rate": 0.0015426350128673284, "loss": 0.4164, "step": 32956 }, { "epoch": 0.05843725814044411, "grad_norm": 0.5546875, "learning_rate": 0.0015425831890854263, "loss": 0.1922, "step": 32958 }, { "epoch": 0.05844080430575392, "grad_norm": 0.75390625, "learning_rate": 0.0015425313633679225, "loss": 0.1894, "step": 32960 }, { "epoch": 0.058444350471063736, "grad_norm": 0.51953125, "learning_rate": 0.0015424795357150435, "loss": 0.1992, "step": 32962 }, { "epoch": 0.05844789663637355, "grad_norm": 0.47265625, "learning_rate": 0.0015424277061270163, "loss": 0.1812, "step": 32964 }, { "epoch": 0.058451442801683365, "grad_norm": 0.2578125, "learning_rate": 0.0015423758746040672, "loss": 0.164, "step": 32966 }, { "epoch": 0.05845498896699318, "grad_norm": 0.2734375, "learning_rate": 0.0015423240411464227, "loss": 0.1477, "step": 32968 }, { "epoch": 0.058458535132302994, "grad_norm": 0.490234375, "learning_rate": 0.0015422722057543102, "loss": 0.1913, "step": 32970 }, { "epoch": 0.05846208129761281, "grad_norm": 0.74609375, "learning_rate": 0.0015422203684279556, "loss": 0.1596, "step": 32972 }, { "epoch": 0.05846562746292262, "grad_norm": 0.3828125, "learning_rate": 0.0015421685291675863, "loss": 0.186, "step": 32974 }, { "epoch": 0.05846917362823244, "grad_norm": 0.32421875, "learning_rate": 0.0015421166879734283, "loss": 0.2054, "step": 32976 }, { "epoch": 0.05847271979354226, "grad_norm": 0.396484375, "learning_rate": 0.0015420648448457091, "loss": 0.2201, "step": 32978 }, { "epoch": 0.058476265958852074, "grad_norm": 0.1982421875, "learning_rate": 0.0015420129997846547, "loss": 0.1334, "step": 32980 }, { "epoch": 0.05847981212416189, "grad_norm": 0.326171875, "learning_rate": 0.0015419611527904926, "loss": 0.2138, "step": 32982 }, { "epoch": 0.0584833582894717, "grad_norm": 1.28125, "learning_rate": 0.001541909303863449, "loss": 0.1858, "step": 32984 }, { "epoch": 0.05848690445478152, "grad_norm": 0.255859375, "learning_rate": 0.0015418574530037508, "loss": 0.3209, "step": 32986 }, { "epoch": 0.05849045062009133, "grad_norm": 0.3984375, "learning_rate": 0.0015418056002116246, "loss": 0.2256, "step": 32988 }, { "epoch": 0.058493996785401146, "grad_norm": 0.380859375, "learning_rate": 0.0015417537454872974, "loss": 0.2278, "step": 32990 }, { "epoch": 0.05849754295071096, "grad_norm": 0.44921875, "learning_rate": 0.001541701888830996, "loss": 0.1644, "step": 32992 }, { "epoch": 0.058501089116020775, "grad_norm": 0.56640625, "learning_rate": 0.0015416500302429471, "loss": 0.2221, "step": 32994 }, { "epoch": 0.05850463528133059, "grad_norm": 0.462890625, "learning_rate": 0.0015415981697233772, "loss": 0.1724, "step": 32996 }, { "epoch": 0.058508181446640405, "grad_norm": 0.55078125, "learning_rate": 0.0015415463072725136, "loss": 0.232, "step": 32998 }, { "epoch": 0.05851172761195022, "grad_norm": 0.62109375, "learning_rate": 0.0015414944428905832, "loss": 0.2728, "step": 33000 }, { "epoch": 0.05851527377726004, "grad_norm": 0.306640625, "learning_rate": 0.0015414425765778123, "loss": 0.1757, "step": 33002 }, { "epoch": 0.058518819942569855, "grad_norm": 0.6953125, "learning_rate": 0.0015413907083344281, "loss": 0.1818, "step": 33004 }, { "epoch": 0.05852236610787967, "grad_norm": 0.5703125, "learning_rate": 0.0015413388381606572, "loss": 0.1738, "step": 33006 }, { "epoch": 0.058525912273189484, "grad_norm": 0.287109375, "learning_rate": 0.0015412869660567268, "loss": 0.1707, "step": 33008 }, { "epoch": 0.0585294584384993, "grad_norm": 1.28125, "learning_rate": 0.0015412350920228635, "loss": 0.2252, "step": 33010 }, { "epoch": 0.05853300460380911, "grad_norm": 0.384765625, "learning_rate": 0.001541183216059294, "loss": 0.5729, "step": 33012 }, { "epoch": 0.05853655076911893, "grad_norm": 0.21484375, "learning_rate": 0.0015411313381662455, "loss": 0.1625, "step": 33014 }, { "epoch": 0.05854009693442874, "grad_norm": 0.458984375, "learning_rate": 0.001541079458343945, "loss": 0.137, "step": 33016 }, { "epoch": 0.05854364309973856, "grad_norm": 0.412109375, "learning_rate": 0.0015410275765926186, "loss": 0.1659, "step": 33018 }, { "epoch": 0.05854718926504837, "grad_norm": 0.443359375, "learning_rate": 0.0015409756929124943, "loss": 0.1782, "step": 33020 }, { "epoch": 0.058550735430358186, "grad_norm": 1.078125, "learning_rate": 0.001540923807303798, "loss": 0.257, "step": 33022 }, { "epoch": 0.05855428159566801, "grad_norm": 1.5, "learning_rate": 0.0015408719197667576, "loss": 0.3601, "step": 33024 }, { "epoch": 0.05855782776097782, "grad_norm": 0.3828125, "learning_rate": 0.0015408200303015994, "loss": 0.212, "step": 33026 }, { "epoch": 0.05856137392628764, "grad_norm": 0.41015625, "learning_rate": 0.0015407681389085501, "loss": 0.1554, "step": 33028 }, { "epoch": 0.05856492009159745, "grad_norm": 0.2109375, "learning_rate": 0.001540716245587837, "loss": 0.1979, "step": 33030 }, { "epoch": 0.058568466256907266, "grad_norm": 0.259765625, "learning_rate": 0.0015406643503396876, "loss": 0.3732, "step": 33032 }, { "epoch": 0.05857201242221708, "grad_norm": 0.28515625, "learning_rate": 0.0015406124531643277, "loss": 0.1809, "step": 33034 }, { "epoch": 0.058575558587526895, "grad_norm": 1.9765625, "learning_rate": 0.0015405605540619852, "loss": 0.2567, "step": 33036 }, { "epoch": 0.05857910475283671, "grad_norm": 1.125, "learning_rate": 0.0015405086530328866, "loss": 0.2221, "step": 33038 }, { "epoch": 0.058582650918146524, "grad_norm": 0.341796875, "learning_rate": 0.0015404567500772591, "loss": 0.1321, "step": 33040 }, { "epoch": 0.05858619708345634, "grad_norm": 0.94921875, "learning_rate": 0.0015404048451953297, "loss": 0.1509, "step": 33042 }, { "epoch": 0.05858974324876615, "grad_norm": 3.046875, "learning_rate": 0.001540352938387325, "loss": 0.1797, "step": 33044 }, { "epoch": 0.058593289414075975, "grad_norm": 0.921875, "learning_rate": 0.0015403010296534728, "loss": 0.3508, "step": 33046 }, { "epoch": 0.05859683557938579, "grad_norm": 0.171875, "learning_rate": 0.0015402491189939996, "loss": 0.1443, "step": 33048 }, { "epoch": 0.058600381744695604, "grad_norm": 1.8671875, "learning_rate": 0.0015401972064091319, "loss": 0.2212, "step": 33050 }, { "epoch": 0.05860392791000542, "grad_norm": 0.267578125, "learning_rate": 0.001540145291899098, "loss": 0.1768, "step": 33052 }, { "epoch": 0.05860747407531523, "grad_norm": 0.251953125, "learning_rate": 0.0015400933754641237, "loss": 0.2608, "step": 33054 }, { "epoch": 0.05861102024062505, "grad_norm": 0.330078125, "learning_rate": 0.001540041457104437, "loss": 0.1734, "step": 33056 }, { "epoch": 0.05861456640593486, "grad_norm": 0.3046875, "learning_rate": 0.0015399895368202643, "loss": 0.1636, "step": 33058 }, { "epoch": 0.05861811257124468, "grad_norm": 0.47265625, "learning_rate": 0.0015399376146118333, "loss": 0.1403, "step": 33060 }, { "epoch": 0.05862165873655449, "grad_norm": 0.5390625, "learning_rate": 0.0015398856904793702, "loss": 0.1597, "step": 33062 }, { "epoch": 0.058625204901864306, "grad_norm": 0.353515625, "learning_rate": 0.0015398337644231032, "loss": 0.1572, "step": 33064 }, { "epoch": 0.05862875106717412, "grad_norm": 0.515625, "learning_rate": 0.0015397818364432585, "loss": 0.2071, "step": 33066 }, { "epoch": 0.058632297232483935, "grad_norm": 0.875, "learning_rate": 0.0015397299065400636, "loss": 0.2024, "step": 33068 }, { "epoch": 0.058635843397793756, "grad_norm": 0.6953125, "learning_rate": 0.0015396779747137452, "loss": 0.1645, "step": 33070 }, { "epoch": 0.05863938956310357, "grad_norm": 0.953125, "learning_rate": 0.0015396260409645311, "loss": 0.227, "step": 33072 }, { "epoch": 0.058642935728413385, "grad_norm": 0.65234375, "learning_rate": 0.001539574105292648, "loss": 0.2297, "step": 33074 }, { "epoch": 0.0586464818937232, "grad_norm": 0.734375, "learning_rate": 0.0015395221676983229, "loss": 0.3506, "step": 33076 }, { "epoch": 0.058650028059033014, "grad_norm": 0.49609375, "learning_rate": 0.0015394702281817832, "loss": 0.2145, "step": 33078 }, { "epoch": 0.05865357422434283, "grad_norm": 0.416015625, "learning_rate": 0.0015394182867432562, "loss": 0.1517, "step": 33080 }, { "epoch": 0.058657120389652644, "grad_norm": 0.484375, "learning_rate": 0.0015393663433829687, "loss": 0.2045, "step": 33082 }, { "epoch": 0.05866066655496246, "grad_norm": 0.2080078125, "learning_rate": 0.001539314398101148, "loss": 0.2133, "step": 33084 }, { "epoch": 0.05866421272027227, "grad_norm": 0.447265625, "learning_rate": 0.0015392624508980216, "loss": 0.2679, "step": 33086 }, { "epoch": 0.05866775888558209, "grad_norm": 0.271484375, "learning_rate": 0.0015392105017738161, "loss": 0.1916, "step": 33088 }, { "epoch": 0.0586713050508919, "grad_norm": 0.60546875, "learning_rate": 0.001539158550728759, "loss": 0.1928, "step": 33090 }, { "epoch": 0.05867485121620172, "grad_norm": 0.43359375, "learning_rate": 0.0015391065977630775, "loss": 0.3062, "step": 33092 }, { "epoch": 0.05867839738151154, "grad_norm": 2.890625, "learning_rate": 0.001539054642876999, "loss": 0.2165, "step": 33094 }, { "epoch": 0.05868194354682135, "grad_norm": 0.2578125, "learning_rate": 0.0015390026860707502, "loss": 0.1672, "step": 33096 }, { "epoch": 0.05868548971213117, "grad_norm": 2.578125, "learning_rate": 0.0015389507273445586, "loss": 0.222, "step": 33098 }, { "epoch": 0.05868903587744098, "grad_norm": 1.625, "learning_rate": 0.001538898766698652, "loss": 0.2514, "step": 33100 }, { "epoch": 0.058692582042750796, "grad_norm": 0.388671875, "learning_rate": 0.001538846804133257, "loss": 0.1975, "step": 33102 }, { "epoch": 0.05869612820806061, "grad_norm": 0.359375, "learning_rate": 0.0015387948396486009, "loss": 0.2053, "step": 33104 }, { "epoch": 0.058699674373370425, "grad_norm": 0.1708984375, "learning_rate": 0.0015387428732449107, "loss": 0.1841, "step": 33106 }, { "epoch": 0.05870322053868024, "grad_norm": 0.2373046875, "learning_rate": 0.0015386909049224145, "loss": 0.1567, "step": 33108 }, { "epoch": 0.058706766703990054, "grad_norm": 4.0, "learning_rate": 0.0015386389346813388, "loss": 0.2079, "step": 33110 }, { "epoch": 0.05871031286929987, "grad_norm": 0.380859375, "learning_rate": 0.0015385869625219113, "loss": 0.1732, "step": 33112 }, { "epoch": 0.05871385903460969, "grad_norm": 0.3359375, "learning_rate": 0.0015385349884443592, "loss": 0.2141, "step": 33114 }, { "epoch": 0.058717405199919505, "grad_norm": 0.298828125, "learning_rate": 0.0015384830124489097, "loss": 0.1606, "step": 33116 }, { "epoch": 0.05872095136522932, "grad_norm": 0.341796875, "learning_rate": 0.0015384310345357903, "loss": 0.1862, "step": 33118 }, { "epoch": 0.058724497530539134, "grad_norm": 1.0546875, "learning_rate": 0.0015383790547052283, "loss": 0.2023, "step": 33120 }, { "epoch": 0.05872804369584895, "grad_norm": 0.59765625, "learning_rate": 0.0015383270729574512, "loss": 0.1785, "step": 33122 }, { "epoch": 0.05873158986115876, "grad_norm": 0.37890625, "learning_rate": 0.0015382750892926856, "loss": 0.1788, "step": 33124 }, { "epoch": 0.05873513602646858, "grad_norm": 0.37109375, "learning_rate": 0.0015382231037111597, "loss": 0.2021, "step": 33126 }, { "epoch": 0.05873868219177839, "grad_norm": 0.2578125, "learning_rate": 0.0015381711162131, "loss": 0.2191, "step": 33128 }, { "epoch": 0.05874222835708821, "grad_norm": 0.609375, "learning_rate": 0.0015381191267987348, "loss": 0.1894, "step": 33130 }, { "epoch": 0.05874577452239802, "grad_norm": 0.3046875, "learning_rate": 0.0015380671354682906, "loss": 0.1952, "step": 33132 }, { "epoch": 0.058749320687707836, "grad_norm": 0.828125, "learning_rate": 0.001538015142221996, "loss": 0.2016, "step": 33134 }, { "epoch": 0.05875286685301765, "grad_norm": 0.80859375, "learning_rate": 0.0015379631470600768, "loss": 0.3386, "step": 33136 }, { "epoch": 0.05875641301832747, "grad_norm": 0.232421875, "learning_rate": 0.0015379111499827615, "loss": 0.1745, "step": 33138 }, { "epoch": 0.058759959183637286, "grad_norm": 0.2333984375, "learning_rate": 0.0015378591509902773, "loss": 0.219, "step": 33140 }, { "epoch": 0.0587635053489471, "grad_norm": 0.376953125, "learning_rate": 0.0015378071500828515, "loss": 0.1798, "step": 33142 }, { "epoch": 0.058767051514256916, "grad_norm": 0.38671875, "learning_rate": 0.0015377551472607115, "loss": 0.2139, "step": 33144 }, { "epoch": 0.05877059767956673, "grad_norm": 0.21875, "learning_rate": 0.0015377031425240848, "loss": 0.1753, "step": 33146 }, { "epoch": 0.058774143844876545, "grad_norm": 0.408203125, "learning_rate": 0.0015376511358731987, "loss": 0.1457, "step": 33148 }, { "epoch": 0.05877769001018636, "grad_norm": 0.98046875, "learning_rate": 0.0015375991273082806, "loss": 0.2567, "step": 33150 }, { "epoch": 0.058781236175496174, "grad_norm": 5.34375, "learning_rate": 0.0015375471168295582, "loss": 0.4731, "step": 33152 }, { "epoch": 0.05878478234080599, "grad_norm": 0.423828125, "learning_rate": 0.001537495104437259, "loss": 0.2048, "step": 33154 }, { "epoch": 0.0587883285061158, "grad_norm": 0.54296875, "learning_rate": 0.0015374430901316104, "loss": 0.1737, "step": 33156 }, { "epoch": 0.05879187467142562, "grad_norm": 0.7421875, "learning_rate": 0.0015373910739128395, "loss": 0.2454, "step": 33158 }, { "epoch": 0.05879542083673544, "grad_norm": 1.1328125, "learning_rate": 0.0015373390557811744, "loss": 0.2127, "step": 33160 }, { "epoch": 0.05879896700204525, "grad_norm": 0.439453125, "learning_rate": 0.0015372870357368423, "loss": 0.2003, "step": 33162 }, { "epoch": 0.05880251316735507, "grad_norm": 0.65234375, "learning_rate": 0.0015372350137800705, "loss": 0.1808, "step": 33164 }, { "epoch": 0.05880605933266488, "grad_norm": 0.2021484375, "learning_rate": 0.0015371829899110867, "loss": 0.1441, "step": 33166 }, { "epoch": 0.0588096054979747, "grad_norm": 0.51953125, "learning_rate": 0.0015371309641301187, "loss": 0.3938, "step": 33168 }, { "epoch": 0.05881315166328451, "grad_norm": 0.54296875, "learning_rate": 0.0015370789364373939, "loss": 0.2284, "step": 33170 }, { "epoch": 0.058816697828594326, "grad_norm": 0.89453125, "learning_rate": 0.0015370269068331393, "loss": 0.1523, "step": 33172 }, { "epoch": 0.05882024399390414, "grad_norm": 0.640625, "learning_rate": 0.001536974875317583, "loss": 0.2535, "step": 33174 }, { "epoch": 0.058823790159213955, "grad_norm": 0.4609375, "learning_rate": 0.0015369228418909524, "loss": 0.2193, "step": 33176 }, { "epoch": 0.05882733632452377, "grad_norm": 0.328125, "learning_rate": 0.0015368708065534753, "loss": 0.2074, "step": 33178 }, { "epoch": 0.058830882489833584, "grad_norm": 1.1484375, "learning_rate": 0.001536818769305379, "loss": 0.2534, "step": 33180 }, { "epoch": 0.058834428655143406, "grad_norm": 0.390625, "learning_rate": 0.001536766730146891, "loss": 0.2513, "step": 33182 }, { "epoch": 0.05883797482045322, "grad_norm": 0.5, "learning_rate": 0.0015367146890782391, "loss": 0.1681, "step": 33184 }, { "epoch": 0.058841520985763035, "grad_norm": 0.322265625, "learning_rate": 0.0015366626460996506, "loss": 0.1588, "step": 33186 }, { "epoch": 0.05884506715107285, "grad_norm": 0.42578125, "learning_rate": 0.0015366106012113536, "loss": 0.2279, "step": 33188 }, { "epoch": 0.058848613316382664, "grad_norm": 4.375, "learning_rate": 0.0015365585544135753, "loss": 0.2279, "step": 33190 }, { "epoch": 0.05885215948169248, "grad_norm": 0.380859375, "learning_rate": 0.0015365065057065437, "loss": 0.2137, "step": 33192 }, { "epoch": 0.05885570564700229, "grad_norm": 0.1806640625, "learning_rate": 0.0015364544550904862, "loss": 0.1321, "step": 33194 }, { "epoch": 0.05885925181231211, "grad_norm": 0.29296875, "learning_rate": 0.0015364024025656302, "loss": 0.1958, "step": 33196 }, { "epoch": 0.05886279797762192, "grad_norm": 0.33984375, "learning_rate": 0.001536350348132204, "loss": 0.1442, "step": 33198 }, { "epoch": 0.05886634414293174, "grad_norm": 0.337890625, "learning_rate": 0.0015362982917904344, "loss": 0.2503, "step": 33200 }, { "epoch": 0.05886989030824155, "grad_norm": 0.6640625, "learning_rate": 0.0015362462335405495, "loss": 0.192, "step": 33202 }, { "epoch": 0.058873436473551366, "grad_norm": 0.703125, "learning_rate": 0.0015361941733827772, "loss": 0.2114, "step": 33204 }, { "epoch": 0.05887698263886119, "grad_norm": 0.37890625, "learning_rate": 0.0015361421113173446, "loss": 0.1664, "step": 33206 }, { "epoch": 0.058880528804171, "grad_norm": 0.671875, "learning_rate": 0.00153609004734448, "loss": 0.2578, "step": 33208 }, { "epoch": 0.05888407496948082, "grad_norm": 0.8984375, "learning_rate": 0.0015360379814644107, "loss": 0.4418, "step": 33210 }, { "epoch": 0.05888762113479063, "grad_norm": 0.236328125, "learning_rate": 0.0015359859136773648, "loss": 0.1694, "step": 33212 }, { "epoch": 0.058891167300100446, "grad_norm": 0.267578125, "learning_rate": 0.0015359338439835696, "loss": 0.2108, "step": 33214 }, { "epoch": 0.05889471346541026, "grad_norm": 0.578125, "learning_rate": 0.0015358817723832533, "loss": 0.149, "step": 33216 }, { "epoch": 0.058898259630720075, "grad_norm": 0.2041015625, "learning_rate": 0.001535829698876643, "loss": 0.1621, "step": 33218 }, { "epoch": 0.05890180579602989, "grad_norm": 2.5, "learning_rate": 0.001535777623463967, "loss": 0.5894, "step": 33220 }, { "epoch": 0.058905351961339704, "grad_norm": 0.47265625, "learning_rate": 0.0015357255461454524, "loss": 0.2307, "step": 33222 }, { "epoch": 0.05890889812664952, "grad_norm": 0.482421875, "learning_rate": 0.0015356734669213278, "loss": 0.1825, "step": 33224 }, { "epoch": 0.05891244429195933, "grad_norm": 0.796875, "learning_rate": 0.00153562138579182, "loss": 0.1274, "step": 33226 }, { "epoch": 0.058915990457269155, "grad_norm": 0.3203125, "learning_rate": 0.0015355693027571578, "loss": 0.212, "step": 33228 }, { "epoch": 0.05891953662257897, "grad_norm": 0.236328125, "learning_rate": 0.001535517217817568, "loss": 0.149, "step": 33230 }, { "epoch": 0.058923082787888784, "grad_norm": 0.34375, "learning_rate": 0.0015354651309732795, "loss": 0.1778, "step": 33232 }, { "epoch": 0.0589266289531986, "grad_norm": 0.37109375, "learning_rate": 0.0015354130422245192, "loss": 0.2064, "step": 33234 }, { "epoch": 0.05893017511850841, "grad_norm": 0.470703125, "learning_rate": 0.001535360951571515, "loss": 0.2078, "step": 33236 }, { "epoch": 0.05893372128381823, "grad_norm": 0.408203125, "learning_rate": 0.0015353088590144947, "loss": 0.2001, "step": 33238 }, { "epoch": 0.05893726744912804, "grad_norm": 0.208984375, "learning_rate": 0.0015352567645536869, "loss": 0.1969, "step": 33240 }, { "epoch": 0.058940813614437856, "grad_norm": 0.3203125, "learning_rate": 0.0015352046681893184, "loss": 0.1613, "step": 33242 }, { "epoch": 0.05894435977974767, "grad_norm": 0.2734375, "learning_rate": 0.0015351525699216176, "loss": 0.1828, "step": 33244 }, { "epoch": 0.058947905945057485, "grad_norm": 0.86328125, "learning_rate": 0.0015351004697508122, "loss": 0.2995, "step": 33246 }, { "epoch": 0.0589514521103673, "grad_norm": 0.48046875, "learning_rate": 0.0015350483676771299, "loss": 0.2031, "step": 33248 }, { "epoch": 0.05895499827567712, "grad_norm": 0.98828125, "learning_rate": 0.001534996263700799, "loss": 0.2361, "step": 33250 }, { "epoch": 0.058958544440986936, "grad_norm": 1.546875, "learning_rate": 0.0015349441578220472, "loss": 0.271, "step": 33252 }, { "epoch": 0.05896209060629675, "grad_norm": 0.6015625, "learning_rate": 0.0015348920500411023, "loss": 0.2313, "step": 33254 }, { "epoch": 0.058965636771606565, "grad_norm": 0.28515625, "learning_rate": 0.0015348399403581918, "loss": 0.1195, "step": 33256 }, { "epoch": 0.05896918293691638, "grad_norm": 0.8046875, "learning_rate": 0.0015347878287735445, "loss": 0.1331, "step": 33258 }, { "epoch": 0.058972729102226194, "grad_norm": 0.73046875, "learning_rate": 0.0015347357152873872, "loss": 0.1696, "step": 33260 }, { "epoch": 0.05897627526753601, "grad_norm": 1.953125, "learning_rate": 0.001534683599899949, "loss": 0.2244, "step": 33262 }, { "epoch": 0.05897982143284582, "grad_norm": 0.9375, "learning_rate": 0.001534631482611457, "loss": 0.1743, "step": 33264 }, { "epoch": 0.05898336759815564, "grad_norm": 1.0390625, "learning_rate": 0.001534579363422139, "loss": 0.2006, "step": 33266 }, { "epoch": 0.05898691376346545, "grad_norm": 0.7109375, "learning_rate": 0.0015345272423322237, "loss": 0.1934, "step": 33268 }, { "epoch": 0.05899045992877527, "grad_norm": 1.265625, "learning_rate": 0.0015344751193419387, "loss": 0.2028, "step": 33270 }, { "epoch": 0.05899400609408508, "grad_norm": 1.5234375, "learning_rate": 0.0015344229944515117, "loss": 0.4314, "step": 33272 }, { "epoch": 0.0589975522593949, "grad_norm": 0.4609375, "learning_rate": 0.001534370867661171, "loss": 0.1678, "step": 33274 }, { "epoch": 0.05900109842470472, "grad_norm": 0.263671875, "learning_rate": 0.0015343187389711445, "loss": 0.2136, "step": 33276 }, { "epoch": 0.05900464459001453, "grad_norm": 1.21875, "learning_rate": 0.0015342666083816598, "loss": 0.2308, "step": 33278 }, { "epoch": 0.05900819075532435, "grad_norm": 0.40625, "learning_rate": 0.0015342144758929452, "loss": 0.2164, "step": 33280 }, { "epoch": 0.05901173692063416, "grad_norm": 0.494140625, "learning_rate": 0.001534162341505229, "loss": 0.1469, "step": 33282 }, { "epoch": 0.059015283085943976, "grad_norm": 0.46484375, "learning_rate": 0.0015341102052187387, "loss": 0.2288, "step": 33284 }, { "epoch": 0.05901882925125379, "grad_norm": 0.427734375, "learning_rate": 0.001534058067033703, "loss": 0.157, "step": 33286 }, { "epoch": 0.059022375416563605, "grad_norm": 0.51953125, "learning_rate": 0.0015340059269503491, "loss": 0.2001, "step": 33288 }, { "epoch": 0.05902592158187342, "grad_norm": 4.1875, "learning_rate": 0.0015339537849689056, "loss": 0.2982, "step": 33290 }, { "epoch": 0.059029467747183234, "grad_norm": 0.953125, "learning_rate": 0.0015339016410895999, "loss": 0.2016, "step": 33292 }, { "epoch": 0.05903301391249305, "grad_norm": 0.6484375, "learning_rate": 0.0015338494953126608, "loss": 0.2555, "step": 33294 }, { "epoch": 0.05903656007780287, "grad_norm": 2.46875, "learning_rate": 0.0015337973476383156, "loss": 0.3006, "step": 33296 }, { "epoch": 0.059040106243112685, "grad_norm": 0.32421875, "learning_rate": 0.001533745198066793, "loss": 0.4011, "step": 33298 }, { "epoch": 0.0590436524084225, "grad_norm": 0.74609375, "learning_rate": 0.0015336930465983207, "loss": 0.2422, "step": 33300 }, { "epoch": 0.059047198573732314, "grad_norm": 0.73828125, "learning_rate": 0.0015336408932331274, "loss": 0.2428, "step": 33302 }, { "epoch": 0.05905074473904213, "grad_norm": 0.8046875, "learning_rate": 0.0015335887379714403, "loss": 0.1995, "step": 33304 }, { "epoch": 0.05905429090435194, "grad_norm": 0.98828125, "learning_rate": 0.0015335365808134883, "loss": 0.2213, "step": 33306 }, { "epoch": 0.05905783706966176, "grad_norm": 0.287109375, "learning_rate": 0.0015334844217594987, "loss": 0.1847, "step": 33308 }, { "epoch": 0.05906138323497157, "grad_norm": 1.0234375, "learning_rate": 0.0015334322608097004, "loss": 0.2486, "step": 33310 }, { "epoch": 0.05906492940028139, "grad_norm": 0.6171875, "learning_rate": 0.0015333800979643209, "loss": 0.1591, "step": 33312 }, { "epoch": 0.0590684755655912, "grad_norm": 0.8359375, "learning_rate": 0.0015333279332235888, "loss": 0.2214, "step": 33314 }, { "epoch": 0.059072021730901016, "grad_norm": 0.52734375, "learning_rate": 0.0015332757665877316, "loss": 0.1995, "step": 33316 }, { "epoch": 0.05907556789621084, "grad_norm": 0.296875, "learning_rate": 0.0015332235980569785, "loss": 0.2184, "step": 33318 }, { "epoch": 0.05907911406152065, "grad_norm": 1.5234375, "learning_rate": 0.0015331714276315563, "loss": 0.1999, "step": 33320 }, { "epoch": 0.059082660226830466, "grad_norm": 0.58203125, "learning_rate": 0.0015331192553116944, "loss": 0.232, "step": 33322 }, { "epoch": 0.05908620639214028, "grad_norm": 0.50390625, "learning_rate": 0.00153306708109762, "loss": 0.4622, "step": 33324 }, { "epoch": 0.059089752557450095, "grad_norm": 0.5234375, "learning_rate": 0.0015330149049895623, "loss": 0.2346, "step": 33326 }, { "epoch": 0.05909329872275991, "grad_norm": 0.298828125, "learning_rate": 0.0015329627269877488, "loss": 0.1224, "step": 33328 }, { "epoch": 0.059096844888069724, "grad_norm": 0.64453125, "learning_rate": 0.0015329105470924078, "loss": 0.1727, "step": 33330 }, { "epoch": 0.05910039105337954, "grad_norm": 0.33984375, "learning_rate": 0.0015328583653037675, "loss": 0.1426, "step": 33332 }, { "epoch": 0.059103937218689354, "grad_norm": 0.291015625, "learning_rate": 0.001532806181622056, "loss": 0.1845, "step": 33334 }, { "epoch": 0.05910748338399917, "grad_norm": 0.53515625, "learning_rate": 0.0015327539960475018, "loss": 0.231, "step": 33336 }, { "epoch": 0.05911102954930898, "grad_norm": 1.5859375, "learning_rate": 0.0015327018085803327, "loss": 0.2401, "step": 33338 }, { "epoch": 0.0591145757146188, "grad_norm": 0.6171875, "learning_rate": 0.001532649619220778, "loss": 0.1821, "step": 33340 }, { "epoch": 0.05911812187992862, "grad_norm": 7.96875, "learning_rate": 0.0015325974279690644, "loss": 0.1784, "step": 33342 }, { "epoch": 0.05912166804523843, "grad_norm": 0.326171875, "learning_rate": 0.0015325452348254214, "loss": 0.213, "step": 33344 }, { "epoch": 0.05912521421054825, "grad_norm": 0.322265625, "learning_rate": 0.0015324930397900766, "loss": 0.168, "step": 33346 }, { "epoch": 0.05912876037585806, "grad_norm": 0.59765625, "learning_rate": 0.0015324408428632585, "loss": 0.1221, "step": 33348 }, { "epoch": 0.05913230654116788, "grad_norm": 0.43359375, "learning_rate": 0.0015323886440451952, "loss": 0.1821, "step": 33350 }, { "epoch": 0.05913585270647769, "grad_norm": 0.2333984375, "learning_rate": 0.0015323364433361155, "loss": 0.1428, "step": 33352 }, { "epoch": 0.059139398871787506, "grad_norm": 0.89453125, "learning_rate": 0.001532284240736247, "loss": 0.3291, "step": 33354 }, { "epoch": 0.05914294503709732, "grad_norm": 0.2734375, "learning_rate": 0.0015322320362458183, "loss": 0.1918, "step": 33356 }, { "epoch": 0.059146491202407135, "grad_norm": 0.349609375, "learning_rate": 0.001532179829865058, "loss": 0.1663, "step": 33358 }, { "epoch": 0.05915003736771695, "grad_norm": 0.427734375, "learning_rate": 0.0015321276215941942, "loss": 0.2191, "step": 33360 }, { "epoch": 0.059153583533026764, "grad_norm": 0.3359375, "learning_rate": 0.001532075411433455, "loss": 0.1872, "step": 33362 }, { "epoch": 0.059157129698336586, "grad_norm": 1.0234375, "learning_rate": 0.0015320231993830688, "loss": 0.1627, "step": 33364 }, { "epoch": 0.0591606758636464, "grad_norm": 0.2392578125, "learning_rate": 0.0015319709854432644, "loss": 0.1638, "step": 33366 }, { "epoch": 0.059164222028956215, "grad_norm": 0.8671875, "learning_rate": 0.0015319187696142698, "loss": 0.125, "step": 33368 }, { "epoch": 0.05916776819426603, "grad_norm": 0.90625, "learning_rate": 0.0015318665518963132, "loss": 0.2462, "step": 33370 }, { "epoch": 0.059171314359575844, "grad_norm": 0.57421875, "learning_rate": 0.001531814332289623, "loss": 0.231, "step": 33372 }, { "epoch": 0.05917486052488566, "grad_norm": 0.451171875, "learning_rate": 0.001531762110794428, "loss": 0.1579, "step": 33374 }, { "epoch": 0.05917840669019547, "grad_norm": 0.671875, "learning_rate": 0.0015317098874109566, "loss": 0.1945, "step": 33376 }, { "epoch": 0.05918195285550529, "grad_norm": 0.30078125, "learning_rate": 0.0015316576621394362, "loss": 0.1435, "step": 33378 }, { "epoch": 0.0591854990208151, "grad_norm": 0.3671875, "learning_rate": 0.0015316054349800963, "loss": 0.1812, "step": 33380 }, { "epoch": 0.05918904518612492, "grad_norm": 0.5, "learning_rate": 0.0015315532059331652, "loss": 0.2136, "step": 33382 }, { "epoch": 0.05919259135143473, "grad_norm": 0.2041015625, "learning_rate": 0.0015315009749988708, "loss": 0.1933, "step": 33384 }, { "epoch": 0.05919613751674455, "grad_norm": 0.357421875, "learning_rate": 0.0015314487421774416, "loss": 0.2558, "step": 33386 }, { "epoch": 0.05919968368205437, "grad_norm": 0.369140625, "learning_rate": 0.0015313965074691064, "loss": 0.186, "step": 33388 }, { "epoch": 0.05920322984736418, "grad_norm": 0.83203125, "learning_rate": 0.0015313442708740934, "loss": 0.2182, "step": 33390 }, { "epoch": 0.059206776012673996, "grad_norm": 1.0078125, "learning_rate": 0.0015312920323926314, "loss": 0.1513, "step": 33392 }, { "epoch": 0.05921032217798381, "grad_norm": 0.443359375, "learning_rate": 0.0015312397920249484, "loss": 0.2523, "step": 33394 }, { "epoch": 0.059213868343293626, "grad_norm": 0.5078125, "learning_rate": 0.0015311875497712728, "loss": 0.2306, "step": 33396 }, { "epoch": 0.05921741450860344, "grad_norm": 0.5078125, "learning_rate": 0.0015311353056318333, "loss": 0.2207, "step": 33398 }, { "epoch": 0.059220960673913255, "grad_norm": 0.322265625, "learning_rate": 0.0015310830596068589, "loss": 0.1792, "step": 33400 }, { "epoch": 0.05922450683922307, "grad_norm": 0.341796875, "learning_rate": 0.0015310308116965774, "loss": 0.1667, "step": 33402 }, { "epoch": 0.059228053004532884, "grad_norm": 0.5078125, "learning_rate": 0.0015309785619012175, "loss": 0.5121, "step": 33404 }, { "epoch": 0.0592315991698427, "grad_norm": 0.396484375, "learning_rate": 0.0015309263102210075, "loss": 0.1846, "step": 33406 }, { "epoch": 0.05923514533515251, "grad_norm": 0.64453125, "learning_rate": 0.0015308740566561763, "loss": 0.2024, "step": 33408 }, { "epoch": 0.059238691500462334, "grad_norm": 0.58203125, "learning_rate": 0.001530821801206952, "loss": 0.228, "step": 33410 }, { "epoch": 0.05924223766577215, "grad_norm": 0.380859375, "learning_rate": 0.0015307695438735638, "loss": 0.1741, "step": 33412 }, { "epoch": 0.05924578383108196, "grad_norm": 1.2890625, "learning_rate": 0.0015307172846562396, "loss": 0.1821, "step": 33414 }, { "epoch": 0.05924932999639178, "grad_norm": 0.314453125, "learning_rate": 0.001530665023555208, "loss": 0.1917, "step": 33416 }, { "epoch": 0.05925287616170159, "grad_norm": 0.60546875, "learning_rate": 0.001530612760570698, "loss": 0.1541, "step": 33418 }, { "epoch": 0.05925642232701141, "grad_norm": 0.5390625, "learning_rate": 0.001530560495702938, "loss": 0.1883, "step": 33420 }, { "epoch": 0.05925996849232122, "grad_norm": 0.56640625, "learning_rate": 0.0015305082289521566, "loss": 0.1816, "step": 33422 }, { "epoch": 0.059263514657631036, "grad_norm": 0.369140625, "learning_rate": 0.0015304559603185818, "loss": 0.2245, "step": 33424 }, { "epoch": 0.05926706082294085, "grad_norm": 1.390625, "learning_rate": 0.001530403689802443, "loss": 0.2308, "step": 33426 }, { "epoch": 0.059270606988250665, "grad_norm": 0.80859375, "learning_rate": 0.0015303514174039683, "loss": 0.3468, "step": 33428 }, { "epoch": 0.05927415315356048, "grad_norm": 0.71875, "learning_rate": 0.0015302991431233868, "loss": 0.1712, "step": 33430 }, { "epoch": 0.0592776993188703, "grad_norm": 1.8671875, "learning_rate": 0.0015302468669609263, "loss": 0.2805, "step": 33432 }, { "epoch": 0.059281245484180116, "grad_norm": 1.1015625, "learning_rate": 0.0015301945889168165, "loss": 0.2135, "step": 33434 }, { "epoch": 0.05928479164948993, "grad_norm": 0.37109375, "learning_rate": 0.001530142308991285, "loss": 0.1621, "step": 33436 }, { "epoch": 0.059288337814799745, "grad_norm": 0.95703125, "learning_rate": 0.001530090027184561, "loss": 0.2674, "step": 33438 }, { "epoch": 0.05929188398010956, "grad_norm": 0.30078125, "learning_rate": 0.0015300377434968734, "loss": 0.1662, "step": 33440 }, { "epoch": 0.059295430145419374, "grad_norm": 0.43359375, "learning_rate": 0.0015299854579284501, "loss": 0.1861, "step": 33442 }, { "epoch": 0.05929897631072919, "grad_norm": 0.33984375, "learning_rate": 0.00152993317047952, "loss": 0.1812, "step": 33444 }, { "epoch": 0.059302522476039, "grad_norm": 0.33203125, "learning_rate": 0.0015298808811503124, "loss": 0.1583, "step": 33446 }, { "epoch": 0.05930606864134882, "grad_norm": 0.4296875, "learning_rate": 0.0015298285899410554, "loss": 0.235, "step": 33448 }, { "epoch": 0.05930961480665863, "grad_norm": 0.88671875, "learning_rate": 0.0015297762968519777, "loss": 0.2343, "step": 33450 }, { "epoch": 0.05931316097196845, "grad_norm": 0.150390625, "learning_rate": 0.0015297240018833083, "loss": 0.1557, "step": 33452 }, { "epoch": 0.05931670713727827, "grad_norm": 0.3359375, "learning_rate": 0.0015296717050352758, "loss": 0.222, "step": 33454 }, { "epoch": 0.05932025330258808, "grad_norm": 0.52734375, "learning_rate": 0.0015296194063081085, "loss": 0.1703, "step": 33456 }, { "epoch": 0.0593237994678979, "grad_norm": 0.79296875, "learning_rate": 0.0015295671057020358, "loss": 0.2106, "step": 33458 }, { "epoch": 0.05932734563320771, "grad_norm": 0.431640625, "learning_rate": 0.001529514803217286, "loss": 0.1945, "step": 33460 }, { "epoch": 0.05933089179851753, "grad_norm": 1.015625, "learning_rate": 0.0015294624988540877, "loss": 0.1778, "step": 33462 }, { "epoch": 0.05933443796382734, "grad_norm": 0.279296875, "learning_rate": 0.00152941019261267, "loss": 0.1273, "step": 33464 }, { "epoch": 0.059337984129137156, "grad_norm": 0.73828125, "learning_rate": 0.0015293578844932617, "loss": 0.1659, "step": 33466 }, { "epoch": 0.05934153029444697, "grad_norm": 0.78515625, "learning_rate": 0.0015293055744960912, "loss": 0.2101, "step": 33468 }, { "epoch": 0.059345076459756785, "grad_norm": 0.81640625, "learning_rate": 0.0015292532626213877, "loss": 0.2368, "step": 33470 }, { "epoch": 0.0593486226250666, "grad_norm": 1.34375, "learning_rate": 0.0015292009488693796, "loss": 0.2071, "step": 33472 }, { "epoch": 0.059352168790376414, "grad_norm": 0.8359375, "learning_rate": 0.0015291486332402963, "loss": 0.2293, "step": 33474 }, { "epoch": 0.05935571495568623, "grad_norm": 0.53515625, "learning_rate": 0.0015290963157343655, "loss": 0.2014, "step": 33476 }, { "epoch": 0.05935926112099605, "grad_norm": 0.6328125, "learning_rate": 0.0015290439963518171, "loss": 0.1763, "step": 33478 }, { "epoch": 0.059362807286305865, "grad_norm": 0.51171875, "learning_rate": 0.0015289916750928793, "loss": 0.1559, "step": 33480 }, { "epoch": 0.05936635345161568, "grad_norm": 0.71875, "learning_rate": 0.0015289393519577813, "loss": 0.2062, "step": 33482 }, { "epoch": 0.059369899616925494, "grad_norm": 0.42578125, "learning_rate": 0.0015288870269467515, "loss": 0.2502, "step": 33484 }, { "epoch": 0.05937344578223531, "grad_norm": 0.8359375, "learning_rate": 0.001528834700060019, "loss": 0.1516, "step": 33486 }, { "epoch": 0.05937699194754512, "grad_norm": 0.69140625, "learning_rate": 0.0015287823712978126, "loss": 0.206, "step": 33488 }, { "epoch": 0.05938053811285494, "grad_norm": 0.55078125, "learning_rate": 0.0015287300406603614, "loss": 0.1283, "step": 33490 }, { "epoch": 0.05938408427816475, "grad_norm": 0.296875, "learning_rate": 0.0015286777081478938, "loss": 0.1221, "step": 33492 }, { "epoch": 0.059387630443474566, "grad_norm": 0.7734375, "learning_rate": 0.001528625373760639, "loss": 0.1828, "step": 33494 }, { "epoch": 0.05939117660878438, "grad_norm": 1.4453125, "learning_rate": 0.001528573037498826, "loss": 0.2133, "step": 33496 }, { "epoch": 0.059394722774094195, "grad_norm": 0.2265625, "learning_rate": 0.0015285206993626832, "loss": 0.2185, "step": 33498 }, { "epoch": 0.05939826893940402, "grad_norm": 0.56640625, "learning_rate": 0.0015284683593524398, "loss": 0.2415, "step": 33500 }, { "epoch": 0.05940181510471383, "grad_norm": 0.2353515625, "learning_rate": 0.0015284160174683246, "loss": 0.2333, "step": 33502 }, { "epoch": 0.059405361270023646, "grad_norm": 1.0, "learning_rate": 0.0015283636737105667, "loss": 0.1883, "step": 33504 }, { "epoch": 0.05940890743533346, "grad_norm": 0.306640625, "learning_rate": 0.001528311328079395, "loss": 0.2563, "step": 33506 }, { "epoch": 0.059412453600643275, "grad_norm": 0.37109375, "learning_rate": 0.0015282589805750383, "loss": 0.1517, "step": 33508 }, { "epoch": 0.05941599976595309, "grad_norm": 0.7890625, "learning_rate": 0.0015282066311977254, "loss": 0.199, "step": 33510 }, { "epoch": 0.059419545931262904, "grad_norm": 0.63671875, "learning_rate": 0.0015281542799476858, "loss": 0.2212, "step": 33512 }, { "epoch": 0.05942309209657272, "grad_norm": 0.54296875, "learning_rate": 0.001528101926825148, "loss": 0.2061, "step": 33514 }, { "epoch": 0.05942663826188253, "grad_norm": 0.279296875, "learning_rate": 0.001528049571830341, "loss": 0.2251, "step": 33516 }, { "epoch": 0.05943018442719235, "grad_norm": 0.78125, "learning_rate": 0.0015279972149634935, "loss": 0.2116, "step": 33518 }, { "epoch": 0.05943373059250216, "grad_norm": 1.1171875, "learning_rate": 0.001527944856224835, "loss": 0.2418, "step": 33520 }, { "epoch": 0.059437276757811984, "grad_norm": 0.4140625, "learning_rate": 0.0015278924956145944, "loss": 0.2169, "step": 33522 }, { "epoch": 0.0594408229231218, "grad_norm": 0.423828125, "learning_rate": 0.0015278401331330002, "loss": 0.1614, "step": 33524 }, { "epoch": 0.05944436908843161, "grad_norm": 0.4140625, "learning_rate": 0.0015277877687802821, "loss": 0.2338, "step": 33526 }, { "epoch": 0.05944791525374143, "grad_norm": 0.65625, "learning_rate": 0.0015277354025566687, "loss": 0.2011, "step": 33528 }, { "epoch": 0.05945146141905124, "grad_norm": 0.4375, "learning_rate": 0.001527683034462389, "loss": 0.2025, "step": 33530 }, { "epoch": 0.05945500758436106, "grad_norm": 0.65234375, "learning_rate": 0.0015276306644976725, "loss": 0.1717, "step": 33532 }, { "epoch": 0.05945855374967087, "grad_norm": 0.8046875, "learning_rate": 0.0015275782926627477, "loss": 0.2071, "step": 33534 }, { "epoch": 0.059462099914980686, "grad_norm": 0.85546875, "learning_rate": 0.001527525918957844, "loss": 0.2889, "step": 33536 }, { "epoch": 0.0594656460802905, "grad_norm": 0.71875, "learning_rate": 0.0015274735433831897, "loss": 0.1874, "step": 33538 }, { "epoch": 0.059469192245600315, "grad_norm": 0.734375, "learning_rate": 0.0015274211659390149, "loss": 0.2549, "step": 33540 }, { "epoch": 0.05947273841091013, "grad_norm": 0.65234375, "learning_rate": 0.001527368786625548, "loss": 0.2318, "step": 33542 }, { "epoch": 0.059476284576219944, "grad_norm": 4.5, "learning_rate": 0.0015273164054430185, "loss": 0.4045, "step": 33544 }, { "epoch": 0.059479830741529766, "grad_norm": 0.9609375, "learning_rate": 0.001527264022391655, "loss": 0.2492, "step": 33546 }, { "epoch": 0.05948337690683958, "grad_norm": 2.265625, "learning_rate": 0.0015272116374716869, "loss": 0.3044, "step": 33548 }, { "epoch": 0.059486923072149395, "grad_norm": 0.3515625, "learning_rate": 0.0015271592506833434, "loss": 0.2788, "step": 33550 }, { "epoch": 0.05949046923745921, "grad_norm": 0.373046875, "learning_rate": 0.0015271068620268534, "loss": 0.17, "step": 33552 }, { "epoch": 0.059494015402769024, "grad_norm": 0.71875, "learning_rate": 0.001527054471502446, "loss": 0.1643, "step": 33554 }, { "epoch": 0.05949756156807884, "grad_norm": 0.373046875, "learning_rate": 0.0015270020791103502, "loss": 0.1782, "step": 33556 }, { "epoch": 0.05950110773338865, "grad_norm": 1.8984375, "learning_rate": 0.0015269496848507955, "loss": 0.3836, "step": 33558 }, { "epoch": 0.05950465389869847, "grad_norm": 0.185546875, "learning_rate": 0.0015268972887240112, "loss": 0.1612, "step": 33560 }, { "epoch": 0.05950820006400828, "grad_norm": 0.42578125, "learning_rate": 0.0015268448907302258, "loss": 0.1282, "step": 33562 }, { "epoch": 0.0595117462293181, "grad_norm": 0.2412109375, "learning_rate": 0.0015267924908696688, "loss": 0.2006, "step": 33564 }, { "epoch": 0.05951529239462791, "grad_norm": 0.236328125, "learning_rate": 0.0015267400891425694, "loss": 0.1791, "step": 33566 }, { "epoch": 0.05951883855993773, "grad_norm": 0.1962890625, "learning_rate": 0.0015266876855491568, "loss": 0.2274, "step": 33568 }, { "epoch": 0.05952238472524755, "grad_norm": 0.85546875, "learning_rate": 0.0015266352800896602, "loss": 0.207, "step": 33570 }, { "epoch": 0.05952593089055736, "grad_norm": 0.419921875, "learning_rate": 0.0015265828727643086, "loss": 0.1516, "step": 33572 }, { "epoch": 0.059529477055867176, "grad_norm": 1.125, "learning_rate": 0.001526530463573331, "loss": 0.2169, "step": 33574 }, { "epoch": 0.05953302322117699, "grad_norm": 5.4375, "learning_rate": 0.0015264780525169575, "loss": 0.2752, "step": 33576 }, { "epoch": 0.059536569386486805, "grad_norm": 1.3125, "learning_rate": 0.0015264256395954162, "loss": 0.2099, "step": 33578 }, { "epoch": 0.05954011555179662, "grad_norm": 0.322265625, "learning_rate": 0.001526373224808937, "loss": 0.14, "step": 33580 }, { "epoch": 0.059543661717106434, "grad_norm": 0.9921875, "learning_rate": 0.0015263208081577491, "loss": 0.1949, "step": 33582 }, { "epoch": 0.05954720788241625, "grad_norm": 0.40234375, "learning_rate": 0.0015262683896420815, "loss": 0.184, "step": 33584 }, { "epoch": 0.059550754047726064, "grad_norm": 1.140625, "learning_rate": 0.0015262159692621636, "loss": 0.1652, "step": 33586 }, { "epoch": 0.05955430021303588, "grad_norm": 0.2060546875, "learning_rate": 0.0015261635470182249, "loss": 0.1458, "step": 33588 }, { "epoch": 0.0595578463783457, "grad_norm": 0.169921875, "learning_rate": 0.001526111122910494, "loss": 0.3392, "step": 33590 }, { "epoch": 0.059561392543655514, "grad_norm": 0.2890625, "learning_rate": 0.0015260586969392004, "loss": 0.1825, "step": 33592 }, { "epoch": 0.05956493870896533, "grad_norm": 0.16015625, "learning_rate": 0.0015260062691045742, "loss": 0.4837, "step": 33594 }, { "epoch": 0.05956848487427514, "grad_norm": 1.8359375, "learning_rate": 0.0015259538394068435, "loss": 0.1416, "step": 33596 }, { "epoch": 0.05957203103958496, "grad_norm": 1.0703125, "learning_rate": 0.0015259014078462384, "loss": 0.2399, "step": 33598 }, { "epoch": 0.05957557720489477, "grad_norm": 1.0390625, "learning_rate": 0.0015258489744229878, "loss": 0.2929, "step": 33600 }, { "epoch": 0.05957912337020459, "grad_norm": 0.71484375, "learning_rate": 0.0015257965391373214, "loss": 0.251, "step": 33602 }, { "epoch": 0.0595826695355144, "grad_norm": 9.1875, "learning_rate": 0.0015257441019894678, "loss": 0.2291, "step": 33604 }, { "epoch": 0.059586215700824216, "grad_norm": 0.64453125, "learning_rate": 0.001525691662979657, "loss": 0.2211, "step": 33606 }, { "epoch": 0.05958976186613403, "grad_norm": 0.333984375, "learning_rate": 0.0015256392221081186, "loss": 0.1361, "step": 33608 }, { "epoch": 0.059593308031443845, "grad_norm": 0.37890625, "learning_rate": 0.001525586779375081, "loss": 0.1629, "step": 33610 }, { "epoch": 0.05959685419675366, "grad_norm": 1.140625, "learning_rate": 0.0015255343347807741, "loss": 0.2476, "step": 33612 }, { "epoch": 0.05960040036206348, "grad_norm": 1.53125, "learning_rate": 0.0015254818883254273, "loss": 0.3141, "step": 33614 }, { "epoch": 0.059603946527373296, "grad_norm": 1.328125, "learning_rate": 0.0015254294400092697, "loss": 0.2151, "step": 33616 }, { "epoch": 0.05960749269268311, "grad_norm": 0.33984375, "learning_rate": 0.001525376989832531, "loss": 0.2072, "step": 33618 }, { "epoch": 0.059611038857992925, "grad_norm": 1.1171875, "learning_rate": 0.0015253245377954401, "loss": 0.1455, "step": 33620 }, { "epoch": 0.05961458502330274, "grad_norm": 1.5625, "learning_rate": 0.0015252720838982272, "loss": 0.2009, "step": 33622 }, { "epoch": 0.059618131188612554, "grad_norm": 2.5, "learning_rate": 0.0015252196281411211, "loss": 0.1814, "step": 33624 }, { "epoch": 0.05962167735392237, "grad_norm": 1.1484375, "learning_rate": 0.0015251671705243514, "loss": 0.1753, "step": 33626 }, { "epoch": 0.05962522351923218, "grad_norm": 0.466796875, "learning_rate": 0.0015251147110481472, "loss": 0.19, "step": 33628 }, { "epoch": 0.059628769684542, "grad_norm": 0.87890625, "learning_rate": 0.0015250622497127388, "loss": 0.2177, "step": 33630 }, { "epoch": 0.05963231584985181, "grad_norm": 0.45703125, "learning_rate": 0.0015250097865183543, "loss": 0.1812, "step": 33632 }, { "epoch": 0.05963586201516163, "grad_norm": 1.328125, "learning_rate": 0.0015249573214652242, "loss": 0.2584, "step": 33634 }, { "epoch": 0.05963940818047145, "grad_norm": 0.390625, "learning_rate": 0.0015249048545535773, "loss": 0.212, "step": 33636 }, { "epoch": 0.05964295434578126, "grad_norm": 0.474609375, "learning_rate": 0.001524852385783644, "loss": 0.1502, "step": 33638 }, { "epoch": 0.05964650051109108, "grad_norm": 0.427734375, "learning_rate": 0.0015247999151556525, "loss": 0.1998, "step": 33640 }, { "epoch": 0.05965004667640089, "grad_norm": 1.3203125, "learning_rate": 0.0015247474426698334, "loss": 0.1832, "step": 33642 }, { "epoch": 0.059653592841710706, "grad_norm": 0.45703125, "learning_rate": 0.0015246949683264152, "loss": 0.1699, "step": 33644 }, { "epoch": 0.05965713900702052, "grad_norm": 0.48828125, "learning_rate": 0.0015246424921256283, "loss": 0.2387, "step": 33646 }, { "epoch": 0.059660685172330336, "grad_norm": 0.34765625, "learning_rate": 0.0015245900140677018, "loss": 0.127, "step": 33648 }, { "epoch": 0.05966423133764015, "grad_norm": 0.419921875, "learning_rate": 0.001524537534152865, "loss": 0.1548, "step": 33650 }, { "epoch": 0.059667777502949965, "grad_norm": 7.625, "learning_rate": 0.0015244850523813479, "loss": 0.2532, "step": 33652 }, { "epoch": 0.05967132366825978, "grad_norm": 0.55859375, "learning_rate": 0.0015244325687533795, "loss": 0.1848, "step": 33654 }, { "epoch": 0.059674869833569594, "grad_norm": 0.458984375, "learning_rate": 0.0015243800832691896, "loss": 0.2269, "step": 33656 }, { "epoch": 0.059678415998879415, "grad_norm": 0.26953125, "learning_rate": 0.001524327595929008, "loss": 0.2199, "step": 33658 }, { "epoch": 0.05968196216418923, "grad_norm": 0.671875, "learning_rate": 0.0015242751067330639, "loss": 0.1933, "step": 33660 }, { "epoch": 0.059685508329499044, "grad_norm": 0.421875, "learning_rate": 0.0015242226156815869, "loss": 0.1854, "step": 33662 }, { "epoch": 0.05968905449480886, "grad_norm": 0.94921875, "learning_rate": 0.0015241701227748064, "loss": 0.2249, "step": 33664 }, { "epoch": 0.05969260066011867, "grad_norm": 0.3203125, "learning_rate": 0.0015241176280129522, "loss": 0.3148, "step": 33666 }, { "epoch": 0.05969614682542849, "grad_norm": 0.455078125, "learning_rate": 0.001524065131396254, "loss": 0.2415, "step": 33668 }, { "epoch": 0.0596996929907383, "grad_norm": 0.65234375, "learning_rate": 0.001524012632924941, "loss": 0.1857, "step": 33670 }, { "epoch": 0.05970323915604812, "grad_norm": 0.2197265625, "learning_rate": 0.0015239601325992434, "loss": 0.2308, "step": 33672 }, { "epoch": 0.05970678532135793, "grad_norm": 1.203125, "learning_rate": 0.00152390763041939, "loss": 0.2336, "step": 33674 }, { "epoch": 0.059710331486667746, "grad_norm": 0.3515625, "learning_rate": 0.0015238551263856111, "loss": 0.2008, "step": 33676 }, { "epoch": 0.05971387765197756, "grad_norm": 0.69921875, "learning_rate": 0.001523802620498136, "loss": 0.1501, "step": 33678 }, { "epoch": 0.059717423817287375, "grad_norm": 1.515625, "learning_rate": 0.0015237501127571946, "loss": 0.1744, "step": 33680 }, { "epoch": 0.0597209699825972, "grad_norm": 0.5703125, "learning_rate": 0.0015236976031630166, "loss": 0.168, "step": 33682 }, { "epoch": 0.05972451614790701, "grad_norm": 0.478515625, "learning_rate": 0.0015236450917158308, "loss": 0.2884, "step": 33684 }, { "epoch": 0.059728062313216826, "grad_norm": 0.443359375, "learning_rate": 0.001523592578415868, "loss": 0.4404, "step": 33686 }, { "epoch": 0.05973160847852664, "grad_norm": 0.412109375, "learning_rate": 0.0015235400632633569, "loss": 0.2767, "step": 33688 }, { "epoch": 0.059735154643836455, "grad_norm": 0.478515625, "learning_rate": 0.0015234875462585276, "loss": 0.1229, "step": 33690 }, { "epoch": 0.05973870080914627, "grad_norm": 0.494140625, "learning_rate": 0.00152343502740161, "loss": 0.1463, "step": 33692 }, { "epoch": 0.059742246974456084, "grad_norm": 0.2578125, "learning_rate": 0.0015233825066928333, "loss": 0.1804, "step": 33694 }, { "epoch": 0.0597457931397659, "grad_norm": 0.345703125, "learning_rate": 0.0015233299841324278, "loss": 0.1556, "step": 33696 }, { "epoch": 0.05974933930507571, "grad_norm": 0.68359375, "learning_rate": 0.0015232774597206224, "loss": 0.2662, "step": 33698 }, { "epoch": 0.05975288547038553, "grad_norm": 0.37890625, "learning_rate": 0.0015232249334576477, "loss": 0.1838, "step": 33700 }, { "epoch": 0.05975643163569534, "grad_norm": 0.82421875, "learning_rate": 0.001523172405343733, "loss": 0.2632, "step": 33702 }, { "epoch": 0.059759977801005164, "grad_norm": 0.31640625, "learning_rate": 0.0015231198753791076, "loss": 0.1495, "step": 33704 }, { "epoch": 0.05976352396631498, "grad_norm": 0.7890625, "learning_rate": 0.0015230673435640016, "loss": 0.1744, "step": 33706 }, { "epoch": 0.05976707013162479, "grad_norm": 1.3515625, "learning_rate": 0.001523014809898645, "loss": 0.3071, "step": 33708 }, { "epoch": 0.05977061629693461, "grad_norm": 0.234375, "learning_rate": 0.001522962274383267, "loss": 0.1563, "step": 33710 }, { "epoch": 0.05977416246224442, "grad_norm": 1.0703125, "learning_rate": 0.0015229097370180985, "loss": 0.2597, "step": 33712 }, { "epoch": 0.05977770862755424, "grad_norm": 0.3671875, "learning_rate": 0.001522857197803368, "loss": 0.1859, "step": 33714 }, { "epoch": 0.05978125479286405, "grad_norm": 0.6953125, "learning_rate": 0.0015228046567393057, "loss": 0.2501, "step": 33716 }, { "epoch": 0.059784800958173866, "grad_norm": 1.2734375, "learning_rate": 0.0015227521138261415, "loss": 0.2428, "step": 33718 }, { "epoch": 0.05978834712348368, "grad_norm": 1.078125, "learning_rate": 0.001522699569064105, "loss": 0.2191, "step": 33720 }, { "epoch": 0.059791893288793495, "grad_norm": 0.59765625, "learning_rate": 0.001522647022453426, "loss": 0.2193, "step": 33722 }, { "epoch": 0.05979543945410331, "grad_norm": 0.353515625, "learning_rate": 0.0015225944739943348, "loss": 0.1806, "step": 33724 }, { "epoch": 0.059798985619413124, "grad_norm": 0.2255859375, "learning_rate": 0.0015225419236870605, "loss": 0.1832, "step": 33726 }, { "epoch": 0.059802531784722945, "grad_norm": 0.2177734375, "learning_rate": 0.0015224893715318339, "loss": 0.1792, "step": 33728 }, { "epoch": 0.05980607795003276, "grad_norm": 2.046875, "learning_rate": 0.0015224368175288834, "loss": 0.3896, "step": 33730 }, { "epoch": 0.059809624115342575, "grad_norm": 0.84765625, "learning_rate": 0.00152238426167844, "loss": 0.1639, "step": 33732 }, { "epoch": 0.05981317028065239, "grad_norm": 0.392578125, "learning_rate": 0.0015223317039807331, "loss": 0.1571, "step": 33734 }, { "epoch": 0.059816716445962204, "grad_norm": 0.51171875, "learning_rate": 0.001522279144435993, "loss": 0.2218, "step": 33736 }, { "epoch": 0.05982026261127202, "grad_norm": 1.515625, "learning_rate": 0.0015222265830444489, "loss": 0.246, "step": 33738 }, { "epoch": 0.05982380877658183, "grad_norm": 0.51171875, "learning_rate": 0.0015221740198063309, "loss": 0.161, "step": 33740 }, { "epoch": 0.05982735494189165, "grad_norm": 1.171875, "learning_rate": 0.0015221214547218689, "loss": 0.2855, "step": 33742 }, { "epoch": 0.05983090110720146, "grad_norm": 0.2021484375, "learning_rate": 0.0015220688877912933, "loss": 0.171, "step": 33744 }, { "epoch": 0.059834447272511276, "grad_norm": 0.4296875, "learning_rate": 0.0015220163190148332, "loss": 0.1967, "step": 33746 }, { "epoch": 0.05983799343782109, "grad_norm": 0.267578125, "learning_rate": 0.001521963748392719, "loss": 0.1789, "step": 33748 }, { "epoch": 0.05984153960313091, "grad_norm": 0.78515625, "learning_rate": 0.0015219111759251802, "loss": 0.1546, "step": 33750 }, { "epoch": 0.05984508576844073, "grad_norm": 0.625, "learning_rate": 0.0015218586016124472, "loss": 0.147, "step": 33752 }, { "epoch": 0.05984863193375054, "grad_norm": 0.349609375, "learning_rate": 0.0015218060254547497, "loss": 0.1367, "step": 33754 }, { "epoch": 0.059852178099060356, "grad_norm": 0.85546875, "learning_rate": 0.0015217534474523179, "loss": 0.1895, "step": 33756 }, { "epoch": 0.05985572426437017, "grad_norm": 1.046875, "learning_rate": 0.0015217008676053812, "loss": 0.236, "step": 33758 }, { "epoch": 0.059859270429679985, "grad_norm": 0.345703125, "learning_rate": 0.0015216482859141697, "loss": 0.1483, "step": 33760 }, { "epoch": 0.0598628165949898, "grad_norm": 0.4296875, "learning_rate": 0.001521595702378914, "loss": 0.229, "step": 33762 }, { "epoch": 0.059866362760299614, "grad_norm": 0.376953125, "learning_rate": 0.0015215431169998432, "loss": 0.2, "step": 33764 }, { "epoch": 0.05986990892560943, "grad_norm": 0.32421875, "learning_rate": 0.001521490529777188, "loss": 0.3237, "step": 33766 }, { "epoch": 0.05987345509091924, "grad_norm": 0.470703125, "learning_rate": 0.0015214379407111778, "loss": 0.2025, "step": 33768 }, { "epoch": 0.05987700125622906, "grad_norm": 0.478515625, "learning_rate": 0.001521385349802043, "loss": 0.201, "step": 33770 }, { "epoch": 0.05988054742153888, "grad_norm": 1.0859375, "learning_rate": 0.001521332757050013, "loss": 0.4623, "step": 33772 }, { "epoch": 0.059884093586848694, "grad_norm": 2.796875, "learning_rate": 0.001521280162455319, "loss": 0.3137, "step": 33774 }, { "epoch": 0.05988763975215851, "grad_norm": 0.369140625, "learning_rate": 0.0015212275660181898, "loss": 0.2116, "step": 33776 }, { "epoch": 0.05989118591746832, "grad_norm": 1.5390625, "learning_rate": 0.001521174967738856, "loss": 0.2527, "step": 33778 }, { "epoch": 0.05989473208277814, "grad_norm": 0.396484375, "learning_rate": 0.0015211223676175475, "loss": 0.1346, "step": 33780 }, { "epoch": 0.05989827824808795, "grad_norm": 1.6328125, "learning_rate": 0.0015210697656544946, "loss": 0.1888, "step": 33782 }, { "epoch": 0.05990182441339777, "grad_norm": 1.703125, "learning_rate": 0.0015210171618499268, "loss": 0.2323, "step": 33784 }, { "epoch": 0.05990537057870758, "grad_norm": 0.76953125, "learning_rate": 0.0015209645562040749, "loss": 0.1829, "step": 33786 }, { "epoch": 0.059908916744017396, "grad_norm": 0.81640625, "learning_rate": 0.001520911948717168, "loss": 0.1537, "step": 33788 }, { "epoch": 0.05991246290932721, "grad_norm": 0.77734375, "learning_rate": 0.0015208593393894372, "loss": 0.1669, "step": 33790 }, { "epoch": 0.059916009074637025, "grad_norm": 0.62890625, "learning_rate": 0.0015208067282211119, "loss": 0.2352, "step": 33792 }, { "epoch": 0.05991955523994684, "grad_norm": 0.828125, "learning_rate": 0.0015207541152124225, "loss": 0.2329, "step": 33794 }, { "epoch": 0.05992310140525666, "grad_norm": 1.1796875, "learning_rate": 0.001520701500363599, "loss": 0.2305, "step": 33796 }, { "epoch": 0.059926647570566476, "grad_norm": 0.291015625, "learning_rate": 0.0015206488836748717, "loss": 0.2023, "step": 33798 }, { "epoch": 0.05993019373587629, "grad_norm": 0.33984375, "learning_rate": 0.00152059626514647, "loss": 0.2245, "step": 33800 }, { "epoch": 0.059933739901186105, "grad_norm": 0.5234375, "learning_rate": 0.0015205436447786249, "loss": 0.2198, "step": 33802 }, { "epoch": 0.05993728606649592, "grad_norm": 0.51171875, "learning_rate": 0.001520491022571566, "loss": 0.2925, "step": 33804 }, { "epoch": 0.059940832231805734, "grad_norm": 0.6171875, "learning_rate": 0.0015204383985255238, "loss": 0.1998, "step": 33806 }, { "epoch": 0.05994437839711555, "grad_norm": 0.30859375, "learning_rate": 0.0015203857726407283, "loss": 0.1828, "step": 33808 }, { "epoch": 0.05994792456242536, "grad_norm": 0.546875, "learning_rate": 0.0015203331449174097, "loss": 0.1675, "step": 33810 }, { "epoch": 0.05995147072773518, "grad_norm": 0.6171875, "learning_rate": 0.0015202805153557977, "loss": 0.2022, "step": 33812 }, { "epoch": 0.05995501689304499, "grad_norm": 0.6796875, "learning_rate": 0.0015202278839561232, "loss": 0.2144, "step": 33814 }, { "epoch": 0.05995856305835481, "grad_norm": 0.56640625, "learning_rate": 0.0015201752507186157, "loss": 0.2096, "step": 33816 }, { "epoch": 0.05996210922366463, "grad_norm": 4.40625, "learning_rate": 0.0015201226156435063, "loss": 0.2596, "step": 33818 }, { "epoch": 0.05996565538897444, "grad_norm": 0.421875, "learning_rate": 0.001520069978731024, "loss": 0.2253, "step": 33820 }, { "epoch": 0.05996920155428426, "grad_norm": 0.486328125, "learning_rate": 0.0015200173399813998, "loss": 0.2063, "step": 33822 }, { "epoch": 0.05997274771959407, "grad_norm": 0.275390625, "learning_rate": 0.0015199646993948638, "loss": 0.1858, "step": 33824 }, { "epoch": 0.059976293884903886, "grad_norm": 0.291015625, "learning_rate": 0.0015199120569716461, "loss": 0.1811, "step": 33826 }, { "epoch": 0.0599798400502137, "grad_norm": 0.37109375, "learning_rate": 0.001519859412711977, "loss": 0.2077, "step": 33828 }, { "epoch": 0.059983386215523515, "grad_norm": 0.498046875, "learning_rate": 0.0015198067666160866, "loss": 0.1673, "step": 33830 }, { "epoch": 0.05998693238083333, "grad_norm": 0.66015625, "learning_rate": 0.0015197541186842056, "loss": 0.2353, "step": 33832 }, { "epoch": 0.059990478546143144, "grad_norm": 0.91015625, "learning_rate": 0.0015197014689165636, "loss": 0.2075, "step": 33834 }, { "epoch": 0.05999402471145296, "grad_norm": 0.248046875, "learning_rate": 0.0015196488173133912, "loss": 0.1912, "step": 33836 }, { "epoch": 0.059997570876762774, "grad_norm": 0.78515625, "learning_rate": 0.0015195961638749184, "loss": 0.2717, "step": 33838 }, { "epoch": 0.060001117042072595, "grad_norm": 0.6171875, "learning_rate": 0.0015195435086013762, "loss": 0.1822, "step": 33840 }, { "epoch": 0.06000466320738241, "grad_norm": 0.55078125, "learning_rate": 0.0015194908514929942, "loss": 0.1893, "step": 33842 }, { "epoch": 0.060008209372692224, "grad_norm": 1.4765625, "learning_rate": 0.0015194381925500028, "loss": 0.1865, "step": 33844 }, { "epoch": 0.06001175553800204, "grad_norm": 8.0, "learning_rate": 0.0015193855317726325, "loss": 0.3614, "step": 33846 }, { "epoch": 0.06001530170331185, "grad_norm": 0.875, "learning_rate": 0.0015193328691611135, "loss": 0.2128, "step": 33848 }, { "epoch": 0.06001884786862167, "grad_norm": 0.35546875, "learning_rate": 0.0015192802047156759, "loss": 0.2325, "step": 33850 }, { "epoch": 0.06002239403393148, "grad_norm": 0.66015625, "learning_rate": 0.0015192275384365505, "loss": 0.2296, "step": 33852 }, { "epoch": 0.0600259401992413, "grad_norm": 0.404296875, "learning_rate": 0.0015191748703239673, "loss": 0.1579, "step": 33854 }, { "epoch": 0.06002948636455111, "grad_norm": 0.453125, "learning_rate": 0.0015191222003781567, "loss": 0.2651, "step": 33856 }, { "epoch": 0.060033032529860926, "grad_norm": 0.76171875, "learning_rate": 0.001519069528599349, "loss": 0.1455, "step": 33858 }, { "epoch": 0.06003657869517074, "grad_norm": 0.5390625, "learning_rate": 0.0015190168549877748, "loss": 0.1585, "step": 33860 }, { "epoch": 0.060040124860480555, "grad_norm": 0.419921875, "learning_rate": 0.0015189641795436638, "loss": 0.1533, "step": 33862 }, { "epoch": 0.06004367102579038, "grad_norm": 3.0625, "learning_rate": 0.0015189115022672473, "loss": 0.3335, "step": 33864 }, { "epoch": 0.06004721719110019, "grad_norm": 0.359375, "learning_rate": 0.0015188588231587548, "loss": 0.1873, "step": 33866 }, { "epoch": 0.060050763356410006, "grad_norm": 0.28515625, "learning_rate": 0.0015188061422184173, "loss": 0.1881, "step": 33868 }, { "epoch": 0.06005430952171982, "grad_norm": 0.1796875, "learning_rate": 0.001518753459446465, "loss": 0.1841, "step": 33870 }, { "epoch": 0.060057855687029635, "grad_norm": 0.94140625, "learning_rate": 0.0015187007748431287, "loss": 0.1934, "step": 33872 }, { "epoch": 0.06006140185233945, "grad_norm": 1.25, "learning_rate": 0.0015186480884086379, "loss": 0.1845, "step": 33874 }, { "epoch": 0.060064948017649264, "grad_norm": 0.9453125, "learning_rate": 0.0015185954001432239, "loss": 0.1668, "step": 33876 }, { "epoch": 0.06006849418295908, "grad_norm": 0.171875, "learning_rate": 0.0015185427100471165, "loss": 0.1315, "step": 33878 }, { "epoch": 0.06007204034826889, "grad_norm": 0.578125, "learning_rate": 0.0015184900181205465, "loss": 0.195, "step": 33880 }, { "epoch": 0.06007558651357871, "grad_norm": 0.953125, "learning_rate": 0.001518437324363744, "loss": 0.1963, "step": 33882 }, { "epoch": 0.06007913267888852, "grad_norm": 0.80859375, "learning_rate": 0.00151838462877694, "loss": 0.4066, "step": 33884 }, { "epoch": 0.060082678844198344, "grad_norm": 0.330078125, "learning_rate": 0.0015183319313603644, "loss": 0.2819, "step": 33886 }, { "epoch": 0.06008622500950816, "grad_norm": 0.53515625, "learning_rate": 0.0015182792321142484, "loss": 0.3097, "step": 33888 }, { "epoch": 0.06008977117481797, "grad_norm": 4.21875, "learning_rate": 0.001518226531038822, "loss": 0.2158, "step": 33890 }, { "epoch": 0.06009331734012779, "grad_norm": 0.28515625, "learning_rate": 0.0015181738281343153, "loss": 0.1932, "step": 33892 }, { "epoch": 0.0600968635054376, "grad_norm": 0.298828125, "learning_rate": 0.0015181211234009593, "loss": 0.1992, "step": 33894 }, { "epoch": 0.060100409670747416, "grad_norm": 0.375, "learning_rate": 0.0015180684168389844, "loss": 0.1465, "step": 33896 }, { "epoch": 0.06010395583605723, "grad_norm": 0.2470703125, "learning_rate": 0.0015180157084486211, "loss": 0.2195, "step": 33898 }, { "epoch": 0.060107502001367046, "grad_norm": 0.439453125, "learning_rate": 0.0015179629982300998, "loss": 0.1595, "step": 33900 }, { "epoch": 0.06011104816667686, "grad_norm": 0.466796875, "learning_rate": 0.0015179102861836511, "loss": 0.2757, "step": 33902 }, { "epoch": 0.060114594331986675, "grad_norm": 0.337890625, "learning_rate": 0.0015178575723095058, "loss": 0.2595, "step": 33904 }, { "epoch": 0.06011814049729649, "grad_norm": 0.6953125, "learning_rate": 0.0015178048566078942, "loss": 0.1907, "step": 33906 }, { "epoch": 0.06012168666260631, "grad_norm": 0.9296875, "learning_rate": 0.0015177521390790467, "loss": 0.1921, "step": 33908 }, { "epoch": 0.060125232827916125, "grad_norm": 0.4140625, "learning_rate": 0.001517699419723194, "loss": 0.1847, "step": 33910 }, { "epoch": 0.06012877899322594, "grad_norm": 1.0078125, "learning_rate": 0.0015176466985405666, "loss": 0.1707, "step": 33912 }, { "epoch": 0.060132325158535754, "grad_norm": 1.3046875, "learning_rate": 0.0015175939755313953, "loss": 0.1607, "step": 33914 }, { "epoch": 0.06013587132384557, "grad_norm": 0.4453125, "learning_rate": 0.0015175412506959103, "loss": 0.2688, "step": 33916 }, { "epoch": 0.06013941748915538, "grad_norm": 0.671875, "learning_rate": 0.0015174885240343426, "loss": 0.2525, "step": 33918 }, { "epoch": 0.0601429636544652, "grad_norm": 0.43359375, "learning_rate": 0.0015174357955469225, "loss": 0.216, "step": 33920 }, { "epoch": 0.06014650981977501, "grad_norm": 0.484375, "learning_rate": 0.001517383065233881, "loss": 0.1534, "step": 33922 }, { "epoch": 0.06015005598508483, "grad_norm": 0.359375, "learning_rate": 0.001517330333095448, "loss": 0.2138, "step": 33924 }, { "epoch": 0.06015360215039464, "grad_norm": 0.45703125, "learning_rate": 0.0015172775991318548, "loss": 0.1554, "step": 33926 }, { "epoch": 0.060157148315704456, "grad_norm": 0.9765625, "learning_rate": 0.0015172248633433317, "loss": 0.2329, "step": 33928 }, { "epoch": 0.06016069448101427, "grad_norm": 0.6484375, "learning_rate": 0.0015171721257301093, "loss": 0.2151, "step": 33930 }, { "epoch": 0.06016424064632409, "grad_norm": 1.9921875, "learning_rate": 0.0015171193862924181, "loss": 0.2967, "step": 33932 }, { "epoch": 0.06016778681163391, "grad_norm": 1.796875, "learning_rate": 0.0015170666450304896, "loss": 0.2321, "step": 33934 }, { "epoch": 0.06017133297694372, "grad_norm": 0.3046875, "learning_rate": 0.0015170139019445538, "loss": 0.2119, "step": 33936 }, { "epoch": 0.060174879142253536, "grad_norm": 0.5078125, "learning_rate": 0.001516961157034841, "loss": 0.2121, "step": 33938 }, { "epoch": 0.06017842530756335, "grad_norm": 0.55859375, "learning_rate": 0.0015169084103015824, "loss": 0.1798, "step": 33940 }, { "epoch": 0.060181971472873165, "grad_norm": 0.27734375, "learning_rate": 0.0015168556617450087, "loss": 0.1507, "step": 33942 }, { "epoch": 0.06018551763818298, "grad_norm": 0.232421875, "learning_rate": 0.0015168029113653506, "loss": 0.1495, "step": 33944 }, { "epoch": 0.060189063803492794, "grad_norm": 12.25, "learning_rate": 0.0015167501591628386, "loss": 0.277, "step": 33946 }, { "epoch": 0.06019260996880261, "grad_norm": 1.3828125, "learning_rate": 0.0015166974051377031, "loss": 0.2228, "step": 33948 }, { "epoch": 0.06019615613411242, "grad_norm": 1.2265625, "learning_rate": 0.0015166446492901758, "loss": 0.2798, "step": 33950 }, { "epoch": 0.06019970229942224, "grad_norm": 0.55078125, "learning_rate": 0.0015165918916204863, "loss": 0.252, "step": 33952 }, { "epoch": 0.06020324846473206, "grad_norm": 0.283203125, "learning_rate": 0.0015165391321288661, "loss": 0.1662, "step": 33954 }, { "epoch": 0.060206794630041874, "grad_norm": 0.267578125, "learning_rate": 0.0015164863708155459, "loss": 0.1696, "step": 33956 }, { "epoch": 0.06021034079535169, "grad_norm": 0.31640625, "learning_rate": 0.001516433607680756, "loss": 0.2241, "step": 33958 }, { "epoch": 0.0602138869606615, "grad_norm": 1.140625, "learning_rate": 0.0015163808427247271, "loss": 0.1989, "step": 33960 }, { "epoch": 0.06021743312597132, "grad_norm": 0.62890625, "learning_rate": 0.0015163280759476906, "loss": 0.2271, "step": 33962 }, { "epoch": 0.06022097929128113, "grad_norm": 0.419921875, "learning_rate": 0.001516275307349877, "loss": 0.2162, "step": 33964 }, { "epoch": 0.06022452545659095, "grad_norm": 0.392578125, "learning_rate": 0.001516222536931517, "loss": 0.2406, "step": 33966 }, { "epoch": 0.06022807162190076, "grad_norm": 0.7578125, "learning_rate": 0.0015161697646928409, "loss": 0.2464, "step": 33968 }, { "epoch": 0.060231617787210576, "grad_norm": 0.40234375, "learning_rate": 0.0015161169906340805, "loss": 0.2167, "step": 33970 }, { "epoch": 0.06023516395252039, "grad_norm": 0.203125, "learning_rate": 0.0015160642147554661, "loss": 0.4023, "step": 33972 }, { "epoch": 0.060238710117830205, "grad_norm": 0.2431640625, "learning_rate": 0.0015160114370572284, "loss": 0.1892, "step": 33974 }, { "epoch": 0.060242256283140026, "grad_norm": 0.37890625, "learning_rate": 0.0015159586575395983, "loss": 0.1649, "step": 33976 }, { "epoch": 0.06024580244844984, "grad_norm": 0.37890625, "learning_rate": 0.0015159058762028066, "loss": 0.2155, "step": 33978 }, { "epoch": 0.060249348613759655, "grad_norm": 0.41796875, "learning_rate": 0.001515853093047084, "loss": 0.1429, "step": 33980 }, { "epoch": 0.06025289477906947, "grad_norm": 0.294921875, "learning_rate": 0.0015158003080726619, "loss": 0.1716, "step": 33982 }, { "epoch": 0.060256440944379284, "grad_norm": 0.90625, "learning_rate": 0.0015157475212797709, "loss": 0.1806, "step": 33984 }, { "epoch": 0.0602599871096891, "grad_norm": 0.81640625, "learning_rate": 0.0015156947326686412, "loss": 0.2279, "step": 33986 }, { "epoch": 0.060263533274998914, "grad_norm": 0.56640625, "learning_rate": 0.0015156419422395044, "loss": 0.2468, "step": 33988 }, { "epoch": 0.06026707944030873, "grad_norm": 0.3125, "learning_rate": 0.0015155891499925916, "loss": 0.2239, "step": 33990 }, { "epoch": 0.06027062560561854, "grad_norm": 1.125, "learning_rate": 0.0015155363559281325, "loss": 0.3549, "step": 33992 }, { "epoch": 0.06027417177092836, "grad_norm": 0.369140625, "learning_rate": 0.0015154835600463595, "loss": 0.255, "step": 33994 }, { "epoch": 0.06027771793623817, "grad_norm": 0.326171875, "learning_rate": 0.001515430762347502, "loss": 0.1716, "step": 33996 }, { "epoch": 0.060281264101547986, "grad_norm": 0.93359375, "learning_rate": 0.0015153779628317922, "loss": 0.394, "step": 33998 }, { "epoch": 0.06028481026685781, "grad_norm": 0.45703125, "learning_rate": 0.0015153251614994606, "loss": 0.1607, "step": 34000 }, { "epoch": 0.06028835643216762, "grad_norm": 1.4296875, "learning_rate": 0.0015152723583507376, "loss": 0.1951, "step": 34002 }, { "epoch": 0.06029190259747744, "grad_norm": 1.0546875, "learning_rate": 0.0015152195533858549, "loss": 0.1775, "step": 34004 }, { "epoch": 0.06029544876278725, "grad_norm": 1.3671875, "learning_rate": 0.0015151667466050426, "loss": 0.1756, "step": 34006 }, { "epoch": 0.060298994928097066, "grad_norm": 4.40625, "learning_rate": 0.0015151139380085324, "loss": 0.2326, "step": 34008 }, { "epoch": 0.06030254109340688, "grad_norm": 0.498046875, "learning_rate": 0.0015150611275965547, "loss": 0.2136, "step": 34010 }, { "epoch": 0.060306087258716695, "grad_norm": 0.490234375, "learning_rate": 0.001515008315369341, "loss": 0.2687, "step": 34012 }, { "epoch": 0.06030963342402651, "grad_norm": 0.279296875, "learning_rate": 0.0015149555013271216, "loss": 0.1845, "step": 34014 }, { "epoch": 0.060313179589336324, "grad_norm": 0.306640625, "learning_rate": 0.0015149026854701286, "loss": 0.2654, "step": 34016 }, { "epoch": 0.06031672575464614, "grad_norm": 0.5546875, "learning_rate": 0.0015148498677985919, "loss": 0.1894, "step": 34018 }, { "epoch": 0.06032027191995595, "grad_norm": 0.9453125, "learning_rate": 0.0015147970483127427, "loss": 0.2625, "step": 34020 }, { "epoch": 0.060323818085265775, "grad_norm": 0.279296875, "learning_rate": 0.0015147442270128121, "loss": 0.1874, "step": 34022 }, { "epoch": 0.06032736425057559, "grad_norm": 0.6640625, "learning_rate": 0.0015146914038990315, "loss": 0.1924, "step": 34024 }, { "epoch": 0.060330910415885404, "grad_norm": 0.23046875, "learning_rate": 0.0015146385789716313, "loss": 0.1551, "step": 34026 }, { "epoch": 0.06033445658119522, "grad_norm": 1.453125, "learning_rate": 0.0015145857522308429, "loss": 0.3098, "step": 34028 }, { "epoch": 0.06033800274650503, "grad_norm": 0.439453125, "learning_rate": 0.001514532923676897, "loss": 0.1806, "step": 34030 }, { "epoch": 0.06034154891181485, "grad_norm": 2.15625, "learning_rate": 0.0015144800933100254, "loss": 0.2005, "step": 34032 }, { "epoch": 0.06034509507712466, "grad_norm": 0.77734375, "learning_rate": 0.0015144272611304582, "loss": 0.1806, "step": 34034 }, { "epoch": 0.06034864124243448, "grad_norm": 0.75, "learning_rate": 0.001514374427138427, "loss": 0.1802, "step": 34036 }, { "epoch": 0.06035218740774429, "grad_norm": 0.65625, "learning_rate": 0.001514321591334163, "loss": 0.2183, "step": 34038 }, { "epoch": 0.060355733573054106, "grad_norm": 0.2431640625, "learning_rate": 0.0015142687537178967, "loss": 0.1689, "step": 34040 }, { "epoch": 0.06035927973836392, "grad_norm": 0.98046875, "learning_rate": 0.0015142159142898597, "loss": 0.1868, "step": 34042 }, { "epoch": 0.06036282590367374, "grad_norm": 0.50390625, "learning_rate": 0.0015141630730502826, "loss": 0.2014, "step": 34044 }, { "epoch": 0.060366372068983556, "grad_norm": 0.375, "learning_rate": 0.001514110229999397, "loss": 0.1807, "step": 34046 }, { "epoch": 0.06036991823429337, "grad_norm": 0.88671875, "learning_rate": 0.001514057385137434, "loss": 0.2364, "step": 34048 }, { "epoch": 0.060373464399603186, "grad_norm": 0.361328125, "learning_rate": 0.0015140045384646242, "loss": 0.1413, "step": 34050 }, { "epoch": 0.060377010564913, "grad_norm": 0.32421875, "learning_rate": 0.001513951689981199, "loss": 0.2001, "step": 34052 }, { "epoch": 0.060380556730222815, "grad_norm": 0.29296875, "learning_rate": 0.0015138988396873898, "loss": 0.1857, "step": 34054 }, { "epoch": 0.06038410289553263, "grad_norm": 0.404296875, "learning_rate": 0.0015138459875834274, "loss": 0.2107, "step": 34056 }, { "epoch": 0.060387649060842444, "grad_norm": 0.265625, "learning_rate": 0.001513793133669543, "loss": 0.1201, "step": 34058 }, { "epoch": 0.06039119522615226, "grad_norm": 2.96875, "learning_rate": 0.001513740277945968, "loss": 0.4326, "step": 34060 }, { "epoch": 0.06039474139146207, "grad_norm": 0.1669921875, "learning_rate": 0.001513687420412933, "loss": 0.17, "step": 34062 }, { "epoch": 0.06039828755677189, "grad_norm": 0.33984375, "learning_rate": 0.0015136345610706698, "loss": 0.1742, "step": 34064 }, { "epoch": 0.0604018337220817, "grad_norm": 0.2734375, "learning_rate": 0.0015135816999194093, "loss": 0.2435, "step": 34066 }, { "epoch": 0.060405379887391523, "grad_norm": 0.56640625, "learning_rate": 0.0015135288369593825, "loss": 0.1591, "step": 34068 }, { "epoch": 0.06040892605270134, "grad_norm": 0.306640625, "learning_rate": 0.001513475972190821, "loss": 0.1999, "step": 34070 }, { "epoch": 0.06041247221801115, "grad_norm": 0.37890625, "learning_rate": 0.0015134231056139556, "loss": 0.2328, "step": 34072 }, { "epoch": 0.06041601838332097, "grad_norm": 0.41796875, "learning_rate": 0.0015133702372290177, "loss": 0.189, "step": 34074 }, { "epoch": 0.06041956454863078, "grad_norm": 0.90234375, "learning_rate": 0.0015133173670362386, "loss": 0.1827, "step": 34076 }, { "epoch": 0.060423110713940596, "grad_norm": 0.92578125, "learning_rate": 0.0015132644950358492, "loss": 0.1789, "step": 34078 }, { "epoch": 0.06042665687925041, "grad_norm": 0.388671875, "learning_rate": 0.0015132116212280811, "loss": 0.2465, "step": 34080 }, { "epoch": 0.060430203044560225, "grad_norm": 0.25, "learning_rate": 0.0015131587456131653, "loss": 0.1803, "step": 34082 }, { "epoch": 0.06043374920987004, "grad_norm": 0.3046875, "learning_rate": 0.0015131058681913333, "loss": 0.209, "step": 34084 }, { "epoch": 0.060437295375179854, "grad_norm": 0.59375, "learning_rate": 0.001513052988962816, "loss": 0.2628, "step": 34086 }, { "epoch": 0.06044084154048967, "grad_norm": 2.3125, "learning_rate": 0.0015130001079278449, "loss": 0.2879, "step": 34088 }, { "epoch": 0.06044438770579949, "grad_norm": 0.6171875, "learning_rate": 0.0015129472250866513, "loss": 0.2624, "step": 34090 }, { "epoch": 0.060447933871109305, "grad_norm": 0.1953125, "learning_rate": 0.0015128943404394666, "loss": 0.1649, "step": 34092 }, { "epoch": 0.06045148003641912, "grad_norm": 0.83984375, "learning_rate": 0.0015128414539865216, "loss": 0.2521, "step": 34094 }, { "epoch": 0.060455026201728934, "grad_norm": 0.55859375, "learning_rate": 0.0015127885657280481, "loss": 0.1574, "step": 34096 }, { "epoch": 0.06045857236703875, "grad_norm": 1.3125, "learning_rate": 0.001512735675664277, "loss": 0.2852, "step": 34098 }, { "epoch": 0.06046211853234856, "grad_norm": 0.224609375, "learning_rate": 0.0015126827837954398, "loss": 0.2197, "step": 34100 }, { "epoch": 0.06046566469765838, "grad_norm": 0.2080078125, "learning_rate": 0.0015126298901217679, "loss": 0.1599, "step": 34102 }, { "epoch": 0.06046921086296819, "grad_norm": 0.345703125, "learning_rate": 0.0015125769946434924, "loss": 0.2079, "step": 34104 }, { "epoch": 0.06047275702827801, "grad_norm": 4.15625, "learning_rate": 0.001512524097360845, "loss": 0.258, "step": 34106 }, { "epoch": 0.06047630319358782, "grad_norm": 0.72265625, "learning_rate": 0.0015124711982740565, "loss": 0.1847, "step": 34108 }, { "epoch": 0.060479849358897636, "grad_norm": 0.474609375, "learning_rate": 0.001512418297383359, "loss": 0.2351, "step": 34110 }, { "epoch": 0.06048339552420746, "grad_norm": 0.32421875, "learning_rate": 0.0015123653946889832, "loss": 0.2111, "step": 34112 }, { "epoch": 0.06048694168951727, "grad_norm": 0.37890625, "learning_rate": 0.0015123124901911607, "loss": 0.1839, "step": 34114 }, { "epoch": 0.06049048785482709, "grad_norm": 0.546875, "learning_rate": 0.0015122595838901228, "loss": 0.1635, "step": 34116 }, { "epoch": 0.0604940340201369, "grad_norm": 0.65625, "learning_rate": 0.0015122066757861012, "loss": 0.1856, "step": 34118 }, { "epoch": 0.060497580185446716, "grad_norm": 1.2578125, "learning_rate": 0.0015121537658793268, "loss": 0.3814, "step": 34120 }, { "epoch": 0.06050112635075653, "grad_norm": 0.4296875, "learning_rate": 0.0015121008541700313, "loss": 0.3229, "step": 34122 }, { "epoch": 0.060504672516066345, "grad_norm": 0.458984375, "learning_rate": 0.001512047940658446, "loss": 0.1881, "step": 34124 }, { "epoch": 0.06050821868137616, "grad_norm": 0.29296875, "learning_rate": 0.0015119950253448026, "loss": 0.1305, "step": 34126 }, { "epoch": 0.060511764846685974, "grad_norm": 5.78125, "learning_rate": 0.0015119421082293318, "loss": 0.2737, "step": 34128 }, { "epoch": 0.06051531101199579, "grad_norm": 1.6171875, "learning_rate": 0.001511889189312266, "loss": 0.2569, "step": 34130 }, { "epoch": 0.0605188571773056, "grad_norm": 0.6171875, "learning_rate": 0.001511836268593836, "loss": 0.2814, "step": 34132 }, { "epoch": 0.06052240334261542, "grad_norm": 0.51953125, "learning_rate": 0.0015117833460742735, "loss": 0.2319, "step": 34134 }, { "epoch": 0.06052594950792524, "grad_norm": 0.5703125, "learning_rate": 0.0015117304217538096, "loss": 0.2404, "step": 34136 }, { "epoch": 0.060529495673235054, "grad_norm": 0.59375, "learning_rate": 0.0015116774956326759, "loss": 0.3036, "step": 34138 }, { "epoch": 0.06053304183854487, "grad_norm": 0.609375, "learning_rate": 0.001511624567711104, "loss": 0.1689, "step": 34140 }, { "epoch": 0.06053658800385468, "grad_norm": 1.0703125, "learning_rate": 0.0015115716379893254, "loss": 0.1982, "step": 34142 }, { "epoch": 0.0605401341691645, "grad_norm": 0.4375, "learning_rate": 0.0015115187064675714, "loss": 0.1422, "step": 34144 }, { "epoch": 0.06054368033447431, "grad_norm": 1.1875, "learning_rate": 0.0015114657731460739, "loss": 0.1621, "step": 34146 }, { "epoch": 0.060547226499784126, "grad_norm": 0.263671875, "learning_rate": 0.001511412838025064, "loss": 0.2336, "step": 34148 }, { "epoch": 0.06055077266509394, "grad_norm": 0.365234375, "learning_rate": 0.0015113599011047731, "loss": 0.1732, "step": 34150 }, { "epoch": 0.060554318830403756, "grad_norm": 0.279296875, "learning_rate": 0.0015113069623854335, "loss": 0.2518, "step": 34152 }, { "epoch": 0.06055786499571357, "grad_norm": 0.77734375, "learning_rate": 0.0015112540218672759, "loss": 0.1525, "step": 34154 }, { "epoch": 0.060561411161023385, "grad_norm": 0.4375, "learning_rate": 0.0015112010795505317, "loss": 0.1819, "step": 34156 }, { "epoch": 0.060564957326333206, "grad_norm": 0.3515625, "learning_rate": 0.0015111481354354332, "loss": 0.239, "step": 34158 }, { "epoch": 0.06056850349164302, "grad_norm": 0.486328125, "learning_rate": 0.0015110951895222116, "loss": 0.1717, "step": 34160 }, { "epoch": 0.060572049656952835, "grad_norm": 0.341796875, "learning_rate": 0.0015110422418110985, "loss": 0.175, "step": 34162 }, { "epoch": 0.06057559582226265, "grad_norm": 1.15625, "learning_rate": 0.001510989292302325, "loss": 0.3236, "step": 34164 }, { "epoch": 0.060579141987572464, "grad_norm": 0.94921875, "learning_rate": 0.0015109363409961235, "loss": 0.2762, "step": 34166 }, { "epoch": 0.06058268815288228, "grad_norm": 0.6015625, "learning_rate": 0.001510883387892725, "loss": 0.1782, "step": 34168 }, { "epoch": 0.06058623431819209, "grad_norm": 0.2890625, "learning_rate": 0.0015108304329923613, "loss": 0.1949, "step": 34170 }, { "epoch": 0.06058978048350191, "grad_norm": 0.265625, "learning_rate": 0.0015107774762952637, "loss": 0.2138, "step": 34172 }, { "epoch": 0.06059332664881172, "grad_norm": 0.55859375, "learning_rate": 0.001510724517801664, "loss": 0.1808, "step": 34174 }, { "epoch": 0.06059687281412154, "grad_norm": 0.7578125, "learning_rate": 0.0015106715575117942, "loss": 0.2233, "step": 34176 }, { "epoch": 0.06060041897943135, "grad_norm": 0.443359375, "learning_rate": 0.0015106185954258853, "loss": 0.2484, "step": 34178 }, { "epoch": 0.06060396514474117, "grad_norm": 0.5546875, "learning_rate": 0.0015105656315441691, "loss": 0.225, "step": 34180 }, { "epoch": 0.06060751131005099, "grad_norm": 1.3828125, "learning_rate": 0.0015105126658668774, "loss": 0.2889, "step": 34182 }, { "epoch": 0.0606110574753608, "grad_norm": 0.333984375, "learning_rate": 0.0015104596983942418, "loss": 0.3335, "step": 34184 }, { "epoch": 0.06061460364067062, "grad_norm": 1.3203125, "learning_rate": 0.001510406729126494, "loss": 0.2388, "step": 34186 }, { "epoch": 0.06061814980598043, "grad_norm": 0.265625, "learning_rate": 0.0015103537580638655, "loss": 0.2029, "step": 34188 }, { "epoch": 0.060621695971290246, "grad_norm": 0.69140625, "learning_rate": 0.001510300785206588, "loss": 0.1933, "step": 34190 }, { "epoch": 0.06062524213660006, "grad_norm": 0.259765625, "learning_rate": 0.001510247810554893, "loss": 0.1872, "step": 34192 }, { "epoch": 0.060628788301909875, "grad_norm": 0.96484375, "learning_rate": 0.0015101948341090126, "loss": 0.2337, "step": 34194 }, { "epoch": 0.06063233446721969, "grad_norm": 0.390625, "learning_rate": 0.0015101418558691783, "loss": 0.1762, "step": 34196 }, { "epoch": 0.060635880632529504, "grad_norm": 0.25390625, "learning_rate": 0.0015100888758356214, "loss": 0.1542, "step": 34198 }, { "epoch": 0.06063942679783932, "grad_norm": 0.3671875, "learning_rate": 0.0015100358940085743, "loss": 0.1525, "step": 34200 }, { "epoch": 0.06064297296314913, "grad_norm": 0.6015625, "learning_rate": 0.0015099829103882685, "loss": 0.2375, "step": 34202 }, { "epoch": 0.060646519128458955, "grad_norm": 0.6171875, "learning_rate": 0.0015099299249749353, "loss": 0.2025, "step": 34204 }, { "epoch": 0.06065006529376877, "grad_norm": 0.5390625, "learning_rate": 0.001509876937768807, "loss": 0.2203, "step": 34206 }, { "epoch": 0.060653611459078584, "grad_norm": 0.33984375, "learning_rate": 0.001509823948770115, "loss": 0.2714, "step": 34208 }, { "epoch": 0.0606571576243884, "grad_norm": 0.369140625, "learning_rate": 0.0015097709579790907, "loss": 0.1877, "step": 34210 }, { "epoch": 0.06066070378969821, "grad_norm": 0.6328125, "learning_rate": 0.0015097179653959668, "loss": 0.2006, "step": 34212 }, { "epoch": 0.06066424995500803, "grad_norm": 0.458984375, "learning_rate": 0.001509664971020974, "loss": 0.1893, "step": 34214 }, { "epoch": 0.06066779612031784, "grad_norm": 0.703125, "learning_rate": 0.001509611974854345, "loss": 0.1881, "step": 34216 }, { "epoch": 0.06067134228562766, "grad_norm": 0.212890625, "learning_rate": 0.0015095589768963108, "loss": 0.1801, "step": 34218 }, { "epoch": 0.06067488845093747, "grad_norm": 0.353515625, "learning_rate": 0.0015095059771471039, "loss": 0.2047, "step": 34220 }, { "epoch": 0.060678434616247286, "grad_norm": 0.30078125, "learning_rate": 0.0015094529756069556, "loss": 0.1918, "step": 34222 }, { "epoch": 0.0606819807815571, "grad_norm": 1.1015625, "learning_rate": 0.0015093999722760976, "loss": 0.2197, "step": 34224 }, { "epoch": 0.06068552694686692, "grad_norm": 0.423828125, "learning_rate": 0.0015093469671547623, "loss": 0.164, "step": 34226 }, { "epoch": 0.060689073112176736, "grad_norm": 0.53125, "learning_rate": 0.001509293960243181, "loss": 0.216, "step": 34228 }, { "epoch": 0.06069261927748655, "grad_norm": 0.83984375, "learning_rate": 0.0015092409515415856, "loss": 0.178, "step": 34230 }, { "epoch": 0.060696165442796365, "grad_norm": 0.39453125, "learning_rate": 0.0015091879410502078, "loss": 0.1749, "step": 34232 }, { "epoch": 0.06069971160810618, "grad_norm": 1.3203125, "learning_rate": 0.0015091349287692797, "loss": 0.1363, "step": 34234 }, { "epoch": 0.060703257773415994, "grad_norm": 0.58984375, "learning_rate": 0.0015090819146990335, "loss": 0.2424, "step": 34236 }, { "epoch": 0.06070680393872581, "grad_norm": 0.8046875, "learning_rate": 0.0015090288988397, "loss": 0.179, "step": 34238 }, { "epoch": 0.060710350104035624, "grad_norm": 0.482421875, "learning_rate": 0.0015089758811915122, "loss": 0.1567, "step": 34240 }, { "epoch": 0.06071389626934544, "grad_norm": 0.345703125, "learning_rate": 0.0015089228617547012, "loss": 0.1653, "step": 34242 }, { "epoch": 0.06071744243465525, "grad_norm": 2.140625, "learning_rate": 0.0015088698405294994, "loss": 0.345, "step": 34244 }, { "epoch": 0.06072098859996507, "grad_norm": 0.4609375, "learning_rate": 0.0015088168175161382, "loss": 0.1705, "step": 34246 }, { "epoch": 0.06072453476527489, "grad_norm": 0.546875, "learning_rate": 0.00150876379271485, "loss": 0.2321, "step": 34248 }, { "epoch": 0.0607280809305847, "grad_norm": 0.87109375, "learning_rate": 0.0015087107661258658, "loss": 0.2493, "step": 34250 }, { "epoch": 0.06073162709589452, "grad_norm": 0.365234375, "learning_rate": 0.0015086577377494187, "loss": 0.202, "step": 34252 }, { "epoch": 0.06073517326120433, "grad_norm": 0.55859375, "learning_rate": 0.0015086047075857395, "loss": 0.154, "step": 34254 }, { "epoch": 0.06073871942651415, "grad_norm": 0.6015625, "learning_rate": 0.001508551675635061, "loss": 0.2466, "step": 34256 }, { "epoch": 0.06074226559182396, "grad_norm": 0.2353515625, "learning_rate": 0.0015084986418976147, "loss": 0.1816, "step": 34258 }, { "epoch": 0.060745811757133776, "grad_norm": 0.55078125, "learning_rate": 0.001508445606373633, "loss": 0.1485, "step": 34260 }, { "epoch": 0.06074935792244359, "grad_norm": 0.275390625, "learning_rate": 0.0015083925690633471, "loss": 0.2092, "step": 34262 }, { "epoch": 0.060752904087753405, "grad_norm": 0.28515625, "learning_rate": 0.0015083395299669892, "loss": 0.1285, "step": 34264 }, { "epoch": 0.06075645025306322, "grad_norm": 0.35546875, "learning_rate": 0.0015082864890847915, "loss": 0.2556, "step": 34266 }, { "epoch": 0.060759996418373034, "grad_norm": 0.6875, "learning_rate": 0.001508233446416986, "loss": 0.2296, "step": 34268 }, { "epoch": 0.06076354258368285, "grad_norm": 0.6328125, "learning_rate": 0.0015081804019638045, "loss": 0.1855, "step": 34270 }, { "epoch": 0.06076708874899267, "grad_norm": 0.232421875, "learning_rate": 0.0015081273557254788, "loss": 0.1682, "step": 34272 }, { "epoch": 0.060770634914302485, "grad_norm": 0.326171875, "learning_rate": 0.0015080743077022414, "loss": 0.1619, "step": 34274 }, { "epoch": 0.0607741810796123, "grad_norm": 0.55078125, "learning_rate": 0.0015080212578943237, "loss": 0.1589, "step": 34276 }, { "epoch": 0.060777727244922114, "grad_norm": 0.38671875, "learning_rate": 0.0015079682063019583, "loss": 0.2142, "step": 34278 }, { "epoch": 0.06078127341023193, "grad_norm": 0.314453125, "learning_rate": 0.0015079151529253769, "loss": 0.2577, "step": 34280 }, { "epoch": 0.06078481957554174, "grad_norm": 0.28515625, "learning_rate": 0.0015078620977648117, "loss": 0.1375, "step": 34282 }, { "epoch": 0.06078836574085156, "grad_norm": 0.27734375, "learning_rate": 0.0015078090408204943, "loss": 0.2366, "step": 34284 }, { "epoch": 0.06079191190616137, "grad_norm": 0.640625, "learning_rate": 0.0015077559820926571, "loss": 0.2469, "step": 34286 }, { "epoch": 0.06079545807147119, "grad_norm": 1.7265625, "learning_rate": 0.001507702921581532, "loss": 0.3724, "step": 34288 }, { "epoch": 0.060799004236781, "grad_norm": 0.54296875, "learning_rate": 0.0015076498592873516, "loss": 0.2846, "step": 34290 }, { "epoch": 0.060802550402090816, "grad_norm": 0.51171875, "learning_rate": 0.0015075967952103472, "loss": 0.1489, "step": 34292 }, { "epoch": 0.06080609656740064, "grad_norm": 0.6171875, "learning_rate": 0.001507543729350751, "loss": 0.2349, "step": 34294 }, { "epoch": 0.06080964273271045, "grad_norm": 0.296875, "learning_rate": 0.0015074906617087956, "loss": 0.1639, "step": 34296 }, { "epoch": 0.060813188898020266, "grad_norm": 0.56640625, "learning_rate": 0.0015074375922847123, "loss": 0.1612, "step": 34298 }, { "epoch": 0.06081673506333008, "grad_norm": 1.890625, "learning_rate": 0.0015073845210787343, "loss": 0.2798, "step": 34300 }, { "epoch": 0.060820281228639896, "grad_norm": 0.271484375, "learning_rate": 0.0015073314480910927, "loss": 0.2723, "step": 34302 }, { "epoch": 0.06082382739394971, "grad_norm": 0.94921875, "learning_rate": 0.0015072783733220197, "loss": 0.2183, "step": 34304 }, { "epoch": 0.060827373559259525, "grad_norm": 0.494140625, "learning_rate": 0.0015072252967717479, "loss": 0.1875, "step": 34306 }, { "epoch": 0.06083091972456934, "grad_norm": 0.45703125, "learning_rate": 0.001507172218440509, "loss": 0.1897, "step": 34308 }, { "epoch": 0.060834465889879154, "grad_norm": 0.59765625, "learning_rate": 0.0015071191383285356, "loss": 0.2031, "step": 34310 }, { "epoch": 0.06083801205518897, "grad_norm": 0.2890625, "learning_rate": 0.0015070660564360596, "loss": 0.1898, "step": 34312 }, { "epoch": 0.06084155822049878, "grad_norm": 0.287109375, "learning_rate": 0.001507012972763313, "loss": 0.2115, "step": 34314 }, { "epoch": 0.060845104385808604, "grad_norm": 0.3046875, "learning_rate": 0.0015069598873105282, "loss": 0.2433, "step": 34316 }, { "epoch": 0.06084865055111842, "grad_norm": 0.82421875, "learning_rate": 0.001506906800077937, "loss": 0.1732, "step": 34318 }, { "epoch": 0.060852196716428233, "grad_norm": 0.51953125, "learning_rate": 0.001506853711065772, "loss": 0.2336, "step": 34320 }, { "epoch": 0.06085574288173805, "grad_norm": 0.46875, "learning_rate": 0.0015068006202742653, "loss": 0.185, "step": 34322 }, { "epoch": 0.06085928904704786, "grad_norm": 0.333984375, "learning_rate": 0.0015067475277036487, "loss": 0.231, "step": 34324 }, { "epoch": 0.06086283521235768, "grad_norm": 0.412109375, "learning_rate": 0.0015066944333541547, "loss": 0.1607, "step": 34326 }, { "epoch": 0.06086638137766749, "grad_norm": 0.21875, "learning_rate": 0.0015066413372260157, "loss": 0.1652, "step": 34328 }, { "epoch": 0.060869927542977306, "grad_norm": 1.046875, "learning_rate": 0.0015065882393194637, "loss": 0.2363, "step": 34330 }, { "epoch": 0.06087347370828712, "grad_norm": 0.40234375, "learning_rate": 0.0015065351396347307, "loss": 0.1671, "step": 34332 }, { "epoch": 0.060877019873596935, "grad_norm": 0.59765625, "learning_rate": 0.0015064820381720492, "loss": 0.2462, "step": 34334 }, { "epoch": 0.06088056603890675, "grad_norm": 0.5625, "learning_rate": 0.0015064289349316514, "loss": 0.2213, "step": 34336 }, { "epoch": 0.060884112204216564, "grad_norm": 0.294921875, "learning_rate": 0.0015063758299137696, "loss": 0.1974, "step": 34338 }, { "epoch": 0.060887658369526386, "grad_norm": 0.1826171875, "learning_rate": 0.0015063227231186358, "loss": 0.2979, "step": 34340 }, { "epoch": 0.0608912045348362, "grad_norm": 2.578125, "learning_rate": 0.0015062696145464827, "loss": 0.2572, "step": 34342 }, { "epoch": 0.060894750700146015, "grad_norm": 0.80078125, "learning_rate": 0.001506216504197542, "loss": 0.2297, "step": 34344 }, { "epoch": 0.06089829686545583, "grad_norm": 0.7265625, "learning_rate": 0.0015061633920720463, "loss": 0.2506, "step": 34346 }, { "epoch": 0.060901843030765644, "grad_norm": 0.451171875, "learning_rate": 0.0015061102781702281, "loss": 0.1948, "step": 34348 }, { "epoch": 0.06090538919607546, "grad_norm": 1.2578125, "learning_rate": 0.0015060571624923192, "loss": 0.2515, "step": 34350 }, { "epoch": 0.06090893536138527, "grad_norm": 0.466796875, "learning_rate": 0.0015060040450385523, "loss": 0.282, "step": 34352 }, { "epoch": 0.06091248152669509, "grad_norm": 0.408203125, "learning_rate": 0.0015059509258091594, "loss": 0.1922, "step": 34354 }, { "epoch": 0.0609160276920049, "grad_norm": 1.7890625, "learning_rate": 0.0015058978048043733, "loss": 0.2961, "step": 34356 }, { "epoch": 0.06091957385731472, "grad_norm": 1.1171875, "learning_rate": 0.0015058446820244255, "loss": 0.2125, "step": 34358 }, { "epoch": 0.06092312002262453, "grad_norm": 0.6328125, "learning_rate": 0.001505791557469549, "loss": 0.2456, "step": 34360 }, { "epoch": 0.06092666618793435, "grad_norm": 1.53125, "learning_rate": 0.0015057384311399759, "loss": 0.3092, "step": 34362 }, { "epoch": 0.06093021235324417, "grad_norm": 0.33203125, "learning_rate": 0.0015056853030359387, "loss": 0.272, "step": 34364 }, { "epoch": 0.06093375851855398, "grad_norm": 0.37890625, "learning_rate": 0.0015056321731576696, "loss": 0.183, "step": 34366 }, { "epoch": 0.0609373046838638, "grad_norm": 0.373046875, "learning_rate": 0.001505579041505401, "loss": 0.208, "step": 34368 }, { "epoch": 0.06094085084917361, "grad_norm": 0.69140625, "learning_rate": 0.0015055259080793651, "loss": 0.1941, "step": 34370 }, { "epoch": 0.060944397014483426, "grad_norm": 0.58203125, "learning_rate": 0.0015054727728797946, "loss": 0.1646, "step": 34372 }, { "epoch": 0.06094794317979324, "grad_norm": 0.306640625, "learning_rate": 0.001505419635906922, "loss": 0.2005, "step": 34374 }, { "epoch": 0.060951489345103055, "grad_norm": 0.2373046875, "learning_rate": 0.001505366497160979, "loss": 0.2277, "step": 34376 }, { "epoch": 0.06095503551041287, "grad_norm": 0.265625, "learning_rate": 0.0015053133566421983, "loss": 0.1794, "step": 34378 }, { "epoch": 0.060958581675722684, "grad_norm": 0.37109375, "learning_rate": 0.0015052602143508127, "loss": 0.2272, "step": 34380 }, { "epoch": 0.0609621278410325, "grad_norm": 0.28515625, "learning_rate": 0.0015052070702870541, "loss": 0.1788, "step": 34382 }, { "epoch": 0.06096567400634232, "grad_norm": 2.484375, "learning_rate": 0.0015051539244511553, "loss": 0.1994, "step": 34384 }, { "epoch": 0.060969220171652135, "grad_norm": 0.95703125, "learning_rate": 0.0015051007768433486, "loss": 0.1803, "step": 34386 }, { "epoch": 0.06097276633696195, "grad_norm": 0.8046875, "learning_rate": 0.0015050476274638665, "loss": 0.1937, "step": 34388 }, { "epoch": 0.060976312502271764, "grad_norm": 0.56640625, "learning_rate": 0.001504994476312941, "loss": 0.4065, "step": 34390 }, { "epoch": 0.06097985866758158, "grad_norm": 0.9609375, "learning_rate": 0.0015049413233908053, "loss": 0.2244, "step": 34392 }, { "epoch": 0.06098340483289139, "grad_norm": 0.33203125, "learning_rate": 0.0015048881686976912, "loss": 0.2023, "step": 34394 }, { "epoch": 0.06098695099820121, "grad_norm": 0.4375, "learning_rate": 0.001504835012233832, "loss": 0.2578, "step": 34396 }, { "epoch": 0.06099049716351102, "grad_norm": 0.67578125, "learning_rate": 0.0015047818539994588, "loss": 0.1824, "step": 34398 }, { "epoch": 0.060994043328820836, "grad_norm": 0.2470703125, "learning_rate": 0.0015047286939948055, "loss": 0.2634, "step": 34400 }, { "epoch": 0.06099758949413065, "grad_norm": 5.53125, "learning_rate": 0.0015046755322201033, "loss": 0.1652, "step": 34402 }, { "epoch": 0.061001135659440466, "grad_norm": 0.2890625, "learning_rate": 0.0015046223686755863, "loss": 0.1776, "step": 34404 }, { "epoch": 0.06100468182475028, "grad_norm": 0.3828125, "learning_rate": 0.0015045692033614855, "loss": 0.1928, "step": 34406 }, { "epoch": 0.0610082279900601, "grad_norm": 0.271484375, "learning_rate": 0.001504516036278034, "loss": 0.1548, "step": 34408 }, { "epoch": 0.061011774155369916, "grad_norm": 1.015625, "learning_rate": 0.0015044628674254644, "loss": 0.2158, "step": 34410 }, { "epoch": 0.06101532032067973, "grad_norm": 0.318359375, "learning_rate": 0.0015044096968040092, "loss": 0.1866, "step": 34412 }, { "epoch": 0.061018866485989545, "grad_norm": 0.431640625, "learning_rate": 0.001504356524413901, "loss": 0.1999, "step": 34414 }, { "epoch": 0.06102241265129936, "grad_norm": 0.306640625, "learning_rate": 0.0015043033502553723, "loss": 0.2185, "step": 34416 }, { "epoch": 0.061025958816609174, "grad_norm": 2.890625, "learning_rate": 0.0015042501743286552, "loss": 0.3017, "step": 34418 }, { "epoch": 0.06102950498191899, "grad_norm": 0.4609375, "learning_rate": 0.0015041969966339828, "loss": 0.257, "step": 34420 }, { "epoch": 0.0610330511472288, "grad_norm": 0.236328125, "learning_rate": 0.0015041438171715877, "loss": 0.1876, "step": 34422 }, { "epoch": 0.06103659731253862, "grad_norm": 0.396484375, "learning_rate": 0.001504090635941702, "loss": 0.1703, "step": 34424 }, { "epoch": 0.06104014347784843, "grad_norm": 0.9453125, "learning_rate": 0.0015040374529445588, "loss": 0.1794, "step": 34426 }, { "epoch": 0.06104368964315825, "grad_norm": 0.404296875, "learning_rate": 0.0015039842681803905, "loss": 0.1854, "step": 34428 }, { "epoch": 0.06104723580846807, "grad_norm": 0.3046875, "learning_rate": 0.00150393108164943, "loss": 0.1819, "step": 34430 }, { "epoch": 0.06105078197377788, "grad_norm": 0.416015625, "learning_rate": 0.001503877893351909, "loss": 0.1912, "step": 34432 }, { "epoch": 0.0610543281390877, "grad_norm": 0.51171875, "learning_rate": 0.001503824703288061, "loss": 0.1576, "step": 34434 }, { "epoch": 0.06105787430439751, "grad_norm": 0.30859375, "learning_rate": 0.0015037715114581181, "loss": 0.2359, "step": 34436 }, { "epoch": 0.06106142046970733, "grad_norm": 0.75, "learning_rate": 0.0015037183178623135, "loss": 0.2798, "step": 34438 }, { "epoch": 0.06106496663501714, "grad_norm": 0.6953125, "learning_rate": 0.001503665122500879, "loss": 0.2177, "step": 34440 }, { "epoch": 0.061068512800326956, "grad_norm": 0.5390625, "learning_rate": 0.0015036119253740482, "loss": 0.236, "step": 34442 }, { "epoch": 0.06107205896563677, "grad_norm": 0.80078125, "learning_rate": 0.001503558726482053, "loss": 0.491, "step": 34444 }, { "epoch": 0.061075605130946585, "grad_norm": 0.322265625, "learning_rate": 0.0015035055258251265, "loss": 0.2714, "step": 34446 }, { "epoch": 0.0610791512962564, "grad_norm": 0.69921875, "learning_rate": 0.001503452323403501, "loss": 0.1709, "step": 34448 }, { "epoch": 0.061082697461566214, "grad_norm": 0.34765625, "learning_rate": 0.0015033991192174097, "loss": 0.1825, "step": 34450 }, { "epoch": 0.061086243626876036, "grad_norm": 1.5390625, "learning_rate": 0.0015033459132670847, "loss": 0.1944, "step": 34452 }, { "epoch": 0.06108978979218585, "grad_norm": 0.8515625, "learning_rate": 0.0015032927055527593, "loss": 0.184, "step": 34454 }, { "epoch": 0.061093335957495665, "grad_norm": 0.78515625, "learning_rate": 0.0015032394960746655, "loss": 0.1712, "step": 34456 }, { "epoch": 0.06109688212280548, "grad_norm": 0.380859375, "learning_rate": 0.0015031862848330364, "loss": 0.1529, "step": 34458 }, { "epoch": 0.061100428288115294, "grad_norm": 0.2294921875, "learning_rate": 0.0015031330718281047, "loss": 0.1914, "step": 34460 }, { "epoch": 0.06110397445342511, "grad_norm": 0.60546875, "learning_rate": 0.0015030798570601033, "loss": 0.2384, "step": 34462 }, { "epoch": 0.06110752061873492, "grad_norm": 0.75, "learning_rate": 0.0015030266405292646, "loss": 0.1939, "step": 34464 }, { "epoch": 0.06111106678404474, "grad_norm": 0.34375, "learning_rate": 0.0015029734222358215, "loss": 0.2311, "step": 34466 }, { "epoch": 0.06111461294935455, "grad_norm": 0.828125, "learning_rate": 0.0015029202021800067, "loss": 0.1865, "step": 34468 }, { "epoch": 0.06111815911466437, "grad_norm": 0.609375, "learning_rate": 0.0015028669803620533, "loss": 0.2351, "step": 34470 }, { "epoch": 0.06112170527997418, "grad_norm": 0.265625, "learning_rate": 0.0015028137567821933, "loss": 0.1433, "step": 34472 }, { "epoch": 0.061125251445283996, "grad_norm": 8.5625, "learning_rate": 0.0015027605314406603, "loss": 0.2143, "step": 34474 }, { "epoch": 0.06112879761059382, "grad_norm": 0.341796875, "learning_rate": 0.0015027073043376862, "loss": 0.1764, "step": 34476 }, { "epoch": 0.06113234377590363, "grad_norm": 0.345703125, "learning_rate": 0.0015026540754735048, "loss": 0.1791, "step": 34478 }, { "epoch": 0.061135889941213446, "grad_norm": 0.494140625, "learning_rate": 0.001502600844848348, "loss": 0.1756, "step": 34480 }, { "epoch": 0.06113943610652326, "grad_norm": 0.2734375, "learning_rate": 0.0015025476124624492, "loss": 0.2309, "step": 34482 }, { "epoch": 0.061142982271833075, "grad_norm": 0.4921875, "learning_rate": 0.0015024943783160407, "loss": 0.2502, "step": 34484 }, { "epoch": 0.06114652843714289, "grad_norm": 1.2109375, "learning_rate": 0.0015024411424093558, "loss": 0.2166, "step": 34486 }, { "epoch": 0.061150074602452704, "grad_norm": 0.263671875, "learning_rate": 0.001502387904742627, "loss": 0.1709, "step": 34488 }, { "epoch": 0.06115362076776252, "grad_norm": 0.482421875, "learning_rate": 0.0015023346653160875, "loss": 0.1955, "step": 34490 }, { "epoch": 0.061157166933072334, "grad_norm": 2.984375, "learning_rate": 0.0015022814241299697, "loss": 0.2294, "step": 34492 }, { "epoch": 0.06116071309838215, "grad_norm": 0.427734375, "learning_rate": 0.0015022281811845064, "loss": 0.2518, "step": 34494 }, { "epoch": 0.06116425926369196, "grad_norm": 0.88671875, "learning_rate": 0.0015021749364799307, "loss": 0.2057, "step": 34496 }, { "epoch": 0.061167805429001784, "grad_norm": 0.64453125, "learning_rate": 0.0015021216900164755, "loss": 0.2221, "step": 34498 }, { "epoch": 0.0611713515943116, "grad_norm": 0.443359375, "learning_rate": 0.0015020684417943737, "loss": 0.1698, "step": 34500 }, { "epoch": 0.06117489775962141, "grad_norm": 0.3359375, "learning_rate": 0.001502015191813858, "loss": 0.1485, "step": 34502 }, { "epoch": 0.06117844392493123, "grad_norm": 0.1591796875, "learning_rate": 0.0015019619400751616, "loss": 0.2195, "step": 34504 }, { "epoch": 0.06118199009024104, "grad_norm": 0.69921875, "learning_rate": 0.0015019086865785173, "loss": 0.3035, "step": 34506 }, { "epoch": 0.06118553625555086, "grad_norm": 0.30078125, "learning_rate": 0.0015018554313241572, "loss": 0.3628, "step": 34508 }, { "epoch": 0.06118908242086067, "grad_norm": 1.0390625, "learning_rate": 0.0015018021743123154, "loss": 0.4544, "step": 34510 }, { "epoch": 0.061192628586170486, "grad_norm": 0.3046875, "learning_rate": 0.001501748915543224, "loss": 0.1726, "step": 34512 }, { "epoch": 0.0611961747514803, "grad_norm": 0.25390625, "learning_rate": 0.0015016956550171164, "loss": 0.2081, "step": 34514 }, { "epoch": 0.061199720916790115, "grad_norm": 1.2109375, "learning_rate": 0.0015016423927342252, "loss": 0.2451, "step": 34516 }, { "epoch": 0.06120326708209993, "grad_norm": 0.56640625, "learning_rate": 0.0015015891286947834, "loss": 0.2175, "step": 34518 }, { "epoch": 0.06120681324740975, "grad_norm": 0.51171875, "learning_rate": 0.0015015358628990246, "loss": 0.2116, "step": 34520 }, { "epoch": 0.061210359412719566, "grad_norm": 1.0625, "learning_rate": 0.0015014825953471806, "loss": 0.191, "step": 34522 }, { "epoch": 0.06121390557802938, "grad_norm": 0.458984375, "learning_rate": 0.0015014293260394852, "loss": 0.1777, "step": 34524 }, { "epoch": 0.061217451743339195, "grad_norm": 12.375, "learning_rate": 0.0015013760549761708, "loss": 0.2444, "step": 34526 }, { "epoch": 0.06122099790864901, "grad_norm": 1.3125, "learning_rate": 0.001501322782157471, "loss": 0.2308, "step": 34528 }, { "epoch": 0.061224544073958824, "grad_norm": 0.2490234375, "learning_rate": 0.0015012695075836183, "loss": 0.1894, "step": 34530 }, { "epoch": 0.06122809023926864, "grad_norm": 0.12353515625, "learning_rate": 0.0015012162312548459, "loss": 0.1279, "step": 34532 }, { "epoch": 0.06123163640457845, "grad_norm": 0.365234375, "learning_rate": 0.0015011629531713866, "loss": 0.2134, "step": 34534 }, { "epoch": 0.06123518256988827, "grad_norm": 0.25, "learning_rate": 0.0015011096733334739, "loss": 0.237, "step": 34536 }, { "epoch": 0.06123872873519808, "grad_norm": 2.921875, "learning_rate": 0.00150105639174134, "loss": 0.2756, "step": 34538 }, { "epoch": 0.0612422749005079, "grad_norm": 0.76171875, "learning_rate": 0.001501003108395219, "loss": 0.1535, "step": 34540 }, { "epoch": 0.06124582106581771, "grad_norm": 0.3359375, "learning_rate": 0.001500949823295343, "loss": 0.3248, "step": 34542 }, { "epoch": 0.06124936723112753, "grad_norm": 0.310546875, "learning_rate": 0.0015008965364419456, "loss": 0.1457, "step": 34544 }, { "epoch": 0.06125291339643735, "grad_norm": 1.0859375, "learning_rate": 0.0015008432478352592, "loss": 0.2861, "step": 34546 }, { "epoch": 0.06125645956174716, "grad_norm": 0.69921875, "learning_rate": 0.0015007899574755177, "loss": 0.227, "step": 34548 }, { "epoch": 0.061260005727056976, "grad_norm": 0.5390625, "learning_rate": 0.0015007366653629532, "loss": 0.1863, "step": 34550 }, { "epoch": 0.06126355189236679, "grad_norm": 0.79296875, "learning_rate": 0.0015006833714978, "loss": 0.2456, "step": 34552 }, { "epoch": 0.061267098057676606, "grad_norm": 0.3359375, "learning_rate": 0.00150063007588029, "loss": 0.2015, "step": 34554 }, { "epoch": 0.06127064422298642, "grad_norm": 0.373046875, "learning_rate": 0.0015005767785106568, "loss": 0.1789, "step": 34556 }, { "epoch": 0.061274190388296235, "grad_norm": 0.56640625, "learning_rate": 0.0015005234793891333, "loss": 0.1635, "step": 34558 }, { "epoch": 0.06127773655360605, "grad_norm": 0.263671875, "learning_rate": 0.0015004701785159534, "loss": 0.1848, "step": 34560 }, { "epoch": 0.061281282718915864, "grad_norm": 0.330078125, "learning_rate": 0.0015004168758913491, "loss": 0.2295, "step": 34562 }, { "epoch": 0.06128482888422568, "grad_norm": 0.435546875, "learning_rate": 0.001500363571515554, "loss": 0.181, "step": 34564 }, { "epoch": 0.0612883750495355, "grad_norm": 0.3671875, "learning_rate": 0.0015003102653888011, "loss": 0.1916, "step": 34566 }, { "epoch": 0.061291921214845314, "grad_norm": 0.75, "learning_rate": 0.0015002569575113239, "loss": 0.1759, "step": 34568 }, { "epoch": 0.06129546738015513, "grad_norm": 0.41015625, "learning_rate": 0.001500203647883355, "loss": 0.2176, "step": 34570 }, { "epoch": 0.06129901354546494, "grad_norm": 0.39453125, "learning_rate": 0.0015001503365051278, "loss": 0.21, "step": 34572 }, { "epoch": 0.06130255971077476, "grad_norm": 0.79296875, "learning_rate": 0.0015000970233768757, "loss": 0.4213, "step": 34574 }, { "epoch": 0.06130610587608457, "grad_norm": 0.58984375, "learning_rate": 0.0015000437084988314, "loss": 0.2624, "step": 34576 }, { "epoch": 0.06130965204139439, "grad_norm": 0.71875, "learning_rate": 0.0014999903918712283, "loss": 0.2069, "step": 34578 }, { "epoch": 0.0613131982067042, "grad_norm": 0.93359375, "learning_rate": 0.0014999370734942999, "loss": 0.2187, "step": 34580 }, { "epoch": 0.061316744372014016, "grad_norm": 0.51171875, "learning_rate": 0.0014998837533682788, "loss": 0.1739, "step": 34582 }, { "epoch": 0.06132029053732383, "grad_norm": 0.30078125, "learning_rate": 0.0014998304314933987, "loss": 0.2151, "step": 34584 }, { "epoch": 0.061323836702633645, "grad_norm": 1.515625, "learning_rate": 0.001499777107869892, "loss": 0.1902, "step": 34586 }, { "epoch": 0.06132738286794347, "grad_norm": 0.6484375, "learning_rate": 0.0014997237824979927, "loss": 0.2281, "step": 34588 }, { "epoch": 0.06133092903325328, "grad_norm": 0.515625, "learning_rate": 0.001499670455377934, "loss": 0.1539, "step": 34590 }, { "epoch": 0.061334475198563096, "grad_norm": 0.65234375, "learning_rate": 0.0014996171265099483, "loss": 0.2229, "step": 34592 }, { "epoch": 0.06133802136387291, "grad_norm": 0.380859375, "learning_rate": 0.00149956379589427, "loss": 0.199, "step": 34594 }, { "epoch": 0.061341567529182725, "grad_norm": 0.53125, "learning_rate": 0.0014995104635311314, "loss": 0.2446, "step": 34596 }, { "epoch": 0.06134511369449254, "grad_norm": 0.453125, "learning_rate": 0.0014994571294207665, "loss": 0.1404, "step": 34598 }, { "epoch": 0.061348659859802354, "grad_norm": 0.33203125, "learning_rate": 0.001499403793563408, "loss": 0.2608, "step": 34600 }, { "epoch": 0.06135220602511217, "grad_norm": 0.828125, "learning_rate": 0.001499350455959289, "loss": 0.2032, "step": 34602 }, { "epoch": 0.06135575219042198, "grad_norm": 0.6484375, "learning_rate": 0.0014992971166086432, "loss": 0.1631, "step": 34604 }, { "epoch": 0.0613592983557318, "grad_norm": 0.390625, "learning_rate": 0.001499243775511704, "loss": 0.2224, "step": 34606 }, { "epoch": 0.06136284452104161, "grad_norm": 0.44921875, "learning_rate": 0.0014991904326687043, "loss": 0.1413, "step": 34608 }, { "epoch": 0.06136639068635143, "grad_norm": 0.326171875, "learning_rate": 0.0014991370880798776, "loss": 0.1558, "step": 34610 }, { "epoch": 0.06136993685166125, "grad_norm": 1.4140625, "learning_rate": 0.0014990837417454567, "loss": 0.2008, "step": 34612 }, { "epoch": 0.06137348301697106, "grad_norm": 0.365234375, "learning_rate": 0.001499030393665676, "loss": 0.2262, "step": 34614 }, { "epoch": 0.06137702918228088, "grad_norm": 0.8515625, "learning_rate": 0.0014989770438407677, "loss": 0.2118, "step": 34616 }, { "epoch": 0.06138057534759069, "grad_norm": 0.40625, "learning_rate": 0.0014989236922709657, "loss": 0.2164, "step": 34618 }, { "epoch": 0.06138412151290051, "grad_norm": 0.37109375, "learning_rate": 0.001498870338956503, "loss": 0.1972, "step": 34620 }, { "epoch": 0.06138766767821032, "grad_norm": 1.4921875, "learning_rate": 0.0014988169838976134, "loss": 0.2374, "step": 34622 }, { "epoch": 0.061391213843520136, "grad_norm": 0.65234375, "learning_rate": 0.0014987636270945297, "loss": 0.1705, "step": 34624 }, { "epoch": 0.06139476000882995, "grad_norm": 0.47265625, "learning_rate": 0.0014987102685474858, "loss": 0.1743, "step": 34626 }, { "epoch": 0.061398306174139765, "grad_norm": 0.59765625, "learning_rate": 0.0014986569082567145, "loss": 0.2685, "step": 34628 }, { "epoch": 0.06140185233944958, "grad_norm": 1.203125, "learning_rate": 0.0014986035462224497, "loss": 0.2621, "step": 34630 }, { "epoch": 0.061405398504759394, "grad_norm": 0.6484375, "learning_rate": 0.0014985501824449244, "loss": 0.1588, "step": 34632 }, { "epoch": 0.061408944670069215, "grad_norm": 0.68359375, "learning_rate": 0.0014984968169243723, "loss": 0.4406, "step": 34634 }, { "epoch": 0.06141249083537903, "grad_norm": 0.462890625, "learning_rate": 0.0014984434496610264, "loss": 0.1562, "step": 34636 }, { "epoch": 0.061416037000688845, "grad_norm": 0.4296875, "learning_rate": 0.0014983900806551204, "loss": 0.2989, "step": 34638 }, { "epoch": 0.06141958316599866, "grad_norm": 1.6015625, "learning_rate": 0.0014983367099068875, "loss": 0.2141, "step": 34640 }, { "epoch": 0.061423129331308474, "grad_norm": 0.39453125, "learning_rate": 0.0014982833374165613, "loss": 0.2135, "step": 34642 }, { "epoch": 0.06142667549661829, "grad_norm": 0.34375, "learning_rate": 0.0014982299631843747, "loss": 0.1691, "step": 34644 }, { "epoch": 0.0614302216619281, "grad_norm": 0.3125, "learning_rate": 0.0014981765872105622, "loss": 0.1785, "step": 34646 }, { "epoch": 0.06143376782723792, "grad_norm": 0.435546875, "learning_rate": 0.0014981232094953563, "loss": 0.1763, "step": 34648 }, { "epoch": 0.06143731399254773, "grad_norm": 0.4140625, "learning_rate": 0.0014980698300389908, "loss": 0.1682, "step": 34650 }, { "epoch": 0.061440860157857546, "grad_norm": 0.416015625, "learning_rate": 0.001498016448841699, "loss": 0.2167, "step": 34652 }, { "epoch": 0.06144440632316736, "grad_norm": 0.81640625, "learning_rate": 0.0014979630659037145, "loss": 0.181, "step": 34654 }, { "epoch": 0.06144795248847718, "grad_norm": 1.71875, "learning_rate": 0.0014979096812252708, "loss": 0.217, "step": 34656 }, { "epoch": 0.061451498653787, "grad_norm": 2.28125, "learning_rate": 0.0014978562948066013, "loss": 0.2298, "step": 34658 }, { "epoch": 0.06145504481909681, "grad_norm": 0.357421875, "learning_rate": 0.0014978029066479394, "loss": 0.1449, "step": 34660 }, { "epoch": 0.061458590984406626, "grad_norm": 0.4453125, "learning_rate": 0.0014977495167495184, "loss": 0.1586, "step": 34662 }, { "epoch": 0.06146213714971644, "grad_norm": 0.2490234375, "learning_rate": 0.0014976961251115725, "loss": 0.1415, "step": 34664 }, { "epoch": 0.061465683315026255, "grad_norm": 0.56640625, "learning_rate": 0.0014976427317343346, "loss": 0.2266, "step": 34666 }, { "epoch": 0.06146922948033607, "grad_norm": 0.73828125, "learning_rate": 0.0014975893366180383, "loss": 0.2184, "step": 34668 }, { "epoch": 0.061472775645645884, "grad_norm": 0.353515625, "learning_rate": 0.0014975359397629171, "loss": 0.1466, "step": 34670 }, { "epoch": 0.0614763218109557, "grad_norm": 1.328125, "learning_rate": 0.0014974825411692048, "loss": 0.2423, "step": 34672 }, { "epoch": 0.06147986797626551, "grad_norm": 0.365234375, "learning_rate": 0.0014974291408371347, "loss": 0.1627, "step": 34674 }, { "epoch": 0.06148341414157533, "grad_norm": 0.875, "learning_rate": 0.0014973757387669404, "loss": 0.2008, "step": 34676 }, { "epoch": 0.06148696030688514, "grad_norm": 0.328125, "learning_rate": 0.0014973223349588553, "loss": 0.1922, "step": 34678 }, { "epoch": 0.061490506472194964, "grad_norm": 0.87890625, "learning_rate": 0.0014972689294131135, "loss": 0.1672, "step": 34680 }, { "epoch": 0.06149405263750478, "grad_norm": 2.125, "learning_rate": 0.0014972155221299477, "loss": 0.2697, "step": 34682 }, { "epoch": 0.06149759880281459, "grad_norm": 0.447265625, "learning_rate": 0.001497162113109592, "loss": 0.1629, "step": 34684 }, { "epoch": 0.06150114496812441, "grad_norm": 0.251953125, "learning_rate": 0.00149710870235228, "loss": 0.1873, "step": 34686 }, { "epoch": 0.06150469113343422, "grad_norm": 0.33203125, "learning_rate": 0.0014970552898582452, "loss": 0.3756, "step": 34688 }, { "epoch": 0.06150823729874404, "grad_norm": 0.396484375, "learning_rate": 0.0014970018756277216, "loss": 0.208, "step": 34690 }, { "epoch": 0.06151178346405385, "grad_norm": 0.14453125, "learning_rate": 0.001496948459660942, "loss": 0.1601, "step": 34692 }, { "epoch": 0.061515329629363666, "grad_norm": 0.74609375, "learning_rate": 0.0014968950419581401, "loss": 0.2215, "step": 34694 }, { "epoch": 0.06151887579467348, "grad_norm": 0.9609375, "learning_rate": 0.0014968416225195505, "loss": 0.3586, "step": 34696 }, { "epoch": 0.061522421959983295, "grad_norm": 0.228515625, "learning_rate": 0.0014967882013454058, "loss": 0.1772, "step": 34698 }, { "epoch": 0.06152596812529311, "grad_norm": 0.48828125, "learning_rate": 0.00149673477843594, "loss": 0.1518, "step": 34700 }, { "epoch": 0.06152951429060293, "grad_norm": 0.447265625, "learning_rate": 0.0014966813537913865, "loss": 0.2412, "step": 34702 }, { "epoch": 0.061533060455912746, "grad_norm": 1.421875, "learning_rate": 0.0014966279274119796, "loss": 0.3623, "step": 34704 }, { "epoch": 0.06153660662122256, "grad_norm": 0.50390625, "learning_rate": 0.0014965744992979525, "loss": 0.2091, "step": 34706 }, { "epoch": 0.061540152786532375, "grad_norm": 0.4296875, "learning_rate": 0.0014965210694495388, "loss": 0.1975, "step": 34708 }, { "epoch": 0.06154369895184219, "grad_norm": 0.287109375, "learning_rate": 0.0014964676378669722, "loss": 0.1665, "step": 34710 }, { "epoch": 0.061547245117152004, "grad_norm": 0.94140625, "learning_rate": 0.0014964142045504865, "loss": 0.3113, "step": 34712 }, { "epoch": 0.06155079128246182, "grad_norm": 0.330078125, "learning_rate": 0.0014963607695003152, "loss": 0.3338, "step": 34714 }, { "epoch": 0.06155433744777163, "grad_norm": 0.380859375, "learning_rate": 0.0014963073327166922, "loss": 0.1652, "step": 34716 }, { "epoch": 0.06155788361308145, "grad_norm": 0.4921875, "learning_rate": 0.0014962538941998512, "loss": 0.1986, "step": 34718 }, { "epoch": 0.06156142977839126, "grad_norm": 0.310546875, "learning_rate": 0.0014962004539500258, "loss": 0.1905, "step": 34720 }, { "epoch": 0.06156497594370108, "grad_norm": 0.458984375, "learning_rate": 0.0014961470119674498, "loss": 0.2426, "step": 34722 }, { "epoch": 0.0615685221090109, "grad_norm": 0.3125, "learning_rate": 0.0014960935682523566, "loss": 0.1844, "step": 34724 }, { "epoch": 0.06157206827432071, "grad_norm": 0.431640625, "learning_rate": 0.0014960401228049807, "loss": 0.149, "step": 34726 }, { "epoch": 0.06157561443963053, "grad_norm": 5.21875, "learning_rate": 0.001495986675625555, "loss": 0.5408, "step": 34728 }, { "epoch": 0.06157916060494034, "grad_norm": 0.54296875, "learning_rate": 0.0014959332267143138, "loss": 0.2147, "step": 34730 }, { "epoch": 0.061582706770250156, "grad_norm": 0.86328125, "learning_rate": 0.0014958797760714906, "loss": 0.225, "step": 34732 }, { "epoch": 0.06158625293555997, "grad_norm": 0.37109375, "learning_rate": 0.0014958263236973192, "loss": 0.1772, "step": 34734 }, { "epoch": 0.061589799100869785, "grad_norm": 0.494140625, "learning_rate": 0.0014957728695920332, "loss": 0.1997, "step": 34736 }, { "epoch": 0.0615933452661796, "grad_norm": 0.232421875, "learning_rate": 0.0014957194137558668, "loss": 0.1952, "step": 34738 }, { "epoch": 0.061596891431489414, "grad_norm": 0.8203125, "learning_rate": 0.0014956659561890535, "loss": 0.1859, "step": 34740 }, { "epoch": 0.06160043759679923, "grad_norm": 0.51953125, "learning_rate": 0.001495612496891827, "loss": 0.3235, "step": 34742 }, { "epoch": 0.061603983762109044, "grad_norm": 0.298828125, "learning_rate": 0.0014955590358644213, "loss": 0.213, "step": 34744 }, { "epoch": 0.06160752992741886, "grad_norm": 1.015625, "learning_rate": 0.00149550557310707, "loss": 0.2689, "step": 34746 }, { "epoch": 0.06161107609272868, "grad_norm": 0.384765625, "learning_rate": 0.0014954521086200075, "loss": 0.1958, "step": 34748 }, { "epoch": 0.061614622258038494, "grad_norm": 0.3984375, "learning_rate": 0.001495398642403467, "loss": 0.1578, "step": 34750 }, { "epoch": 0.06161816842334831, "grad_norm": 0.72265625, "learning_rate": 0.0014953451744576824, "loss": 0.1753, "step": 34752 }, { "epoch": 0.06162171458865812, "grad_norm": 0.2333984375, "learning_rate": 0.0014952917047828878, "loss": 0.164, "step": 34754 }, { "epoch": 0.06162526075396794, "grad_norm": 0.498046875, "learning_rate": 0.0014952382333793164, "loss": 0.1998, "step": 34756 }, { "epoch": 0.06162880691927775, "grad_norm": 0.3671875, "learning_rate": 0.001495184760247203, "loss": 0.1611, "step": 34758 }, { "epoch": 0.06163235308458757, "grad_norm": 0.26953125, "learning_rate": 0.0014951312853867809, "loss": 0.1548, "step": 34760 }, { "epoch": 0.06163589924989738, "grad_norm": 0.45703125, "learning_rate": 0.0014950778087982845, "loss": 0.1226, "step": 34762 }, { "epoch": 0.061639445415207196, "grad_norm": 1.0234375, "learning_rate": 0.0014950243304819467, "loss": 0.1984, "step": 34764 }, { "epoch": 0.06164299158051701, "grad_norm": 0.921875, "learning_rate": 0.001494970850438002, "loss": 0.2171, "step": 34766 }, { "epoch": 0.061646537745826825, "grad_norm": 1.1171875, "learning_rate": 0.0014949173686666843, "loss": 0.1799, "step": 34768 }, { "epoch": 0.06165008391113665, "grad_norm": 0.287109375, "learning_rate": 0.0014948638851682277, "loss": 0.192, "step": 34770 }, { "epoch": 0.06165363007644646, "grad_norm": 0.30078125, "learning_rate": 0.0014948103999428656, "loss": 0.1935, "step": 34772 }, { "epoch": 0.061657176241756276, "grad_norm": 0.53515625, "learning_rate": 0.001494756912990832, "loss": 0.1811, "step": 34774 }, { "epoch": 0.06166072240706609, "grad_norm": 0.431640625, "learning_rate": 0.001494703424312361, "loss": 0.1886, "step": 34776 }, { "epoch": 0.061664268572375905, "grad_norm": 0.609375, "learning_rate": 0.0014946499339076867, "loss": 0.1569, "step": 34778 }, { "epoch": 0.06166781473768572, "grad_norm": 0.47265625, "learning_rate": 0.0014945964417770426, "loss": 0.1745, "step": 34780 }, { "epoch": 0.061671360902995534, "grad_norm": 0.66015625, "learning_rate": 0.0014945429479206631, "loss": 0.183, "step": 34782 }, { "epoch": 0.06167490706830535, "grad_norm": 0.33984375, "learning_rate": 0.001494489452338782, "loss": 0.147, "step": 34784 }, { "epoch": 0.06167845323361516, "grad_norm": 4.09375, "learning_rate": 0.0014944359550316329, "loss": 0.2284, "step": 34786 }, { "epoch": 0.06168199939892498, "grad_norm": 0.58984375, "learning_rate": 0.0014943824559994498, "loss": 0.2031, "step": 34788 }, { "epoch": 0.06168554556423479, "grad_norm": 0.33984375, "learning_rate": 0.0014943289552424674, "loss": 0.1411, "step": 34790 }, { "epoch": 0.061689091729544614, "grad_norm": 0.267578125, "learning_rate": 0.0014942754527609192, "loss": 0.1774, "step": 34792 }, { "epoch": 0.06169263789485443, "grad_norm": 0.515625, "learning_rate": 0.0014942219485550392, "loss": 0.1769, "step": 34794 }, { "epoch": 0.06169618406016424, "grad_norm": 0.224609375, "learning_rate": 0.001494168442625061, "loss": 0.139, "step": 34796 }, { "epoch": 0.06169973022547406, "grad_norm": 0.67578125, "learning_rate": 0.0014941149349712193, "loss": 0.1892, "step": 34798 }, { "epoch": 0.06170327639078387, "grad_norm": 0.357421875, "learning_rate": 0.0014940614255937476, "loss": 0.3919, "step": 34800 }, { "epoch": 0.061706822556093686, "grad_norm": 0.376953125, "learning_rate": 0.0014940079144928804, "loss": 0.2716, "step": 34802 }, { "epoch": 0.0617103687214035, "grad_norm": 0.671875, "learning_rate": 0.0014939544016688516, "loss": 0.471, "step": 34804 }, { "epoch": 0.061713914886713316, "grad_norm": 0.5546875, "learning_rate": 0.0014939008871218947, "loss": 0.1717, "step": 34806 }, { "epoch": 0.06171746105202313, "grad_norm": 1.2421875, "learning_rate": 0.0014938473708522443, "loss": 0.1561, "step": 34808 }, { "epoch": 0.061721007217332945, "grad_norm": 3.609375, "learning_rate": 0.001493793852860134, "loss": 0.3465, "step": 34810 }, { "epoch": 0.06172455338264276, "grad_norm": 0.9296875, "learning_rate": 0.0014937403331457984, "loss": 0.1834, "step": 34812 }, { "epoch": 0.061728099547952574, "grad_norm": 0.404296875, "learning_rate": 0.0014936868117094713, "loss": 0.2331, "step": 34814 }, { "epoch": 0.061731645713262395, "grad_norm": 0.52734375, "learning_rate": 0.0014936332885513864, "loss": 0.2051, "step": 34816 }, { "epoch": 0.06173519187857221, "grad_norm": 0.8359375, "learning_rate": 0.0014935797636717787, "loss": 0.1999, "step": 34818 }, { "epoch": 0.061738738043882024, "grad_norm": 3.859375, "learning_rate": 0.0014935262370708814, "loss": 0.1448, "step": 34820 }, { "epoch": 0.06174228420919184, "grad_norm": 0.333984375, "learning_rate": 0.0014934727087489291, "loss": 0.2183, "step": 34822 }, { "epoch": 0.06174583037450165, "grad_norm": 0.423828125, "learning_rate": 0.0014934191787061555, "loss": 0.1729, "step": 34824 }, { "epoch": 0.06174937653981147, "grad_norm": 1.2265625, "learning_rate": 0.001493365646942795, "loss": 0.4009, "step": 34826 }, { "epoch": 0.06175292270512128, "grad_norm": 0.263671875, "learning_rate": 0.0014933121134590817, "loss": 0.1703, "step": 34828 }, { "epoch": 0.0617564688704311, "grad_norm": 0.435546875, "learning_rate": 0.00149325857825525, "loss": 0.1811, "step": 34830 }, { "epoch": 0.06176001503574091, "grad_norm": 1.0078125, "learning_rate": 0.001493205041331533, "loss": 0.2168, "step": 34832 }, { "epoch": 0.061763561201050726, "grad_norm": 3.03125, "learning_rate": 0.0014931515026881661, "loss": 0.4823, "step": 34834 }, { "epoch": 0.06176710736636054, "grad_norm": 0.7109375, "learning_rate": 0.0014930979623253827, "loss": 0.1723, "step": 34836 }, { "epoch": 0.06177065353167036, "grad_norm": 0.734375, "learning_rate": 0.0014930444202434171, "loss": 0.1929, "step": 34838 }, { "epoch": 0.06177419969698018, "grad_norm": 0.546875, "learning_rate": 0.0014929908764425035, "loss": 0.3628, "step": 34840 }, { "epoch": 0.06177774586228999, "grad_norm": 0.84765625, "learning_rate": 0.0014929373309228767, "loss": 0.2019, "step": 34842 }, { "epoch": 0.061781292027599806, "grad_norm": 0.38671875, "learning_rate": 0.0014928837836847696, "loss": 0.2733, "step": 34844 }, { "epoch": 0.06178483819290962, "grad_norm": 0.52734375, "learning_rate": 0.0014928302347284172, "loss": 0.1806, "step": 34846 }, { "epoch": 0.061788384358219435, "grad_norm": 0.419921875, "learning_rate": 0.0014927766840540535, "loss": 0.1568, "step": 34848 }, { "epoch": 0.06179193052352925, "grad_norm": 0.6015625, "learning_rate": 0.0014927231316619128, "loss": 0.2004, "step": 34850 }, { "epoch": 0.061795476688839064, "grad_norm": 4.9375, "learning_rate": 0.0014926695775522292, "loss": 0.3137, "step": 34852 }, { "epoch": 0.06179902285414888, "grad_norm": 0.2080078125, "learning_rate": 0.0014926160217252366, "loss": 0.2168, "step": 34854 }, { "epoch": 0.06180256901945869, "grad_norm": 0.70703125, "learning_rate": 0.0014925624641811703, "loss": 0.2396, "step": 34856 }, { "epoch": 0.06180611518476851, "grad_norm": 0.5703125, "learning_rate": 0.0014925089049202633, "loss": 0.1988, "step": 34858 }, { "epoch": 0.06180966135007833, "grad_norm": 1.6171875, "learning_rate": 0.0014924553439427505, "loss": 0.2242, "step": 34860 }, { "epoch": 0.061813207515388144, "grad_norm": 0.49609375, "learning_rate": 0.0014924017812488661, "loss": 0.1721, "step": 34862 }, { "epoch": 0.06181675368069796, "grad_norm": 0.41015625, "learning_rate": 0.0014923482168388443, "loss": 0.1703, "step": 34864 }, { "epoch": 0.06182029984600777, "grad_norm": 1.15625, "learning_rate": 0.001492294650712919, "loss": 0.1427, "step": 34866 }, { "epoch": 0.06182384601131759, "grad_norm": 0.376953125, "learning_rate": 0.001492241082871325, "loss": 0.3468, "step": 34868 }, { "epoch": 0.0618273921766274, "grad_norm": 0.91015625, "learning_rate": 0.001492187513314296, "loss": 0.2412, "step": 34870 }, { "epoch": 0.06183093834193722, "grad_norm": 0.361328125, "learning_rate": 0.0014921339420420673, "loss": 0.2019, "step": 34872 }, { "epoch": 0.06183448450724703, "grad_norm": 0.34375, "learning_rate": 0.0014920803690548717, "loss": 0.4797, "step": 34874 }, { "epoch": 0.061838030672556846, "grad_norm": 0.76953125, "learning_rate": 0.0014920267943529448, "loss": 0.2345, "step": 34876 }, { "epoch": 0.06184157683786666, "grad_norm": 0.80859375, "learning_rate": 0.0014919732179365203, "loss": 0.1779, "step": 34878 }, { "epoch": 0.061845123003176475, "grad_norm": 0.7109375, "learning_rate": 0.001491919639805833, "loss": 0.1951, "step": 34880 }, { "epoch": 0.06184866916848629, "grad_norm": 0.5078125, "learning_rate": 0.0014918660599611163, "loss": 0.2229, "step": 34882 }, { "epoch": 0.06185221533379611, "grad_norm": 1.578125, "learning_rate": 0.0014918124784026054, "loss": 0.1722, "step": 34884 }, { "epoch": 0.061855761499105925, "grad_norm": 0.390625, "learning_rate": 0.001491758895130534, "loss": 0.1015, "step": 34886 }, { "epoch": 0.06185930766441574, "grad_norm": 0.49609375, "learning_rate": 0.0014917053101451373, "loss": 0.3386, "step": 34888 }, { "epoch": 0.061862853829725555, "grad_norm": 0.46484375, "learning_rate": 0.0014916517234466484, "loss": 0.1611, "step": 34890 }, { "epoch": 0.06186639999503537, "grad_norm": 0.400390625, "learning_rate": 0.001491598135035303, "loss": 0.1656, "step": 34892 }, { "epoch": 0.061869946160345184, "grad_norm": 0.921875, "learning_rate": 0.0014915445449113346, "loss": 0.15, "step": 34894 }, { "epoch": 0.061873492325655, "grad_norm": 1.40625, "learning_rate": 0.0014914909530749777, "loss": 0.2563, "step": 34896 }, { "epoch": 0.06187703849096481, "grad_norm": 0.51171875, "learning_rate": 0.0014914373595264667, "loss": 0.3065, "step": 34898 }, { "epoch": 0.06188058465627463, "grad_norm": 0.80078125, "learning_rate": 0.001491383764266036, "loss": 0.1922, "step": 34900 }, { "epoch": 0.06188413082158444, "grad_norm": 0.27734375, "learning_rate": 0.0014913301672939206, "loss": 0.2454, "step": 34902 }, { "epoch": 0.061887676986894256, "grad_norm": 0.43359375, "learning_rate": 0.0014912765686103538, "loss": 0.2055, "step": 34904 }, { "epoch": 0.06189122315220408, "grad_norm": 0.412109375, "learning_rate": 0.0014912229682155706, "loss": 0.2258, "step": 34906 }, { "epoch": 0.06189476931751389, "grad_norm": 1.2890625, "learning_rate": 0.0014911693661098058, "loss": 0.368, "step": 34908 }, { "epoch": 0.06189831548282371, "grad_norm": 0.15234375, "learning_rate": 0.0014911157622932928, "loss": 0.1522, "step": 34910 }, { "epoch": 0.06190186164813352, "grad_norm": 0.294921875, "learning_rate": 0.0014910621567662673, "loss": 0.1261, "step": 34912 }, { "epoch": 0.061905407813443336, "grad_norm": 0.66796875, "learning_rate": 0.0014910085495289625, "loss": 0.272, "step": 34914 }, { "epoch": 0.06190895397875315, "grad_norm": 0.28125, "learning_rate": 0.0014909549405816137, "loss": 0.1547, "step": 34916 }, { "epoch": 0.061912500144062965, "grad_norm": 0.734375, "learning_rate": 0.0014909013299244556, "loss": 0.17, "step": 34918 }, { "epoch": 0.06191604630937278, "grad_norm": 0.5234375, "learning_rate": 0.0014908477175577216, "loss": 0.1807, "step": 34920 }, { "epoch": 0.061919592474682594, "grad_norm": 0.3515625, "learning_rate": 0.0014907941034816466, "loss": 0.1939, "step": 34922 }, { "epoch": 0.06192313863999241, "grad_norm": 0.57421875, "learning_rate": 0.0014907404876964652, "loss": 0.2025, "step": 34924 }, { "epoch": 0.06192668480530222, "grad_norm": 0.3828125, "learning_rate": 0.001490686870202412, "loss": 0.1695, "step": 34926 }, { "epoch": 0.061930230970612045, "grad_norm": 0.90625, "learning_rate": 0.0014906332509997213, "loss": 0.3066, "step": 34928 }, { "epoch": 0.06193377713592186, "grad_norm": 0.48828125, "learning_rate": 0.0014905796300886279, "loss": 0.2318, "step": 34930 }, { "epoch": 0.061937323301231674, "grad_norm": 0.4609375, "learning_rate": 0.0014905260074693659, "loss": 0.2111, "step": 34932 }, { "epoch": 0.06194086946654149, "grad_norm": 0.44921875, "learning_rate": 0.00149047238314217, "loss": 0.5131, "step": 34934 }, { "epoch": 0.0619444156318513, "grad_norm": 0.2216796875, "learning_rate": 0.001490418757107275, "loss": 0.1889, "step": 34936 }, { "epoch": 0.06194796179716112, "grad_norm": 0.39453125, "learning_rate": 0.0014903651293649148, "loss": 0.1632, "step": 34938 }, { "epoch": 0.06195150796247093, "grad_norm": 0.4375, "learning_rate": 0.0014903114999153242, "loss": 0.2092, "step": 34940 }, { "epoch": 0.06195505412778075, "grad_norm": 0.220703125, "learning_rate": 0.001490257868758738, "loss": 0.1987, "step": 34942 }, { "epoch": 0.06195860029309056, "grad_norm": 4.53125, "learning_rate": 0.0014902042358953904, "loss": 0.256, "step": 34944 }, { "epoch": 0.061962146458400376, "grad_norm": 0.875, "learning_rate": 0.0014901506013255165, "loss": 0.2012, "step": 34946 }, { "epoch": 0.06196569262371019, "grad_norm": 0.6328125, "learning_rate": 0.0014900969650493501, "loss": 0.1424, "step": 34948 }, { "epoch": 0.061969238789020005, "grad_norm": 0.5234375, "learning_rate": 0.0014900433270671263, "loss": 0.1572, "step": 34950 }, { "epoch": 0.061972784954329826, "grad_norm": 0.63671875, "learning_rate": 0.0014899896873790798, "loss": 0.3867, "step": 34952 }, { "epoch": 0.06197633111963964, "grad_norm": 0.94140625, "learning_rate": 0.001489936045985445, "loss": 0.181, "step": 34954 }, { "epoch": 0.061979877284949456, "grad_norm": 0.349609375, "learning_rate": 0.0014898824028864558, "loss": 0.2172, "step": 34956 }, { "epoch": 0.06198342345025927, "grad_norm": 0.77734375, "learning_rate": 0.0014898287580823479, "loss": 0.2898, "step": 34958 }, { "epoch": 0.061986969615569085, "grad_norm": 0.318359375, "learning_rate": 0.0014897751115733554, "loss": 0.1683, "step": 34960 }, { "epoch": 0.0619905157808789, "grad_norm": 0.37890625, "learning_rate": 0.0014897214633597126, "loss": 0.1707, "step": 34962 }, { "epoch": 0.061994061946188714, "grad_norm": 1.0234375, "learning_rate": 0.0014896678134416548, "loss": 0.2218, "step": 34964 }, { "epoch": 0.06199760811149853, "grad_norm": 0.345703125, "learning_rate": 0.0014896141618194166, "loss": 0.1509, "step": 34966 }, { "epoch": 0.06200115427680834, "grad_norm": 0.6953125, "learning_rate": 0.001489560508493232, "loss": 0.2277, "step": 34968 }, { "epoch": 0.06200470044211816, "grad_norm": 0.16015625, "learning_rate": 0.0014895068534633363, "loss": 0.1865, "step": 34970 }, { "epoch": 0.06200824660742797, "grad_norm": 0.98828125, "learning_rate": 0.0014894531967299639, "loss": 0.2356, "step": 34972 }, { "epoch": 0.062011792772737793, "grad_norm": 1.59375, "learning_rate": 0.0014893995382933489, "loss": 0.497, "step": 34974 }, { "epoch": 0.06201533893804761, "grad_norm": 0.341796875, "learning_rate": 0.0014893458781537268, "loss": 0.1718, "step": 34976 }, { "epoch": 0.06201888510335742, "grad_norm": 0.189453125, "learning_rate": 0.0014892922163113323, "loss": 0.1593, "step": 34978 }, { "epoch": 0.06202243126866724, "grad_norm": 0.84375, "learning_rate": 0.0014892385527663994, "loss": 0.1671, "step": 34980 }, { "epoch": 0.06202597743397705, "grad_norm": 0.21484375, "learning_rate": 0.0014891848875191634, "loss": 0.1733, "step": 34982 }, { "epoch": 0.062029523599286866, "grad_norm": 0.265625, "learning_rate": 0.0014891312205698587, "loss": 0.1736, "step": 34984 }, { "epoch": 0.06203306976459668, "grad_norm": 0.45703125, "learning_rate": 0.0014890775519187198, "loss": 0.1702, "step": 34986 }, { "epoch": 0.062036615929906495, "grad_norm": 0.337890625, "learning_rate": 0.001489023881565982, "loss": 0.1582, "step": 34988 }, { "epoch": 0.06204016209521631, "grad_norm": 0.1572265625, "learning_rate": 0.0014889702095118797, "loss": 0.1041, "step": 34990 }, { "epoch": 0.062043708260526124, "grad_norm": 0.890625, "learning_rate": 0.0014889165357566477, "loss": 0.2486, "step": 34992 }, { "epoch": 0.06204725442583594, "grad_norm": 1.0859375, "learning_rate": 0.0014888628603005206, "loss": 0.2016, "step": 34994 }, { "epoch": 0.06205080059114576, "grad_norm": 0.322265625, "learning_rate": 0.0014888091831437328, "loss": 0.1644, "step": 34996 }, { "epoch": 0.062054346756455575, "grad_norm": 0.8671875, "learning_rate": 0.0014887555042865201, "loss": 0.1793, "step": 34998 }, { "epoch": 0.06205789292176539, "grad_norm": 1.03125, "learning_rate": 0.0014887018237291164, "loss": 0.206, "step": 35000 }, { "epoch": 0.062061439087075204, "grad_norm": 0.328125, "learning_rate": 0.001488648141471757, "loss": 0.2532, "step": 35002 }, { "epoch": 0.06206498525238502, "grad_norm": 0.349609375, "learning_rate": 0.0014885944575146757, "loss": 0.1634, "step": 35004 }, { "epoch": 0.06206853141769483, "grad_norm": 0.302734375, "learning_rate": 0.0014885407718581082, "loss": 0.1815, "step": 35006 }, { "epoch": 0.06207207758300465, "grad_norm": 0.6484375, "learning_rate": 0.0014884870845022896, "loss": 0.2531, "step": 35008 }, { "epoch": 0.06207562374831446, "grad_norm": 0.37890625, "learning_rate": 0.0014884333954474538, "loss": 0.1742, "step": 35010 }, { "epoch": 0.06207916991362428, "grad_norm": 1.8125, "learning_rate": 0.0014883797046938358, "loss": 0.2339, "step": 35012 }, { "epoch": 0.06208271607893409, "grad_norm": 0.6171875, "learning_rate": 0.0014883260122416707, "loss": 0.2188, "step": 35014 }, { "epoch": 0.062086262244243906, "grad_norm": 0.326171875, "learning_rate": 0.0014882723180911933, "loss": 0.1286, "step": 35016 }, { "epoch": 0.06208980840955372, "grad_norm": 0.54296875, "learning_rate": 0.001488218622242638, "loss": 0.1976, "step": 35018 }, { "epoch": 0.06209335457486354, "grad_norm": 0.31640625, "learning_rate": 0.0014881649246962403, "loss": 0.2099, "step": 35020 }, { "epoch": 0.06209690074017336, "grad_norm": 0.298828125, "learning_rate": 0.0014881112254522344, "loss": 0.2442, "step": 35022 }, { "epoch": 0.06210044690548317, "grad_norm": 0.83984375, "learning_rate": 0.0014880575245108559, "loss": 0.2699, "step": 35024 }, { "epoch": 0.062103993070792986, "grad_norm": 1.265625, "learning_rate": 0.0014880038218723385, "loss": 0.2267, "step": 35026 }, { "epoch": 0.0621075392361028, "grad_norm": 0.55859375, "learning_rate": 0.0014879501175369184, "loss": 0.2197, "step": 35028 }, { "epoch": 0.062111085401412615, "grad_norm": 0.5546875, "learning_rate": 0.0014878964115048296, "loss": 0.2088, "step": 35030 }, { "epoch": 0.06211463156672243, "grad_norm": 0.2138671875, "learning_rate": 0.0014878427037763073, "loss": 0.1558, "step": 35032 }, { "epoch": 0.062118177732032244, "grad_norm": 0.361328125, "learning_rate": 0.0014877889943515862, "loss": 0.1718, "step": 35034 }, { "epoch": 0.06212172389734206, "grad_norm": 0.70703125, "learning_rate": 0.0014877352832309012, "loss": 0.2185, "step": 35036 }, { "epoch": 0.06212527006265187, "grad_norm": 0.76171875, "learning_rate": 0.0014876815704144872, "loss": 0.164, "step": 35038 }, { "epoch": 0.06212881622796169, "grad_norm": 0.47265625, "learning_rate": 0.0014876278559025795, "loss": 0.2216, "step": 35040 }, { "epoch": 0.06213236239327151, "grad_norm": 0.361328125, "learning_rate": 0.0014875741396954125, "loss": 0.1654, "step": 35042 }, { "epoch": 0.062135908558581324, "grad_norm": 0.306640625, "learning_rate": 0.0014875204217932214, "loss": 0.17, "step": 35044 }, { "epoch": 0.06213945472389114, "grad_norm": 0.6328125, "learning_rate": 0.0014874667021962413, "loss": 0.1626, "step": 35046 }, { "epoch": 0.06214300088920095, "grad_norm": 0.357421875, "learning_rate": 0.001487412980904707, "loss": 0.1793, "step": 35048 }, { "epoch": 0.06214654705451077, "grad_norm": 0.5390625, "learning_rate": 0.001487359257918853, "loss": 0.1741, "step": 35050 }, { "epoch": 0.06215009321982058, "grad_norm": 0.3203125, "learning_rate": 0.0014873055332389148, "loss": 0.2394, "step": 35052 }, { "epoch": 0.062153639385130396, "grad_norm": 0.2060546875, "learning_rate": 0.0014872518068651268, "loss": 0.212, "step": 35054 }, { "epoch": 0.06215718555044021, "grad_norm": 0.1923828125, "learning_rate": 0.0014871980787977248, "loss": 0.2122, "step": 35056 }, { "epoch": 0.062160731715750026, "grad_norm": 1.28125, "learning_rate": 0.001487144349036943, "loss": 0.4389, "step": 35058 }, { "epoch": 0.06216427788105984, "grad_norm": 0.33203125, "learning_rate": 0.0014870906175830168, "loss": 0.2401, "step": 35060 }, { "epoch": 0.062167824046369655, "grad_norm": 0.484375, "learning_rate": 0.001487036884436181, "loss": 0.3665, "step": 35062 }, { "epoch": 0.062171370211679476, "grad_norm": 0.36328125, "learning_rate": 0.001486983149596671, "loss": 0.3, "step": 35064 }, { "epoch": 0.06217491637698929, "grad_norm": 0.2470703125, "learning_rate": 0.0014869294130647212, "loss": 0.1655, "step": 35066 }, { "epoch": 0.062178462542299105, "grad_norm": 1.0390625, "learning_rate": 0.0014868756748405672, "loss": 0.2129, "step": 35068 }, { "epoch": 0.06218200870760892, "grad_norm": 0.275390625, "learning_rate": 0.0014868219349244434, "loss": 0.2032, "step": 35070 }, { "epoch": 0.062185554872918734, "grad_norm": 0.318359375, "learning_rate": 0.0014867681933165853, "loss": 0.2293, "step": 35072 }, { "epoch": 0.06218910103822855, "grad_norm": 0.384765625, "learning_rate": 0.0014867144500172278, "loss": 0.1492, "step": 35074 }, { "epoch": 0.06219264720353836, "grad_norm": 0.271484375, "learning_rate": 0.0014866607050266058, "loss": 0.1853, "step": 35076 }, { "epoch": 0.06219619336884818, "grad_norm": 2.203125, "learning_rate": 0.0014866069583449547, "loss": 0.171, "step": 35078 }, { "epoch": 0.06219973953415799, "grad_norm": 0.89453125, "learning_rate": 0.0014865532099725093, "loss": 0.1527, "step": 35080 }, { "epoch": 0.06220328569946781, "grad_norm": 0.60546875, "learning_rate": 0.0014864994599095042, "loss": 0.3475, "step": 35082 }, { "epoch": 0.06220683186477762, "grad_norm": 0.5, "learning_rate": 0.0014864457081561755, "loss": 0.2288, "step": 35084 }, { "epoch": 0.062210378030087436, "grad_norm": 0.33203125, "learning_rate": 0.0014863919547127577, "loss": 0.1631, "step": 35086 }, { "epoch": 0.06221392419539726, "grad_norm": 0.408203125, "learning_rate": 0.001486338199579486, "loss": 0.2385, "step": 35088 }, { "epoch": 0.06221747036070707, "grad_norm": 0.80859375, "learning_rate": 0.001486284442756595, "loss": 0.2086, "step": 35090 }, { "epoch": 0.06222101652601689, "grad_norm": 0.546875, "learning_rate": 0.0014862306842443206, "loss": 0.1674, "step": 35092 }, { "epoch": 0.0622245626913267, "grad_norm": 0.453125, "learning_rate": 0.0014861769240428972, "loss": 0.2327, "step": 35094 }, { "epoch": 0.062228108856636516, "grad_norm": 0.28515625, "learning_rate": 0.0014861231621525607, "loss": 0.1948, "step": 35096 }, { "epoch": 0.06223165502194633, "grad_norm": 0.90234375, "learning_rate": 0.0014860693985735455, "loss": 0.1888, "step": 35098 }, { "epoch": 0.062235201187256145, "grad_norm": 0.83203125, "learning_rate": 0.0014860156333060869, "loss": 0.158, "step": 35100 }, { "epoch": 0.06223874735256596, "grad_norm": 0.28125, "learning_rate": 0.0014859618663504205, "loss": 0.2115, "step": 35102 }, { "epoch": 0.062242293517875774, "grad_norm": 1.0390625, "learning_rate": 0.001485908097706781, "loss": 0.2001, "step": 35104 }, { "epoch": 0.06224583968318559, "grad_norm": 0.427734375, "learning_rate": 0.0014858543273754035, "loss": 0.1844, "step": 35106 }, { "epoch": 0.0622493858484954, "grad_norm": 0.34375, "learning_rate": 0.0014858005553565231, "loss": 0.2336, "step": 35108 }, { "epoch": 0.062252932013805225, "grad_norm": 0.29296875, "learning_rate": 0.0014857467816503755, "loss": 0.2058, "step": 35110 }, { "epoch": 0.06225647817911504, "grad_norm": 0.55078125, "learning_rate": 0.0014856930062571957, "loss": 0.1788, "step": 35112 }, { "epoch": 0.062260024344424854, "grad_norm": 0.23828125, "learning_rate": 0.0014856392291772182, "loss": 0.1794, "step": 35114 }, { "epoch": 0.06226357050973467, "grad_norm": 0.322265625, "learning_rate": 0.001485585450410679, "loss": 0.3101, "step": 35116 }, { "epoch": 0.06226711667504448, "grad_norm": 0.76171875, "learning_rate": 0.001485531669957813, "loss": 0.2033, "step": 35118 }, { "epoch": 0.0622706628403543, "grad_norm": 0.330078125, "learning_rate": 0.0014854778878188556, "loss": 0.2024, "step": 35120 }, { "epoch": 0.06227420900566411, "grad_norm": 0.5703125, "learning_rate": 0.0014854241039940416, "loss": 0.1736, "step": 35122 }, { "epoch": 0.06227775517097393, "grad_norm": 0.3359375, "learning_rate": 0.0014853703184836064, "loss": 0.1448, "step": 35124 }, { "epoch": 0.06228130133628374, "grad_norm": 1.296875, "learning_rate": 0.0014853165312877857, "loss": 0.2654, "step": 35126 }, { "epoch": 0.062284847501593556, "grad_norm": 0.388671875, "learning_rate": 0.0014852627424068138, "loss": 0.2421, "step": 35128 }, { "epoch": 0.06228839366690337, "grad_norm": 0.380859375, "learning_rate": 0.0014852089518409266, "loss": 0.1682, "step": 35130 }, { "epoch": 0.06229193983221319, "grad_norm": 0.6328125, "learning_rate": 0.001485155159590359, "loss": 0.2994, "step": 35132 }, { "epoch": 0.062295485997523006, "grad_norm": 0.31640625, "learning_rate": 0.001485101365655347, "loss": 0.2161, "step": 35134 }, { "epoch": 0.06229903216283282, "grad_norm": 0.59765625, "learning_rate": 0.0014850475700361248, "loss": 0.3094, "step": 35136 }, { "epoch": 0.062302578328142635, "grad_norm": 1.140625, "learning_rate": 0.0014849937727329286, "loss": 0.1759, "step": 35138 }, { "epoch": 0.06230612449345245, "grad_norm": 0.48046875, "learning_rate": 0.0014849399737459931, "loss": 0.1888, "step": 35140 }, { "epoch": 0.062309670658762265, "grad_norm": 0.2734375, "learning_rate": 0.001484886173075554, "loss": 0.2161, "step": 35142 }, { "epoch": 0.06231321682407208, "grad_norm": 0.9921875, "learning_rate": 0.001484832370721846, "loss": 0.2459, "step": 35144 }, { "epoch": 0.062316762989381894, "grad_norm": 0.29296875, "learning_rate": 0.0014847785666851048, "loss": 0.2351, "step": 35146 }, { "epoch": 0.06232030915469171, "grad_norm": 0.55859375, "learning_rate": 0.0014847247609655657, "loss": 0.1642, "step": 35148 }, { "epoch": 0.06232385532000152, "grad_norm": 0.232421875, "learning_rate": 0.0014846709535634639, "loss": 0.1752, "step": 35150 }, { "epoch": 0.06232740148531134, "grad_norm": 0.5703125, "learning_rate": 0.0014846171444790348, "loss": 0.361, "step": 35152 }, { "epoch": 0.06233094765062115, "grad_norm": 0.302734375, "learning_rate": 0.0014845633337125137, "loss": 0.2161, "step": 35154 }, { "epoch": 0.06233449381593097, "grad_norm": 0.30859375, "learning_rate": 0.0014845095212641362, "loss": 0.1494, "step": 35156 }, { "epoch": 0.06233803998124079, "grad_norm": 0.3125, "learning_rate": 0.0014844557071341375, "loss": 0.1965, "step": 35158 }, { "epoch": 0.0623415861465506, "grad_norm": 1.4140625, "learning_rate": 0.0014844018913227524, "loss": 0.2168, "step": 35160 }, { "epoch": 0.06234513231186042, "grad_norm": 0.283203125, "learning_rate": 0.001484348073830217, "loss": 0.1812, "step": 35162 }, { "epoch": 0.06234867847717023, "grad_norm": 0.400390625, "learning_rate": 0.001484294254656766, "loss": 0.1374, "step": 35164 }, { "epoch": 0.062352224642480046, "grad_norm": 0.75390625, "learning_rate": 0.0014842404338026357, "loss": 0.1904, "step": 35166 }, { "epoch": 0.06235577080778986, "grad_norm": 2.015625, "learning_rate": 0.0014841866112680607, "loss": 0.2592, "step": 35168 }, { "epoch": 0.062359316973099675, "grad_norm": 0.89453125, "learning_rate": 0.0014841327870532767, "loss": 0.1901, "step": 35170 }, { "epoch": 0.06236286313840949, "grad_norm": 0.451171875, "learning_rate": 0.0014840789611585189, "loss": 0.2195, "step": 35172 }, { "epoch": 0.062366409303719304, "grad_norm": 0.69140625, "learning_rate": 0.0014840251335840225, "loss": 0.2031, "step": 35174 }, { "epoch": 0.06236995546902912, "grad_norm": 0.255859375, "learning_rate": 0.0014839713043300236, "loss": 0.1687, "step": 35176 }, { "epoch": 0.06237350163433894, "grad_norm": 0.54296875, "learning_rate": 0.0014839174733967573, "loss": 0.2125, "step": 35178 }, { "epoch": 0.062377047799648755, "grad_norm": 0.984375, "learning_rate": 0.0014838636407844591, "loss": 0.2542, "step": 35180 }, { "epoch": 0.06238059396495857, "grad_norm": 0.76171875, "learning_rate": 0.001483809806493364, "loss": 0.2222, "step": 35182 }, { "epoch": 0.062384140130268384, "grad_norm": 0.42578125, "learning_rate": 0.0014837559705237079, "loss": 0.223, "step": 35184 }, { "epoch": 0.0623876862955782, "grad_norm": 0.21484375, "learning_rate": 0.001483702132875726, "loss": 0.1674, "step": 35186 }, { "epoch": 0.06239123246088801, "grad_norm": 0.98046875, "learning_rate": 0.001483648293549654, "loss": 0.2395, "step": 35188 }, { "epoch": 0.06239477862619783, "grad_norm": 0.380859375, "learning_rate": 0.0014835944525457272, "loss": 0.147, "step": 35190 }, { "epoch": 0.06239832479150764, "grad_norm": 1.8984375, "learning_rate": 0.001483540609864181, "loss": 0.2589, "step": 35192 }, { "epoch": 0.06240187095681746, "grad_norm": 0.357421875, "learning_rate": 0.0014834867655052508, "loss": 0.2185, "step": 35194 }, { "epoch": 0.06240541712212727, "grad_norm": 0.353515625, "learning_rate": 0.0014834329194691725, "loss": 0.1883, "step": 35196 }, { "epoch": 0.062408963287437086, "grad_norm": 0.87109375, "learning_rate": 0.0014833790717561815, "loss": 0.1837, "step": 35198 }, { "epoch": 0.06241250945274691, "grad_norm": 0.76953125, "learning_rate": 0.0014833252223665128, "loss": 0.2438, "step": 35200 }, { "epoch": 0.06241605561805672, "grad_norm": 0.1806640625, "learning_rate": 0.0014832713713004023, "loss": 0.2045, "step": 35202 }, { "epoch": 0.062419601783366536, "grad_norm": 0.35546875, "learning_rate": 0.0014832175185580856, "loss": 0.1637, "step": 35204 }, { "epoch": 0.06242314794867635, "grad_norm": 1.2109375, "learning_rate": 0.0014831636641397979, "loss": 0.2046, "step": 35206 }, { "epoch": 0.062426694113986166, "grad_norm": 0.466796875, "learning_rate": 0.001483109808045775, "loss": 0.1538, "step": 35208 }, { "epoch": 0.06243024027929598, "grad_norm": 0.375, "learning_rate": 0.0014830559502762523, "loss": 0.2567, "step": 35210 }, { "epoch": 0.062433786444605795, "grad_norm": 0.396484375, "learning_rate": 0.0014830020908314652, "loss": 0.1658, "step": 35212 }, { "epoch": 0.06243733260991561, "grad_norm": 0.20703125, "learning_rate": 0.0014829482297116497, "loss": 0.1711, "step": 35214 }, { "epoch": 0.062440878775225424, "grad_norm": 0.9609375, "learning_rate": 0.001482894366917041, "loss": 0.2109, "step": 35216 }, { "epoch": 0.06244442494053524, "grad_norm": 0.275390625, "learning_rate": 0.0014828405024478748, "loss": 0.2354, "step": 35218 }, { "epoch": 0.06244797110584505, "grad_norm": 1.4375, "learning_rate": 0.0014827866363043864, "loss": 0.252, "step": 35220 }, { "epoch": 0.06245151727115487, "grad_norm": 1.046875, "learning_rate": 0.0014827327684868118, "loss": 0.1767, "step": 35222 }, { "epoch": 0.06245506343646469, "grad_norm": 0.2734375, "learning_rate": 0.0014826788989953862, "loss": 0.1924, "step": 35224 }, { "epoch": 0.062458609601774503, "grad_norm": 0.5390625, "learning_rate": 0.0014826250278303457, "loss": 0.1857, "step": 35226 }, { "epoch": 0.06246215576708432, "grad_norm": 0.224609375, "learning_rate": 0.0014825711549919253, "loss": 0.1837, "step": 35228 }, { "epoch": 0.06246570193239413, "grad_norm": 1.3359375, "learning_rate": 0.001482517280480361, "loss": 0.2279, "step": 35230 }, { "epoch": 0.06246924809770395, "grad_norm": 3.390625, "learning_rate": 0.0014824634042958885, "loss": 0.3722, "step": 35232 }, { "epoch": 0.06247279426301376, "grad_norm": 0.294921875, "learning_rate": 0.0014824095264387431, "loss": 0.1633, "step": 35234 }, { "epoch": 0.062476340428323576, "grad_norm": 0.1806640625, "learning_rate": 0.0014823556469091607, "loss": 0.3073, "step": 35236 }, { "epoch": 0.06247988659363339, "grad_norm": 0.326171875, "learning_rate": 0.0014823017657073765, "loss": 0.1881, "step": 35238 }, { "epoch": 0.062483432758943205, "grad_norm": 0.546875, "learning_rate": 0.0014822478828336264, "loss": 0.1618, "step": 35240 }, { "epoch": 0.06248697892425302, "grad_norm": 0.2412109375, "learning_rate": 0.0014821939982881463, "loss": 0.1704, "step": 35242 }, { "epoch": 0.062490525089562834, "grad_norm": 0.439453125, "learning_rate": 0.0014821401120711716, "loss": 0.1884, "step": 35244 }, { "epoch": 0.062494071254872656, "grad_norm": 0.8125, "learning_rate": 0.0014820862241829381, "loss": 0.1874, "step": 35246 }, { "epoch": 0.06249761742018247, "grad_norm": 0.2021484375, "learning_rate": 0.0014820323346236813, "loss": 0.2118, "step": 35248 }, { "epoch": 0.06250116358549228, "grad_norm": 0.251953125, "learning_rate": 0.0014819784433936368, "loss": 0.16, "step": 35250 }, { "epoch": 0.06250470975080209, "grad_norm": 0.5390625, "learning_rate": 0.0014819245504930408, "loss": 0.1893, "step": 35252 }, { "epoch": 0.06250825591611191, "grad_norm": 0.23046875, "learning_rate": 0.0014818706559221284, "loss": 0.1972, "step": 35254 }, { "epoch": 0.06251180208142172, "grad_norm": 0.470703125, "learning_rate": 0.001481816759681136, "loss": 0.187, "step": 35256 }, { "epoch": 0.06251534824673155, "grad_norm": 1.0703125, "learning_rate": 0.0014817628617702985, "loss": 0.1698, "step": 35258 }, { "epoch": 0.06251889441204136, "grad_norm": 0.546875, "learning_rate": 0.0014817089621898519, "loss": 0.193, "step": 35260 }, { "epoch": 0.06252244057735118, "grad_norm": 1.2109375, "learning_rate": 0.0014816550609400322, "loss": 0.2731, "step": 35262 }, { "epoch": 0.062525986742661, "grad_norm": 0.8125, "learning_rate": 0.001481601158021075, "loss": 0.1805, "step": 35264 }, { "epoch": 0.06252953290797081, "grad_norm": 0.333984375, "learning_rate": 0.001481547253433216, "loss": 0.3314, "step": 35266 }, { "epoch": 0.06253307907328062, "grad_norm": 1.375, "learning_rate": 0.0014814933471766908, "loss": 0.4282, "step": 35268 }, { "epoch": 0.06253662523859044, "grad_norm": 0.61328125, "learning_rate": 0.0014814394392517354, "loss": 0.2284, "step": 35270 }, { "epoch": 0.06254017140390025, "grad_norm": 0.6875, "learning_rate": 0.0014813855296585856, "loss": 0.1755, "step": 35272 }, { "epoch": 0.06254371756921007, "grad_norm": 0.2490234375, "learning_rate": 0.001481331618397477, "loss": 0.1523, "step": 35274 }, { "epoch": 0.06254726373451988, "grad_norm": 0.6875, "learning_rate": 0.0014812777054686451, "loss": 0.2012, "step": 35276 }, { "epoch": 0.0625508098998297, "grad_norm": 0.478515625, "learning_rate": 0.0014812237908723262, "loss": 0.1701, "step": 35278 }, { "epoch": 0.06255435606513951, "grad_norm": 0.24609375, "learning_rate": 0.001481169874608756, "loss": 0.1398, "step": 35280 }, { "epoch": 0.06255790223044932, "grad_norm": 0.333984375, "learning_rate": 0.0014811159566781697, "loss": 0.1859, "step": 35282 }, { "epoch": 0.06256144839575914, "grad_norm": 0.3984375, "learning_rate": 0.0014810620370808041, "loss": 0.1604, "step": 35284 }, { "epoch": 0.06256499456106895, "grad_norm": 0.43359375, "learning_rate": 0.0014810081158168943, "loss": 0.1818, "step": 35286 }, { "epoch": 0.06256854072637877, "grad_norm": 0.23828125, "learning_rate": 0.001480954192886676, "loss": 0.1811, "step": 35288 }, { "epoch": 0.06257208689168858, "grad_norm": 0.40234375, "learning_rate": 0.0014809002682903858, "loss": 0.2088, "step": 35290 }, { "epoch": 0.0625756330569984, "grad_norm": 0.458984375, "learning_rate": 0.001480846342028259, "loss": 0.1384, "step": 35292 }, { "epoch": 0.06257917922230821, "grad_norm": 0.439453125, "learning_rate": 0.0014807924141005317, "loss": 0.2553, "step": 35294 }, { "epoch": 0.06258272538761803, "grad_norm": 0.384765625, "learning_rate": 0.0014807384845074393, "loss": 0.2424, "step": 35296 }, { "epoch": 0.06258627155292784, "grad_norm": 1.078125, "learning_rate": 0.0014806845532492178, "loss": 0.2108, "step": 35298 }, { "epoch": 0.06258981771823766, "grad_norm": 0.419921875, "learning_rate": 0.0014806306203261036, "loss": 0.1759, "step": 35300 }, { "epoch": 0.06259336388354748, "grad_norm": 0.7265625, "learning_rate": 0.0014805766857383317, "loss": 0.1393, "step": 35302 }, { "epoch": 0.0625969100488573, "grad_norm": 0.275390625, "learning_rate": 0.001480522749486139, "loss": 0.2655, "step": 35304 }, { "epoch": 0.06260045621416711, "grad_norm": 0.423828125, "learning_rate": 0.0014804688115697604, "loss": 0.1482, "step": 35306 }, { "epoch": 0.06260400237947693, "grad_norm": 0.365234375, "learning_rate": 0.001480414871989432, "loss": 0.1616, "step": 35308 }, { "epoch": 0.06260754854478674, "grad_norm": 0.83984375, "learning_rate": 0.0014803609307453907, "loss": 0.1948, "step": 35310 }, { "epoch": 0.06261109471009656, "grad_norm": 0.46484375, "learning_rate": 0.001480306987837871, "loss": 0.1892, "step": 35312 }, { "epoch": 0.06261464087540637, "grad_norm": 0.91015625, "learning_rate": 0.0014802530432671095, "loss": 0.2245, "step": 35314 }, { "epoch": 0.06261818704071619, "grad_norm": 0.1533203125, "learning_rate": 0.001480199097033342, "loss": 0.2041, "step": 35316 }, { "epoch": 0.062621733206026, "grad_norm": 0.205078125, "learning_rate": 0.0014801451491368049, "loss": 0.1221, "step": 35318 }, { "epoch": 0.06262527937133582, "grad_norm": 0.296875, "learning_rate": 0.0014800911995777335, "loss": 0.1729, "step": 35320 }, { "epoch": 0.06262882553664563, "grad_norm": 1.4296875, "learning_rate": 0.001480037248356364, "loss": 0.2403, "step": 35322 }, { "epoch": 0.06263237170195544, "grad_norm": 0.75, "learning_rate": 0.0014799832954729322, "loss": 0.1642, "step": 35324 }, { "epoch": 0.06263591786726526, "grad_norm": 0.62890625, "learning_rate": 0.0014799293409276744, "loss": 0.248, "step": 35326 }, { "epoch": 0.06263946403257507, "grad_norm": 3.25, "learning_rate": 0.0014798753847208262, "loss": 0.253, "step": 35328 }, { "epoch": 0.06264301019788489, "grad_norm": 0.318359375, "learning_rate": 0.001479821426852624, "loss": 0.4124, "step": 35330 }, { "epoch": 0.0626465563631947, "grad_norm": 0.578125, "learning_rate": 0.0014797674673233032, "loss": 0.1623, "step": 35332 }, { "epoch": 0.06265010252850452, "grad_norm": 2.625, "learning_rate": 0.0014797135061331002, "loss": 0.2028, "step": 35334 }, { "epoch": 0.06265364869381433, "grad_norm": 0.27734375, "learning_rate": 0.001479659543282251, "loss": 0.2467, "step": 35336 }, { "epoch": 0.06265719485912415, "grad_norm": 0.36328125, "learning_rate": 0.0014796055787709911, "loss": 0.2784, "step": 35338 }, { "epoch": 0.06266074102443396, "grad_norm": 0.71875, "learning_rate": 0.0014795516125995572, "loss": 0.1841, "step": 35340 }, { "epoch": 0.06266428718974378, "grad_norm": 0.2138671875, "learning_rate": 0.0014794976447681848, "loss": 0.1606, "step": 35342 }, { "epoch": 0.06266783335505359, "grad_norm": 1.0546875, "learning_rate": 0.0014794436752771105, "loss": 0.2936, "step": 35344 }, { "epoch": 0.0626713795203634, "grad_norm": 0.81640625, "learning_rate": 0.0014793897041265698, "loss": 0.144, "step": 35346 }, { "epoch": 0.06267492568567323, "grad_norm": 0.41796875, "learning_rate": 0.001479335731316799, "loss": 0.2096, "step": 35348 }, { "epoch": 0.06267847185098305, "grad_norm": 0.376953125, "learning_rate": 0.001479281756848034, "loss": 0.1729, "step": 35350 }, { "epoch": 0.06268201801629286, "grad_norm": 0.400390625, "learning_rate": 0.0014792277807205107, "loss": 0.1441, "step": 35352 }, { "epoch": 0.06268556418160268, "grad_norm": 0.8203125, "learning_rate": 0.0014791738029344655, "loss": 0.2053, "step": 35354 }, { "epoch": 0.06268911034691249, "grad_norm": 0.416015625, "learning_rate": 0.0014791198234901345, "loss": 0.1669, "step": 35356 }, { "epoch": 0.0626926565122223, "grad_norm": 2.171875, "learning_rate": 0.0014790658423877532, "loss": 0.2074, "step": 35358 }, { "epoch": 0.06269620267753212, "grad_norm": 0.60546875, "learning_rate": 0.0014790118596275585, "loss": 0.216, "step": 35360 }, { "epoch": 0.06269974884284193, "grad_norm": 0.98828125, "learning_rate": 0.0014789578752097856, "loss": 0.2371, "step": 35362 }, { "epoch": 0.06270329500815175, "grad_norm": 0.259765625, "learning_rate": 0.0014789038891346719, "loss": 0.2207, "step": 35364 }, { "epoch": 0.06270684117346156, "grad_norm": 0.89453125, "learning_rate": 0.001478849901402452, "loss": 0.1826, "step": 35366 }, { "epoch": 0.06271038733877138, "grad_norm": 0.3125, "learning_rate": 0.001478795912013363, "loss": 0.1501, "step": 35368 }, { "epoch": 0.06271393350408119, "grad_norm": 0.376953125, "learning_rate": 0.0014787419209676405, "loss": 0.2108, "step": 35370 }, { "epoch": 0.06271747966939101, "grad_norm": 1.375, "learning_rate": 0.001478687928265521, "loss": 0.1877, "step": 35372 }, { "epoch": 0.06272102583470082, "grad_norm": 0.578125, "learning_rate": 0.0014786339339072406, "loss": 0.3032, "step": 35374 }, { "epoch": 0.06272457200001064, "grad_norm": 0.31640625, "learning_rate": 0.001478579937893035, "loss": 0.1295, "step": 35376 }, { "epoch": 0.06272811816532045, "grad_norm": 0.306640625, "learning_rate": 0.0014785259402231407, "loss": 0.1997, "step": 35378 }, { "epoch": 0.06273166433063027, "grad_norm": 0.423828125, "learning_rate": 0.001478471940897794, "loss": 0.202, "step": 35380 }, { "epoch": 0.06273521049594008, "grad_norm": 0.443359375, "learning_rate": 0.0014784179399172305, "loss": 0.18, "step": 35382 }, { "epoch": 0.0627387566612499, "grad_norm": 2.640625, "learning_rate": 0.0014783639372816873, "loss": 0.3024, "step": 35384 }, { "epoch": 0.06274230282655971, "grad_norm": 0.51953125, "learning_rate": 0.0014783099329913998, "loss": 0.3659, "step": 35386 }, { "epoch": 0.06274584899186952, "grad_norm": 0.275390625, "learning_rate": 0.0014782559270466043, "loss": 0.1957, "step": 35388 }, { "epoch": 0.06274939515717934, "grad_norm": 0.40625, "learning_rate": 0.001478201919447537, "loss": 0.2311, "step": 35390 }, { "epoch": 0.06275294132248915, "grad_norm": 0.326171875, "learning_rate": 0.0014781479101944343, "loss": 0.1766, "step": 35392 }, { "epoch": 0.06275648748779898, "grad_norm": 0.5078125, "learning_rate": 0.0014780938992875322, "loss": 0.2785, "step": 35394 }, { "epoch": 0.0627600336531088, "grad_norm": 0.271484375, "learning_rate": 0.0014780398867270672, "loss": 0.1313, "step": 35396 }, { "epoch": 0.06276357981841861, "grad_norm": 0.2001953125, "learning_rate": 0.0014779858725132752, "loss": 0.1616, "step": 35398 }, { "epoch": 0.06276712598372843, "grad_norm": 1.015625, "learning_rate": 0.0014779318566463925, "loss": 0.1844, "step": 35400 }, { "epoch": 0.06277067214903824, "grad_norm": 0.4375, "learning_rate": 0.0014778778391266556, "loss": 0.1794, "step": 35402 }, { "epoch": 0.06277421831434805, "grad_norm": 0.34765625, "learning_rate": 0.0014778238199543005, "loss": 0.1423, "step": 35404 }, { "epoch": 0.06277776447965787, "grad_norm": 0.287109375, "learning_rate": 0.0014777697991295634, "loss": 0.1599, "step": 35406 }, { "epoch": 0.06278131064496768, "grad_norm": 0.302734375, "learning_rate": 0.0014777157766526809, "loss": 0.187, "step": 35408 }, { "epoch": 0.0627848568102775, "grad_norm": 0.53125, "learning_rate": 0.0014776617525238884, "loss": 0.2273, "step": 35410 }, { "epoch": 0.06278840297558731, "grad_norm": 0.59375, "learning_rate": 0.0014776077267434232, "loss": 0.1652, "step": 35412 }, { "epoch": 0.06279194914089713, "grad_norm": 1.328125, "learning_rate": 0.0014775536993115207, "loss": 0.2128, "step": 35414 }, { "epoch": 0.06279549530620694, "grad_norm": 1.09375, "learning_rate": 0.0014774996702284183, "loss": 0.3473, "step": 35416 }, { "epoch": 0.06279904147151676, "grad_norm": 0.271484375, "learning_rate": 0.0014774456394943511, "loss": 0.208, "step": 35418 }, { "epoch": 0.06280258763682657, "grad_norm": 1.7109375, "learning_rate": 0.0014773916071095562, "loss": 0.2341, "step": 35420 }, { "epoch": 0.06280613380213639, "grad_norm": 0.37890625, "learning_rate": 0.0014773375730742697, "loss": 0.1819, "step": 35422 }, { "epoch": 0.0628096799674462, "grad_norm": 0.5546875, "learning_rate": 0.0014772835373887277, "loss": 0.1234, "step": 35424 }, { "epoch": 0.06281322613275601, "grad_norm": 0.5, "learning_rate": 0.0014772295000531667, "loss": 0.3345, "step": 35426 }, { "epoch": 0.06281677229806583, "grad_norm": 0.546875, "learning_rate": 0.0014771754610678224, "loss": 0.1855, "step": 35428 }, { "epoch": 0.06282031846337564, "grad_norm": 0.5234375, "learning_rate": 0.0014771214204329325, "loss": 0.2362, "step": 35430 }, { "epoch": 0.06282386462868546, "grad_norm": 0.66796875, "learning_rate": 0.0014770673781487323, "loss": 0.17, "step": 35432 }, { "epoch": 0.06282741079399527, "grad_norm": 0.6015625, "learning_rate": 0.0014770133342154584, "loss": 0.2483, "step": 35434 }, { "epoch": 0.06283095695930509, "grad_norm": 0.43359375, "learning_rate": 0.0014769592886333472, "loss": 0.1891, "step": 35436 }, { "epoch": 0.06283450312461492, "grad_norm": 0.2060546875, "learning_rate": 0.0014769052414026352, "loss": 0.1506, "step": 35438 }, { "epoch": 0.06283804928992473, "grad_norm": 0.369140625, "learning_rate": 0.0014768511925235585, "loss": 0.3143, "step": 35440 }, { "epoch": 0.06284159545523454, "grad_norm": 0.6171875, "learning_rate": 0.0014767971419963532, "loss": 0.1952, "step": 35442 }, { "epoch": 0.06284514162054436, "grad_norm": 0.9296875, "learning_rate": 0.0014767430898212565, "loss": 0.2017, "step": 35444 }, { "epoch": 0.06284868778585417, "grad_norm": 0.2734375, "learning_rate": 0.0014766890359985043, "loss": 0.2086, "step": 35446 }, { "epoch": 0.06285223395116399, "grad_norm": 0.306640625, "learning_rate": 0.0014766349805283327, "loss": 0.2022, "step": 35448 }, { "epoch": 0.0628557801164738, "grad_norm": 0.31640625, "learning_rate": 0.001476580923410979, "loss": 0.1688, "step": 35450 }, { "epoch": 0.06285932628178362, "grad_norm": 1.5390625, "learning_rate": 0.0014765268646466788, "loss": 0.327, "step": 35452 }, { "epoch": 0.06286287244709343, "grad_norm": 0.76953125, "learning_rate": 0.0014764728042356689, "loss": 0.1645, "step": 35454 }, { "epoch": 0.06286641861240325, "grad_norm": 0.259765625, "learning_rate": 0.0014764187421781854, "loss": 0.1985, "step": 35456 }, { "epoch": 0.06286996477771306, "grad_norm": 0.4609375, "learning_rate": 0.0014763646784744656, "loss": 0.1972, "step": 35458 }, { "epoch": 0.06287351094302288, "grad_norm": 0.70703125, "learning_rate": 0.0014763106131247445, "loss": 0.1975, "step": 35460 }, { "epoch": 0.06287705710833269, "grad_norm": 0.341796875, "learning_rate": 0.00147625654612926, "loss": 0.1871, "step": 35462 }, { "epoch": 0.0628806032736425, "grad_norm": 0.388671875, "learning_rate": 0.0014762024774882478, "loss": 0.3703, "step": 35464 }, { "epoch": 0.06288414943895232, "grad_norm": 0.2275390625, "learning_rate": 0.0014761484072019445, "loss": 0.1722, "step": 35466 }, { "epoch": 0.06288769560426213, "grad_norm": 4.125, "learning_rate": 0.0014760943352705863, "loss": 0.2352, "step": 35468 }, { "epoch": 0.06289124176957195, "grad_norm": 0.439453125, "learning_rate": 0.00147604026169441, "loss": 0.1839, "step": 35470 }, { "epoch": 0.06289478793488176, "grad_norm": 0.369140625, "learning_rate": 0.0014759861864736522, "loss": 0.1825, "step": 35472 }, { "epoch": 0.06289833410019158, "grad_norm": 0.470703125, "learning_rate": 0.001475932109608549, "loss": 0.2739, "step": 35474 }, { "epoch": 0.06290188026550139, "grad_norm": 0.392578125, "learning_rate": 0.0014758780310993372, "loss": 0.212, "step": 35476 }, { "epoch": 0.0629054264308112, "grad_norm": 1.5390625, "learning_rate": 0.0014758239509462532, "loss": 0.1706, "step": 35478 }, { "epoch": 0.06290897259612102, "grad_norm": 7.71875, "learning_rate": 0.0014757698691495336, "loss": 0.4462, "step": 35480 }, { "epoch": 0.06291251876143084, "grad_norm": 0.40625, "learning_rate": 0.001475715785709415, "loss": 0.195, "step": 35482 }, { "epoch": 0.06291606492674066, "grad_norm": 1.03125, "learning_rate": 0.0014756617006261338, "loss": 0.2121, "step": 35484 }, { "epoch": 0.06291961109205048, "grad_norm": 0.4609375, "learning_rate": 0.0014756076138999264, "loss": 0.147, "step": 35486 }, { "epoch": 0.0629231572573603, "grad_norm": 0.61328125, "learning_rate": 0.0014755535255310296, "loss": 0.1994, "step": 35488 }, { "epoch": 0.06292670342267011, "grad_norm": 0.216796875, "learning_rate": 0.0014754994355196795, "loss": 0.1753, "step": 35490 }, { "epoch": 0.06293024958797992, "grad_norm": 0.376953125, "learning_rate": 0.0014754453438661132, "loss": 0.2035, "step": 35492 }, { "epoch": 0.06293379575328974, "grad_norm": 0.1962890625, "learning_rate": 0.0014753912505705668, "loss": 0.1538, "step": 35494 }, { "epoch": 0.06293734191859955, "grad_norm": 2.609375, "learning_rate": 0.0014753371556332775, "loss": 0.1581, "step": 35496 }, { "epoch": 0.06294088808390937, "grad_norm": 0.84765625, "learning_rate": 0.0014752830590544816, "loss": 0.1908, "step": 35498 }, { "epoch": 0.06294443424921918, "grad_norm": 0.8046875, "learning_rate": 0.0014752289608344152, "loss": 0.191, "step": 35500 }, { "epoch": 0.062947980414529, "grad_norm": 0.47265625, "learning_rate": 0.0014751748609733155, "loss": 0.1919, "step": 35502 }, { "epoch": 0.06295152657983881, "grad_norm": 0.416015625, "learning_rate": 0.0014751207594714188, "loss": 0.1463, "step": 35504 }, { "epoch": 0.06295507274514862, "grad_norm": 0.357421875, "learning_rate": 0.0014750666563289618, "loss": 0.1822, "step": 35506 }, { "epoch": 0.06295861891045844, "grad_norm": 0.326171875, "learning_rate": 0.0014750125515461813, "loss": 0.2285, "step": 35508 }, { "epoch": 0.06296216507576825, "grad_norm": 0.287109375, "learning_rate": 0.0014749584451233132, "loss": 0.14, "step": 35510 }, { "epoch": 0.06296571124107807, "grad_norm": 0.306640625, "learning_rate": 0.0014749043370605951, "loss": 0.1463, "step": 35512 }, { "epoch": 0.06296925740638788, "grad_norm": 0.38671875, "learning_rate": 0.001474850227358263, "loss": 0.2504, "step": 35514 }, { "epoch": 0.0629728035716977, "grad_norm": 0.515625, "learning_rate": 0.0014747961160165542, "loss": 0.2048, "step": 35516 }, { "epoch": 0.06297634973700751, "grad_norm": 0.55859375, "learning_rate": 0.0014747420030357043, "loss": 0.1696, "step": 35518 }, { "epoch": 0.06297989590231733, "grad_norm": 0.455078125, "learning_rate": 0.0014746878884159512, "loss": 0.1934, "step": 35520 }, { "epoch": 0.06298344206762714, "grad_norm": 0.41015625, "learning_rate": 0.0014746337721575303, "loss": 0.2101, "step": 35522 }, { "epoch": 0.06298698823293696, "grad_norm": 0.345703125, "learning_rate": 0.001474579654260679, "loss": 0.2518, "step": 35524 }, { "epoch": 0.06299053439824677, "grad_norm": 0.734375, "learning_rate": 0.0014745255347256342, "loss": 0.1748, "step": 35526 }, { "epoch": 0.06299408056355658, "grad_norm": 0.337890625, "learning_rate": 0.001474471413552632, "loss": 0.2214, "step": 35528 }, { "epoch": 0.06299762672886641, "grad_norm": 0.314453125, "learning_rate": 0.0014744172907419093, "loss": 0.2343, "step": 35530 }, { "epoch": 0.06300117289417623, "grad_norm": 1.9296875, "learning_rate": 0.0014743631662937032, "loss": 0.1894, "step": 35532 }, { "epoch": 0.06300471905948604, "grad_norm": 0.78125, "learning_rate": 0.0014743090402082497, "loss": 0.1943, "step": 35534 }, { "epoch": 0.06300826522479586, "grad_norm": 0.45703125, "learning_rate": 0.0014742549124857863, "loss": 0.1534, "step": 35536 }, { "epoch": 0.06301181139010567, "grad_norm": 0.30859375, "learning_rate": 0.001474200783126549, "loss": 0.2144, "step": 35538 }, { "epoch": 0.06301535755541549, "grad_norm": 0.25390625, "learning_rate": 0.001474146652130775, "loss": 0.2199, "step": 35540 }, { "epoch": 0.0630189037207253, "grad_norm": 0.435546875, "learning_rate": 0.0014740925194987005, "loss": 0.2032, "step": 35542 }, { "epoch": 0.06302244988603511, "grad_norm": 0.50390625, "learning_rate": 0.0014740383852305627, "loss": 0.1747, "step": 35544 }, { "epoch": 0.06302599605134493, "grad_norm": 1.09375, "learning_rate": 0.0014739842493265987, "loss": 0.2268, "step": 35546 }, { "epoch": 0.06302954221665474, "grad_norm": 1.3671875, "learning_rate": 0.0014739301117870445, "loss": 0.1654, "step": 35548 }, { "epoch": 0.06303308838196456, "grad_norm": 0.5234375, "learning_rate": 0.001473875972612137, "loss": 0.2037, "step": 35550 }, { "epoch": 0.06303663454727437, "grad_norm": 0.392578125, "learning_rate": 0.0014738218318021135, "loss": 0.162, "step": 35552 }, { "epoch": 0.06304018071258419, "grad_norm": 0.349609375, "learning_rate": 0.0014737676893572106, "loss": 0.1407, "step": 35554 }, { "epoch": 0.063043726877894, "grad_norm": 0.25390625, "learning_rate": 0.0014737135452776648, "loss": 0.1473, "step": 35556 }, { "epoch": 0.06304727304320382, "grad_norm": 0.5859375, "learning_rate": 0.0014736593995637128, "loss": 0.2906, "step": 35558 }, { "epoch": 0.06305081920851363, "grad_norm": 1.0546875, "learning_rate": 0.001473605252215592, "loss": 0.244, "step": 35560 }, { "epoch": 0.06305436537382345, "grad_norm": 0.5, "learning_rate": 0.0014735511032335385, "loss": 0.172, "step": 35562 }, { "epoch": 0.06305791153913326, "grad_norm": 0.50390625, "learning_rate": 0.0014734969526177895, "loss": 0.1657, "step": 35564 }, { "epoch": 0.06306145770444307, "grad_norm": 0.71484375, "learning_rate": 0.001473442800368582, "loss": 0.1564, "step": 35566 }, { "epoch": 0.06306500386975289, "grad_norm": 0.5078125, "learning_rate": 0.0014733886464861523, "loss": 0.1992, "step": 35568 }, { "epoch": 0.0630685500350627, "grad_norm": 0.89453125, "learning_rate": 0.0014733344909707378, "loss": 0.2306, "step": 35570 }, { "epoch": 0.06307209620037252, "grad_norm": 0.33984375, "learning_rate": 0.001473280333822575, "loss": 0.2205, "step": 35572 }, { "epoch": 0.06307564236568235, "grad_norm": 0.48046875, "learning_rate": 0.001473226175041901, "loss": 0.1988, "step": 35574 }, { "epoch": 0.06307918853099216, "grad_norm": 2.21875, "learning_rate": 0.0014731720146289524, "loss": 0.3054, "step": 35576 }, { "epoch": 0.06308273469630198, "grad_norm": 0.2890625, "learning_rate": 0.001473117852583966, "loss": 0.2137, "step": 35578 }, { "epoch": 0.06308628086161179, "grad_norm": 0.69921875, "learning_rate": 0.001473063688907179, "loss": 0.1883, "step": 35580 }, { "epoch": 0.0630898270269216, "grad_norm": 0.310546875, "learning_rate": 0.001473009523598828, "loss": 0.1501, "step": 35582 }, { "epoch": 0.06309337319223142, "grad_norm": 0.38671875, "learning_rate": 0.0014729553566591503, "loss": 0.2363, "step": 35584 }, { "epoch": 0.06309691935754123, "grad_norm": 0.50390625, "learning_rate": 0.0014729011880883823, "loss": 0.1963, "step": 35586 }, { "epoch": 0.06310046552285105, "grad_norm": 0.48828125, "learning_rate": 0.0014728470178867613, "loss": 0.2905, "step": 35588 }, { "epoch": 0.06310401168816086, "grad_norm": 0.2431640625, "learning_rate": 0.0014727928460545239, "loss": 0.1441, "step": 35590 }, { "epoch": 0.06310755785347068, "grad_norm": 0.2470703125, "learning_rate": 0.001472738672591907, "loss": 0.1985, "step": 35592 }, { "epoch": 0.06311110401878049, "grad_norm": 1.046875, "learning_rate": 0.0014726844974991477, "loss": 0.2147, "step": 35594 }, { "epoch": 0.0631146501840903, "grad_norm": 0.349609375, "learning_rate": 0.0014726303207764828, "loss": 0.1552, "step": 35596 }, { "epoch": 0.06311819634940012, "grad_norm": 0.2412109375, "learning_rate": 0.0014725761424241497, "loss": 0.3288, "step": 35598 }, { "epoch": 0.06312174251470994, "grad_norm": 0.2421875, "learning_rate": 0.0014725219624423848, "loss": 0.1788, "step": 35600 }, { "epoch": 0.06312528868001975, "grad_norm": 0.76171875, "learning_rate": 0.001472467780831425, "loss": 0.1723, "step": 35602 }, { "epoch": 0.06312883484532957, "grad_norm": 0.3828125, "learning_rate": 0.0014724135975915076, "loss": 0.216, "step": 35604 }, { "epoch": 0.06313238101063938, "grad_norm": 2.0625, "learning_rate": 0.0014723594127228697, "loss": 0.2661, "step": 35606 }, { "epoch": 0.0631359271759492, "grad_norm": 0.59375, "learning_rate": 0.0014723052262257477, "loss": 0.2325, "step": 35608 }, { "epoch": 0.06313947334125901, "grad_norm": 0.875, "learning_rate": 0.0014722510381003791, "loss": 0.1845, "step": 35610 }, { "epoch": 0.06314301950656882, "grad_norm": 1.5, "learning_rate": 0.0014721968483470006, "loss": 0.3052, "step": 35612 }, { "epoch": 0.06314656567187864, "grad_norm": 0.6015625, "learning_rate": 0.0014721426569658494, "loss": 0.2524, "step": 35614 }, { "epoch": 0.06315011183718845, "grad_norm": 0.404296875, "learning_rate": 0.001472088463957162, "loss": 0.2141, "step": 35616 }, { "epoch": 0.06315365800249827, "grad_norm": 1.015625, "learning_rate": 0.0014720342693211761, "loss": 0.2333, "step": 35618 }, { "epoch": 0.0631572041678081, "grad_norm": 1.8203125, "learning_rate": 0.0014719800730581285, "loss": 0.2465, "step": 35620 }, { "epoch": 0.06316075033311791, "grad_norm": 0.255859375, "learning_rate": 0.001471925875168256, "loss": 0.2575, "step": 35622 }, { "epoch": 0.06316429649842772, "grad_norm": 0.412109375, "learning_rate": 0.0014718716756517956, "loss": 0.1786, "step": 35624 }, { "epoch": 0.06316784266373754, "grad_norm": 0.765625, "learning_rate": 0.0014718174745089849, "loss": 0.2031, "step": 35626 }, { "epoch": 0.06317138882904735, "grad_norm": 0.38671875, "learning_rate": 0.0014717632717400604, "loss": 0.2137, "step": 35628 }, { "epoch": 0.06317493499435717, "grad_norm": 0.25390625, "learning_rate": 0.0014717090673452594, "loss": 0.2049, "step": 35630 }, { "epoch": 0.06317848115966698, "grad_norm": 0.27734375, "learning_rate": 0.0014716548613248183, "loss": 0.2984, "step": 35632 }, { "epoch": 0.0631820273249768, "grad_norm": 0.443359375, "learning_rate": 0.0014716006536789755, "loss": 0.2033, "step": 35634 }, { "epoch": 0.06318557349028661, "grad_norm": 0.46875, "learning_rate": 0.0014715464444079664, "loss": 0.2025, "step": 35636 }, { "epoch": 0.06318911965559643, "grad_norm": 0.65625, "learning_rate": 0.0014714922335120298, "loss": 0.2476, "step": 35638 }, { "epoch": 0.06319266582090624, "grad_norm": 0.83984375, "learning_rate": 0.0014714380209914014, "loss": 0.1887, "step": 35640 }, { "epoch": 0.06319621198621606, "grad_norm": 0.251953125, "learning_rate": 0.0014713838068463193, "loss": 0.2063, "step": 35642 }, { "epoch": 0.06319975815152587, "grad_norm": 0.26171875, "learning_rate": 0.0014713295910770195, "loss": 0.2036, "step": 35644 }, { "epoch": 0.06320330431683568, "grad_norm": 0.62890625, "learning_rate": 0.0014712753736837404, "loss": 0.2535, "step": 35646 }, { "epoch": 0.0632068504821455, "grad_norm": 0.9375, "learning_rate": 0.0014712211546667182, "loss": 0.1655, "step": 35648 }, { "epoch": 0.06321039664745531, "grad_norm": 0.328125, "learning_rate": 0.0014711669340261907, "loss": 0.1882, "step": 35650 }, { "epoch": 0.06321394281276513, "grad_norm": 0.54296875, "learning_rate": 0.001471112711762394, "loss": 0.1902, "step": 35652 }, { "epoch": 0.06321748897807494, "grad_norm": 0.5078125, "learning_rate": 0.0014710584878755665, "loss": 0.1648, "step": 35654 }, { "epoch": 0.06322103514338476, "grad_norm": 0.49609375, "learning_rate": 0.001471004262365944, "loss": 0.1814, "step": 35656 }, { "epoch": 0.06322458130869457, "grad_norm": 0.396484375, "learning_rate": 0.0014709500352337649, "loss": 0.1928, "step": 35658 }, { "epoch": 0.06322812747400439, "grad_norm": 0.609375, "learning_rate": 0.0014708958064792654, "loss": 0.1994, "step": 35660 }, { "epoch": 0.0632316736393142, "grad_norm": 0.341796875, "learning_rate": 0.0014708415761026836, "loss": 0.1923, "step": 35662 }, { "epoch": 0.06323521980462402, "grad_norm": 0.60546875, "learning_rate": 0.0014707873441042558, "loss": 0.1872, "step": 35664 }, { "epoch": 0.06323876596993384, "grad_norm": 0.76171875, "learning_rate": 0.0014707331104842198, "loss": 0.1842, "step": 35666 }, { "epoch": 0.06324231213524366, "grad_norm": 0.419921875, "learning_rate": 0.0014706788752428123, "loss": 0.1575, "step": 35668 }, { "epoch": 0.06324585830055347, "grad_norm": 0.73828125, "learning_rate": 0.0014706246383802705, "loss": 0.1956, "step": 35670 }, { "epoch": 0.06324940446586329, "grad_norm": 0.4609375, "learning_rate": 0.0014705703998968323, "loss": 0.1649, "step": 35672 }, { "epoch": 0.0632529506311731, "grad_norm": 0.51171875, "learning_rate": 0.0014705161597927342, "loss": 0.1724, "step": 35674 }, { "epoch": 0.06325649679648292, "grad_norm": 0.2060546875, "learning_rate": 0.0014704619180682134, "loss": 0.1817, "step": 35676 }, { "epoch": 0.06326004296179273, "grad_norm": 1.1171875, "learning_rate": 0.0014704076747235075, "loss": 0.2207, "step": 35678 }, { "epoch": 0.06326358912710255, "grad_norm": 0.51953125, "learning_rate": 0.001470353429758854, "loss": 0.4006, "step": 35680 }, { "epoch": 0.06326713529241236, "grad_norm": 0.55078125, "learning_rate": 0.0014702991831744895, "loss": 0.2716, "step": 35682 }, { "epoch": 0.06327068145772217, "grad_norm": 1.8125, "learning_rate": 0.0014702449349706516, "loss": 0.2228, "step": 35684 }, { "epoch": 0.06327422762303199, "grad_norm": 0.5, "learning_rate": 0.0014701906851475768, "loss": 0.1487, "step": 35686 }, { "epoch": 0.0632777737883418, "grad_norm": 0.40234375, "learning_rate": 0.0014701364337055035, "loss": 0.1834, "step": 35688 }, { "epoch": 0.06328131995365162, "grad_norm": 0.77734375, "learning_rate": 0.0014700821806446682, "loss": 0.2339, "step": 35690 }, { "epoch": 0.06328486611896143, "grad_norm": 3.390625, "learning_rate": 0.0014700279259653084, "loss": 0.3506, "step": 35692 }, { "epoch": 0.06328841228427125, "grad_norm": 0.6328125, "learning_rate": 0.0014699736696676615, "loss": 0.2072, "step": 35694 }, { "epoch": 0.06329195844958106, "grad_norm": 0.384765625, "learning_rate": 0.001469919411751965, "loss": 0.206, "step": 35696 }, { "epoch": 0.06329550461489088, "grad_norm": 0.734375, "learning_rate": 0.0014698651522184552, "loss": 0.2326, "step": 35698 }, { "epoch": 0.06329905078020069, "grad_norm": 0.2578125, "learning_rate": 0.0014698108910673707, "loss": 0.1406, "step": 35700 }, { "epoch": 0.0633025969455105, "grad_norm": 0.265625, "learning_rate": 0.001469756628298948, "loss": 0.1872, "step": 35702 }, { "epoch": 0.06330614311082032, "grad_norm": 0.546875, "learning_rate": 0.0014697023639134246, "loss": 0.1999, "step": 35704 }, { "epoch": 0.06330968927613013, "grad_norm": 1.1796875, "learning_rate": 0.0014696480979110374, "loss": 0.172, "step": 35706 }, { "epoch": 0.06331323544143995, "grad_norm": 0.255859375, "learning_rate": 0.0014695938302920247, "loss": 0.1735, "step": 35708 }, { "epoch": 0.06331678160674978, "grad_norm": 0.86328125, "learning_rate": 0.001469539561056623, "loss": 0.1948, "step": 35710 }, { "epoch": 0.06332032777205959, "grad_norm": 0.388671875, "learning_rate": 0.0014694852902050698, "loss": 0.196, "step": 35712 }, { "epoch": 0.06332387393736941, "grad_norm": 1.0234375, "learning_rate": 0.0014694310177376024, "loss": 0.3898, "step": 35714 }, { "epoch": 0.06332742010267922, "grad_norm": 0.40234375, "learning_rate": 0.0014693767436544586, "loss": 0.1977, "step": 35716 }, { "epoch": 0.06333096626798904, "grad_norm": 0.5625, "learning_rate": 0.0014693224679558756, "loss": 0.2351, "step": 35718 }, { "epoch": 0.06333451243329885, "grad_norm": 1.3515625, "learning_rate": 0.0014692681906420906, "loss": 0.1988, "step": 35720 }, { "epoch": 0.06333805859860867, "grad_norm": 0.52734375, "learning_rate": 0.0014692139117133411, "loss": 0.1916, "step": 35722 }, { "epoch": 0.06334160476391848, "grad_norm": 1.6953125, "learning_rate": 0.0014691596311698642, "loss": 0.3414, "step": 35724 }, { "epoch": 0.0633451509292283, "grad_norm": 0.59375, "learning_rate": 0.0014691053490118976, "loss": 0.1557, "step": 35726 }, { "epoch": 0.06334869709453811, "grad_norm": 0.42578125, "learning_rate": 0.0014690510652396788, "loss": 0.1974, "step": 35728 }, { "epoch": 0.06335224325984792, "grad_norm": 0.56640625, "learning_rate": 0.0014689967798534448, "loss": 0.2082, "step": 35730 }, { "epoch": 0.06335578942515774, "grad_norm": 0.455078125, "learning_rate": 0.001468942492853433, "loss": 0.2261, "step": 35732 }, { "epoch": 0.06335933559046755, "grad_norm": 0.79296875, "learning_rate": 0.0014688882042398813, "loss": 0.236, "step": 35734 }, { "epoch": 0.06336288175577737, "grad_norm": 0.275390625, "learning_rate": 0.0014688339140130268, "loss": 0.1602, "step": 35736 }, { "epoch": 0.06336642792108718, "grad_norm": 0.73828125, "learning_rate": 0.0014687796221731072, "loss": 0.2385, "step": 35738 }, { "epoch": 0.063369974086397, "grad_norm": 0.30859375, "learning_rate": 0.0014687253287203595, "loss": 0.2155, "step": 35740 }, { "epoch": 0.06337352025170681, "grad_norm": 0.6484375, "learning_rate": 0.0014686710336550214, "loss": 0.1875, "step": 35742 }, { "epoch": 0.06337706641701663, "grad_norm": 0.59765625, "learning_rate": 0.0014686167369773308, "loss": 0.1862, "step": 35744 }, { "epoch": 0.06338061258232644, "grad_norm": 0.302734375, "learning_rate": 0.0014685624386875243, "loss": 0.1595, "step": 35746 }, { "epoch": 0.06338415874763625, "grad_norm": 0.1669921875, "learning_rate": 0.0014685081387858397, "loss": 0.1804, "step": 35748 }, { "epoch": 0.06338770491294607, "grad_norm": 0.310546875, "learning_rate": 0.0014684538372725148, "loss": 0.1926, "step": 35750 }, { "epoch": 0.06339125107825588, "grad_norm": 0.4375, "learning_rate": 0.0014683995341477868, "loss": 0.1984, "step": 35752 }, { "epoch": 0.0633947972435657, "grad_norm": 0.9140625, "learning_rate": 0.001468345229411893, "loss": 0.1892, "step": 35754 }, { "epoch": 0.06339834340887553, "grad_norm": 0.32421875, "learning_rate": 0.0014682909230650711, "loss": 0.1904, "step": 35756 }, { "epoch": 0.06340188957418534, "grad_norm": 0.2451171875, "learning_rate": 0.001468236615107559, "loss": 0.152, "step": 35758 }, { "epoch": 0.06340543573949516, "grad_norm": 0.337890625, "learning_rate": 0.0014681823055395936, "loss": 0.169, "step": 35760 }, { "epoch": 0.06340898190480497, "grad_norm": 0.44921875, "learning_rate": 0.0014681279943614127, "loss": 0.2105, "step": 35762 }, { "epoch": 0.06341252807011478, "grad_norm": 0.259765625, "learning_rate": 0.0014680736815732539, "loss": 0.2279, "step": 35764 }, { "epoch": 0.0634160742354246, "grad_norm": 1.171875, "learning_rate": 0.0014680193671753542, "loss": 0.3781, "step": 35766 }, { "epoch": 0.06341962040073441, "grad_norm": 0.291015625, "learning_rate": 0.0014679650511679521, "loss": 0.2112, "step": 35768 }, { "epoch": 0.06342316656604423, "grad_norm": 0.9296875, "learning_rate": 0.0014679107335512842, "loss": 0.1608, "step": 35770 }, { "epoch": 0.06342671273135404, "grad_norm": 1.0703125, "learning_rate": 0.0014678564143255888, "loss": 0.331, "step": 35772 }, { "epoch": 0.06343025889666386, "grad_norm": 1.0078125, "learning_rate": 0.001467802093491103, "loss": 0.1572, "step": 35774 }, { "epoch": 0.06343380506197367, "grad_norm": 0.3203125, "learning_rate": 0.0014677477710480645, "loss": 0.2624, "step": 35776 }, { "epoch": 0.06343735122728349, "grad_norm": 0.27734375, "learning_rate": 0.0014676934469967109, "loss": 0.2667, "step": 35778 }, { "epoch": 0.0634408973925933, "grad_norm": 0.60546875, "learning_rate": 0.0014676391213372798, "loss": 0.3599, "step": 35780 }, { "epoch": 0.06344444355790312, "grad_norm": 1.2421875, "learning_rate": 0.0014675847940700084, "loss": 0.2904, "step": 35782 }, { "epoch": 0.06344798972321293, "grad_norm": 0.828125, "learning_rate": 0.0014675304651951347, "loss": 0.3502, "step": 35784 }, { "epoch": 0.06345153588852274, "grad_norm": 0.4140625, "learning_rate": 0.0014674761347128965, "loss": 0.1876, "step": 35786 }, { "epoch": 0.06345508205383256, "grad_norm": 0.55078125, "learning_rate": 0.0014674218026235308, "loss": 0.1856, "step": 35788 }, { "epoch": 0.06345862821914237, "grad_norm": 1.390625, "learning_rate": 0.001467367468927276, "loss": 0.2253, "step": 35790 }, { "epoch": 0.06346217438445219, "grad_norm": 1.3671875, "learning_rate": 0.0014673131336243688, "loss": 0.3229, "step": 35792 }, { "epoch": 0.063465720549762, "grad_norm": 1.2890625, "learning_rate": 0.0014672587967150476, "loss": 0.4037, "step": 35794 }, { "epoch": 0.06346926671507182, "grad_norm": 0.203125, "learning_rate": 0.0014672044581995499, "loss": 0.1905, "step": 35796 }, { "epoch": 0.06347281288038163, "grad_norm": 0.83203125, "learning_rate": 0.001467150118078113, "loss": 0.3098, "step": 35798 }, { "epoch": 0.06347635904569145, "grad_norm": 0.96875, "learning_rate": 0.0014670957763509747, "loss": 0.2511, "step": 35800 }, { "epoch": 0.06347990521100128, "grad_norm": 0.26953125, "learning_rate": 0.0014670414330183728, "loss": 0.1856, "step": 35802 }, { "epoch": 0.06348345137631109, "grad_norm": 0.87109375, "learning_rate": 0.0014669870880805447, "loss": 0.2496, "step": 35804 }, { "epoch": 0.0634869975416209, "grad_norm": 1.4765625, "learning_rate": 0.0014669327415377288, "loss": 0.2348, "step": 35806 }, { "epoch": 0.06349054370693072, "grad_norm": 0.306640625, "learning_rate": 0.0014668783933901614, "loss": 0.1643, "step": 35808 }, { "epoch": 0.06349408987224053, "grad_norm": 0.486328125, "learning_rate": 0.0014668240436380815, "loss": 0.2166, "step": 35810 }, { "epoch": 0.06349763603755035, "grad_norm": 0.365234375, "learning_rate": 0.0014667696922817262, "loss": 0.1747, "step": 35812 }, { "epoch": 0.06350118220286016, "grad_norm": 0.392578125, "learning_rate": 0.0014667153393213336, "loss": 0.1988, "step": 35814 }, { "epoch": 0.06350472836816998, "grad_norm": 0.82421875, "learning_rate": 0.001466660984757141, "loss": 0.1695, "step": 35816 }, { "epoch": 0.06350827453347979, "grad_norm": 4.53125, "learning_rate": 0.001466606628589386, "loss": 0.2611, "step": 35818 }, { "epoch": 0.0635118206987896, "grad_norm": 0.5234375, "learning_rate": 0.0014665522708183065, "loss": 0.2061, "step": 35820 }, { "epoch": 0.06351536686409942, "grad_norm": 2.765625, "learning_rate": 0.0014664979114441406, "loss": 0.3154, "step": 35822 }, { "epoch": 0.06351891302940924, "grad_norm": 0.76953125, "learning_rate": 0.0014664435504671254, "loss": 0.2702, "step": 35824 }, { "epoch": 0.06352245919471905, "grad_norm": 0.73046875, "learning_rate": 0.0014663891878874992, "loss": 0.1982, "step": 35826 }, { "epoch": 0.06352600536002886, "grad_norm": 1.390625, "learning_rate": 0.0014663348237054991, "loss": 0.2206, "step": 35828 }, { "epoch": 0.06352955152533868, "grad_norm": 0.318359375, "learning_rate": 0.001466280457921364, "loss": 0.1573, "step": 35830 }, { "epoch": 0.0635330976906485, "grad_norm": 0.578125, "learning_rate": 0.0014662260905353305, "loss": 0.1895, "step": 35832 }, { "epoch": 0.06353664385595831, "grad_norm": 0.458984375, "learning_rate": 0.0014661717215476368, "loss": 0.2424, "step": 35834 }, { "epoch": 0.06354019002126812, "grad_norm": 0.302734375, "learning_rate": 0.0014661173509585208, "loss": 0.1796, "step": 35836 }, { "epoch": 0.06354373618657794, "grad_norm": 0.734375, "learning_rate": 0.0014660629787682203, "loss": 0.2547, "step": 35838 }, { "epoch": 0.06354728235188775, "grad_norm": 0.80078125, "learning_rate": 0.0014660086049769728, "loss": 0.3369, "step": 35840 }, { "epoch": 0.06355082851719757, "grad_norm": 0.302734375, "learning_rate": 0.0014659542295850164, "loss": 0.195, "step": 35842 }, { "epoch": 0.06355437468250738, "grad_norm": 0.30859375, "learning_rate": 0.0014658998525925884, "loss": 0.2022, "step": 35844 }, { "epoch": 0.06355792084781721, "grad_norm": 0.46484375, "learning_rate": 0.001465845473999927, "loss": 0.1853, "step": 35846 }, { "epoch": 0.06356146701312702, "grad_norm": 0.76171875, "learning_rate": 0.0014657910938072705, "loss": 0.226, "step": 35848 }, { "epoch": 0.06356501317843684, "grad_norm": 0.5078125, "learning_rate": 0.001465736712014856, "loss": 0.1819, "step": 35850 }, { "epoch": 0.06356855934374665, "grad_norm": 0.365234375, "learning_rate": 0.0014656823286229217, "loss": 0.2088, "step": 35852 }, { "epoch": 0.06357210550905647, "grad_norm": 0.84765625, "learning_rate": 0.001465627943631705, "loss": 0.1773, "step": 35854 }, { "epoch": 0.06357565167436628, "grad_norm": 0.7421875, "learning_rate": 0.0014655735570414444, "loss": 0.2128, "step": 35856 }, { "epoch": 0.0635791978396761, "grad_norm": 1.0390625, "learning_rate": 0.001465519168852377, "loss": 0.2544, "step": 35858 }, { "epoch": 0.06358274400498591, "grad_norm": 0.443359375, "learning_rate": 0.0014654647790647412, "loss": 0.2245, "step": 35860 }, { "epoch": 0.06358629017029573, "grad_norm": 0.23828125, "learning_rate": 0.001465410387678775, "loss": 0.2134, "step": 35862 }, { "epoch": 0.06358983633560554, "grad_norm": 0.66796875, "learning_rate": 0.0014653559946947157, "loss": 0.2174, "step": 35864 }, { "epoch": 0.06359338250091535, "grad_norm": 0.66796875, "learning_rate": 0.0014653016001128015, "loss": 0.2381, "step": 35866 }, { "epoch": 0.06359692866622517, "grad_norm": 0.404296875, "learning_rate": 0.0014652472039332707, "loss": 0.1995, "step": 35868 }, { "epoch": 0.06360047483153498, "grad_norm": 0.43359375, "learning_rate": 0.0014651928061563603, "loss": 0.1919, "step": 35870 }, { "epoch": 0.0636040209968448, "grad_norm": 0.6875, "learning_rate": 0.001465138406782309, "loss": 0.389, "step": 35872 }, { "epoch": 0.06360756716215461, "grad_norm": 3.421875, "learning_rate": 0.0014650840058113544, "loss": 0.3533, "step": 35874 }, { "epoch": 0.06361111332746443, "grad_norm": 0.421875, "learning_rate": 0.0014650296032437345, "loss": 0.3744, "step": 35876 }, { "epoch": 0.06361465949277424, "grad_norm": 0.66015625, "learning_rate": 0.0014649751990796869, "loss": 0.1846, "step": 35878 }, { "epoch": 0.06361820565808406, "grad_norm": 1.296875, "learning_rate": 0.00146492079331945, "loss": 0.2693, "step": 35880 }, { "epoch": 0.06362175182339387, "grad_norm": 0.6328125, "learning_rate": 0.0014648663859632611, "loss": 0.1789, "step": 35882 }, { "epoch": 0.06362529798870369, "grad_norm": 0.30859375, "learning_rate": 0.001464811977011359, "loss": 0.2304, "step": 35884 }, { "epoch": 0.0636288441540135, "grad_norm": 0.859375, "learning_rate": 0.0014647575664639809, "loss": 0.2338, "step": 35886 }, { "epoch": 0.06363239031932331, "grad_norm": 0.1845703125, "learning_rate": 0.0014647031543213653, "loss": 0.1566, "step": 35888 }, { "epoch": 0.06363593648463313, "grad_norm": 0.45703125, "learning_rate": 0.0014646487405837502, "loss": 0.2675, "step": 35890 }, { "epoch": 0.06363948264994296, "grad_norm": 1.7109375, "learning_rate": 0.0014645943252513732, "loss": 0.232, "step": 35892 }, { "epoch": 0.06364302881525277, "grad_norm": 0.5859375, "learning_rate": 0.0014645399083244722, "loss": 0.1899, "step": 35894 }, { "epoch": 0.06364657498056259, "grad_norm": 0.68359375, "learning_rate": 0.0014644854898032853, "loss": 0.2973, "step": 35896 }, { "epoch": 0.0636501211458724, "grad_norm": 0.251953125, "learning_rate": 0.0014644310696880507, "loss": 0.1688, "step": 35898 }, { "epoch": 0.06365366731118222, "grad_norm": 0.9609375, "learning_rate": 0.0014643766479790065, "loss": 0.1542, "step": 35900 }, { "epoch": 0.06365721347649203, "grad_norm": 0.384765625, "learning_rate": 0.00146432222467639, "loss": 0.1585, "step": 35902 }, { "epoch": 0.06366075964180185, "grad_norm": 1.1171875, "learning_rate": 0.00146426779978044, "loss": 0.187, "step": 35904 }, { "epoch": 0.06366430580711166, "grad_norm": 5.90625, "learning_rate": 0.0014642133732913943, "loss": 0.2861, "step": 35906 }, { "epoch": 0.06366785197242147, "grad_norm": 0.84375, "learning_rate": 0.0014641589452094909, "loss": 0.2033, "step": 35908 }, { "epoch": 0.06367139813773129, "grad_norm": 0.55078125, "learning_rate": 0.001464104515534968, "loss": 0.4428, "step": 35910 }, { "epoch": 0.0636749443030411, "grad_norm": 0.39453125, "learning_rate": 0.0014640500842680632, "loss": 0.1882, "step": 35912 }, { "epoch": 0.06367849046835092, "grad_norm": 0.259765625, "learning_rate": 0.0014639956514090145, "loss": 0.3606, "step": 35914 }, { "epoch": 0.06368203663366073, "grad_norm": 0.474609375, "learning_rate": 0.0014639412169580605, "loss": 0.2765, "step": 35916 }, { "epoch": 0.06368558279897055, "grad_norm": 0.333984375, "learning_rate": 0.001463886780915439, "loss": 0.1718, "step": 35918 }, { "epoch": 0.06368912896428036, "grad_norm": 3.015625, "learning_rate": 0.0014638323432813885, "loss": 0.2021, "step": 35920 }, { "epoch": 0.06369267512959018, "grad_norm": 0.33203125, "learning_rate": 0.001463777904056146, "loss": 0.1819, "step": 35922 }, { "epoch": 0.06369622129489999, "grad_norm": 0.53125, "learning_rate": 0.0014637234632399508, "loss": 0.1789, "step": 35924 }, { "epoch": 0.0636997674602098, "grad_norm": 0.267578125, "learning_rate": 0.00146366902083304, "loss": 0.2096, "step": 35926 }, { "epoch": 0.06370331362551962, "grad_norm": 3.53125, "learning_rate": 0.0014636145768356526, "loss": 0.3308, "step": 35928 }, { "epoch": 0.06370685979082943, "grad_norm": 0.45703125, "learning_rate": 0.001463560131248026, "loss": 0.2012, "step": 35930 }, { "epoch": 0.06371040595613925, "grad_norm": 1.4296875, "learning_rate": 0.0014635056840703983, "loss": 0.3634, "step": 35932 }, { "epoch": 0.06371395212144906, "grad_norm": 0.279296875, "learning_rate": 0.0014634512353030084, "loss": 0.1549, "step": 35934 }, { "epoch": 0.06371749828675888, "grad_norm": 0.4453125, "learning_rate": 0.0014633967849460936, "loss": 0.1574, "step": 35936 }, { "epoch": 0.0637210444520687, "grad_norm": 0.28515625, "learning_rate": 0.0014633423329998923, "loss": 0.1861, "step": 35938 }, { "epoch": 0.06372459061737852, "grad_norm": 0.2578125, "learning_rate": 0.0014632878794646428, "loss": 0.2551, "step": 35940 }, { "epoch": 0.06372813678268834, "grad_norm": 0.322265625, "learning_rate": 0.001463233424340583, "loss": 0.2012, "step": 35942 }, { "epoch": 0.06373168294799815, "grad_norm": 0.8046875, "learning_rate": 0.0014631789676279516, "loss": 0.2697, "step": 35944 }, { "epoch": 0.06373522911330796, "grad_norm": 0.6796875, "learning_rate": 0.0014631245093269863, "loss": 0.2294, "step": 35946 }, { "epoch": 0.06373877527861778, "grad_norm": 0.271484375, "learning_rate": 0.001463070049437925, "loss": 0.1541, "step": 35948 }, { "epoch": 0.0637423214439276, "grad_norm": 0.474609375, "learning_rate": 0.0014630155879610063, "loss": 0.1846, "step": 35950 }, { "epoch": 0.06374586760923741, "grad_norm": 0.5859375, "learning_rate": 0.001462961124896468, "loss": 0.1733, "step": 35952 }, { "epoch": 0.06374941377454722, "grad_norm": 1.0390625, "learning_rate": 0.0014629066602445493, "loss": 0.2277, "step": 35954 }, { "epoch": 0.06375295993985704, "grad_norm": 0.2021484375, "learning_rate": 0.0014628521940054872, "loss": 0.3299, "step": 35956 }, { "epoch": 0.06375650610516685, "grad_norm": 0.34765625, "learning_rate": 0.0014627977261795204, "loss": 0.2162, "step": 35958 }, { "epoch": 0.06376005227047667, "grad_norm": 0.462890625, "learning_rate": 0.001462743256766887, "loss": 0.2071, "step": 35960 }, { "epoch": 0.06376359843578648, "grad_norm": 0.6796875, "learning_rate": 0.0014626887857678253, "loss": 0.318, "step": 35962 }, { "epoch": 0.0637671446010963, "grad_norm": 0.31640625, "learning_rate": 0.0014626343131825738, "loss": 0.1398, "step": 35964 }, { "epoch": 0.06377069076640611, "grad_norm": 0.703125, "learning_rate": 0.0014625798390113705, "loss": 0.1872, "step": 35966 }, { "epoch": 0.06377423693171592, "grad_norm": 2.4375, "learning_rate": 0.0014625253632544533, "loss": 0.542, "step": 35968 }, { "epoch": 0.06377778309702574, "grad_norm": 0.5390625, "learning_rate": 0.0014624708859120605, "loss": 0.2541, "step": 35970 }, { "epoch": 0.06378132926233555, "grad_norm": 0.271484375, "learning_rate": 0.0014624164069844312, "loss": 0.1689, "step": 35972 }, { "epoch": 0.06378487542764537, "grad_norm": 0.4765625, "learning_rate": 0.0014623619264718026, "loss": 0.1664, "step": 35974 }, { "epoch": 0.06378842159295518, "grad_norm": 0.29296875, "learning_rate": 0.0014623074443744135, "loss": 0.2016, "step": 35976 }, { "epoch": 0.063791967758265, "grad_norm": 0.162109375, "learning_rate": 0.0014622529606925024, "loss": 0.1547, "step": 35978 }, { "epoch": 0.06379551392357481, "grad_norm": 0.55078125, "learning_rate": 0.0014621984754263069, "loss": 0.1702, "step": 35980 }, { "epoch": 0.06379906008888464, "grad_norm": 0.609375, "learning_rate": 0.0014621439885760658, "loss": 0.2038, "step": 35982 }, { "epoch": 0.06380260625419445, "grad_norm": 0.376953125, "learning_rate": 0.0014620895001420174, "loss": 0.2626, "step": 35984 }, { "epoch": 0.06380615241950427, "grad_norm": 0.36328125, "learning_rate": 0.0014620350101243998, "loss": 0.1908, "step": 35986 }, { "epoch": 0.06380969858481408, "grad_norm": 0.62109375, "learning_rate": 0.0014619805185234515, "loss": 0.1927, "step": 35988 }, { "epoch": 0.0638132447501239, "grad_norm": 0.87890625, "learning_rate": 0.0014619260253394104, "loss": 0.1678, "step": 35990 }, { "epoch": 0.06381679091543371, "grad_norm": 0.283203125, "learning_rate": 0.001461871530572515, "loss": 0.1677, "step": 35992 }, { "epoch": 0.06382033708074353, "grad_norm": 0.345703125, "learning_rate": 0.001461817034223004, "loss": 0.1877, "step": 35994 }, { "epoch": 0.06382388324605334, "grad_norm": 0.451171875, "learning_rate": 0.0014617625362911152, "loss": 0.1669, "step": 35996 }, { "epoch": 0.06382742941136316, "grad_norm": 0.53515625, "learning_rate": 0.0014617080367770874, "loss": 0.2157, "step": 35998 }, { "epoch": 0.06383097557667297, "grad_norm": 0.2373046875, "learning_rate": 0.0014616535356811585, "loss": 0.2111, "step": 36000 }, { "epoch": 0.06383452174198279, "grad_norm": 0.55859375, "learning_rate": 0.0014615990330035674, "loss": 0.1653, "step": 36002 }, { "epoch": 0.0638380679072926, "grad_norm": 0.578125, "learning_rate": 0.0014615445287445522, "loss": 0.1851, "step": 36004 }, { "epoch": 0.06384161407260242, "grad_norm": 0.96484375, "learning_rate": 0.0014614900229043511, "loss": 0.1844, "step": 36006 }, { "epoch": 0.06384516023791223, "grad_norm": 0.423828125, "learning_rate": 0.0014614355154832024, "loss": 0.1638, "step": 36008 }, { "epoch": 0.06384870640322204, "grad_norm": 0.64453125, "learning_rate": 0.0014613810064813453, "loss": 0.1899, "step": 36010 }, { "epoch": 0.06385225256853186, "grad_norm": 0.9453125, "learning_rate": 0.001461326495899017, "loss": 0.2314, "step": 36012 }, { "epoch": 0.06385579873384167, "grad_norm": 0.875, "learning_rate": 0.0014612719837364565, "loss": 0.1954, "step": 36014 }, { "epoch": 0.06385934489915149, "grad_norm": 0.291015625, "learning_rate": 0.0014612174699939024, "loss": 0.164, "step": 36016 }, { "epoch": 0.0638628910644613, "grad_norm": 0.451171875, "learning_rate": 0.0014611629546715933, "loss": 0.2117, "step": 36018 }, { "epoch": 0.06386643722977112, "grad_norm": 0.326171875, "learning_rate": 0.0014611084377697669, "loss": 0.2418, "step": 36020 }, { "epoch": 0.06386998339508093, "grad_norm": 0.90234375, "learning_rate": 0.0014610539192886616, "loss": 0.226, "step": 36022 }, { "epoch": 0.06387352956039075, "grad_norm": 0.275390625, "learning_rate": 0.0014609993992285165, "loss": 0.1898, "step": 36024 }, { "epoch": 0.06387707572570056, "grad_norm": 0.3671875, "learning_rate": 0.0014609448775895696, "loss": 0.1493, "step": 36026 }, { "epoch": 0.06388062189101039, "grad_norm": 0.408203125, "learning_rate": 0.0014608903543720598, "loss": 0.1625, "step": 36028 }, { "epoch": 0.0638841680563202, "grad_norm": 0.1953125, "learning_rate": 0.0014608358295762247, "loss": 0.1805, "step": 36030 }, { "epoch": 0.06388771422163002, "grad_norm": 0.390625, "learning_rate": 0.0014607813032023037, "loss": 0.1854, "step": 36032 }, { "epoch": 0.06389126038693983, "grad_norm": 0.333984375, "learning_rate": 0.0014607267752505346, "loss": 0.1879, "step": 36034 }, { "epoch": 0.06389480655224965, "grad_norm": 1.140625, "learning_rate": 0.0014606722457211564, "loss": 0.2819, "step": 36036 }, { "epoch": 0.06389835271755946, "grad_norm": 0.375, "learning_rate": 0.0014606177146144074, "loss": 0.2314, "step": 36038 }, { "epoch": 0.06390189888286928, "grad_norm": 1.7734375, "learning_rate": 0.0014605631819305253, "loss": 0.1631, "step": 36040 }, { "epoch": 0.06390544504817909, "grad_norm": 0.55859375, "learning_rate": 0.00146050864766975, "loss": 0.2352, "step": 36042 }, { "epoch": 0.0639089912134889, "grad_norm": 0.96484375, "learning_rate": 0.001460454111832319, "loss": 0.2413, "step": 36044 }, { "epoch": 0.06391253737879872, "grad_norm": 0.5, "learning_rate": 0.0014603995744184712, "loss": 0.2004, "step": 36046 }, { "epoch": 0.06391608354410853, "grad_norm": 2.625, "learning_rate": 0.0014603450354284447, "loss": 0.2188, "step": 36048 }, { "epoch": 0.06391962970941835, "grad_norm": 0.2197265625, "learning_rate": 0.0014602904948624784, "loss": 0.1695, "step": 36050 }, { "epoch": 0.06392317587472816, "grad_norm": 0.490234375, "learning_rate": 0.001460235952720811, "loss": 0.199, "step": 36052 }, { "epoch": 0.06392672204003798, "grad_norm": 0.416015625, "learning_rate": 0.0014601814090036804, "loss": 0.1401, "step": 36054 }, { "epoch": 0.06393026820534779, "grad_norm": 0.322265625, "learning_rate": 0.001460126863711326, "loss": 0.3583, "step": 36056 }, { "epoch": 0.06393381437065761, "grad_norm": 0.4296875, "learning_rate": 0.0014600723168439855, "loss": 0.1938, "step": 36058 }, { "epoch": 0.06393736053596742, "grad_norm": 0.298828125, "learning_rate": 0.0014600177684018982, "loss": 0.1683, "step": 36060 }, { "epoch": 0.06394090670127724, "grad_norm": 0.69140625, "learning_rate": 0.001459963218385302, "loss": 0.3138, "step": 36062 }, { "epoch": 0.06394445286658705, "grad_norm": 0.69140625, "learning_rate": 0.0014599086667944359, "loss": 0.4388, "step": 36064 }, { "epoch": 0.06394799903189687, "grad_norm": 0.39453125, "learning_rate": 0.0014598541136295385, "loss": 0.1993, "step": 36066 }, { "epoch": 0.06395154519720668, "grad_norm": 0.71484375, "learning_rate": 0.001459799558890848, "loss": 0.2054, "step": 36068 }, { "epoch": 0.0639550913625165, "grad_norm": 0.265625, "learning_rate": 0.0014597450025786033, "loss": 0.2158, "step": 36070 }, { "epoch": 0.06395863752782631, "grad_norm": 0.22265625, "learning_rate": 0.0014596904446930428, "loss": 0.1789, "step": 36072 }, { "epoch": 0.06396218369313614, "grad_norm": 0.60546875, "learning_rate": 0.0014596358852344055, "loss": 0.1776, "step": 36074 }, { "epoch": 0.06396572985844595, "grad_norm": 0.486328125, "learning_rate": 0.0014595813242029296, "loss": 0.2003, "step": 36076 }, { "epoch": 0.06396927602375577, "grad_norm": 0.421875, "learning_rate": 0.0014595267615988541, "loss": 0.1757, "step": 36078 }, { "epoch": 0.06397282218906558, "grad_norm": 1.078125, "learning_rate": 0.001459472197422417, "loss": 0.2055, "step": 36080 }, { "epoch": 0.0639763683543754, "grad_norm": 1.859375, "learning_rate": 0.0014594176316738575, "loss": 0.2553, "step": 36082 }, { "epoch": 0.06397991451968521, "grad_norm": 0.380859375, "learning_rate": 0.0014593630643534139, "loss": 0.2045, "step": 36084 }, { "epoch": 0.06398346068499502, "grad_norm": 6.90625, "learning_rate": 0.0014593084954613248, "loss": 0.2533, "step": 36086 }, { "epoch": 0.06398700685030484, "grad_norm": 1.3203125, "learning_rate": 0.0014592539249978298, "loss": 0.2083, "step": 36088 }, { "epoch": 0.06399055301561465, "grad_norm": 0.478515625, "learning_rate": 0.0014591993529631663, "loss": 0.1815, "step": 36090 }, { "epoch": 0.06399409918092447, "grad_norm": 0.48046875, "learning_rate": 0.0014591447793575736, "loss": 0.2067, "step": 36092 }, { "epoch": 0.06399764534623428, "grad_norm": 1.4921875, "learning_rate": 0.00145909020418129, "loss": 0.2523, "step": 36094 }, { "epoch": 0.0640011915115441, "grad_norm": 0.345703125, "learning_rate": 0.0014590356274345546, "loss": 0.1845, "step": 36096 }, { "epoch": 0.06400473767685391, "grad_norm": 0.2890625, "learning_rate": 0.001458981049117606, "loss": 0.4264, "step": 36098 }, { "epoch": 0.06400828384216373, "grad_norm": 0.8046875, "learning_rate": 0.0014589264692306827, "loss": 0.1963, "step": 36100 }, { "epoch": 0.06401183000747354, "grad_norm": 0.48828125, "learning_rate": 0.0014588718877740238, "loss": 0.1778, "step": 36102 }, { "epoch": 0.06401537617278336, "grad_norm": 0.421875, "learning_rate": 0.0014588173047478672, "loss": 0.1896, "step": 36104 }, { "epoch": 0.06401892233809317, "grad_norm": 1.5625, "learning_rate": 0.0014587627201524528, "loss": 0.2993, "step": 36106 }, { "epoch": 0.06402246850340299, "grad_norm": 0.494140625, "learning_rate": 0.0014587081339880181, "loss": 0.1684, "step": 36108 }, { "epoch": 0.0640260146687128, "grad_norm": 0.65234375, "learning_rate": 0.0014586535462548025, "loss": 0.2066, "step": 36110 }, { "epoch": 0.06402956083402261, "grad_norm": 0.369140625, "learning_rate": 0.0014585989569530448, "loss": 0.1718, "step": 36112 }, { "epoch": 0.06403310699933243, "grad_norm": 0.8046875, "learning_rate": 0.0014585443660829836, "loss": 0.2311, "step": 36114 }, { "epoch": 0.06403665316464224, "grad_norm": 1.0546875, "learning_rate": 0.0014584897736448575, "loss": 0.2476, "step": 36116 }, { "epoch": 0.06404019932995206, "grad_norm": 0.55078125, "learning_rate": 0.0014584351796389052, "loss": 0.2361, "step": 36118 }, { "epoch": 0.06404374549526189, "grad_norm": 0.345703125, "learning_rate": 0.001458380584065366, "loss": 0.1962, "step": 36120 }, { "epoch": 0.0640472916605717, "grad_norm": 0.47265625, "learning_rate": 0.0014583259869244778, "loss": 0.1581, "step": 36122 }, { "epoch": 0.06405083782588152, "grad_norm": 0.5546875, "learning_rate": 0.00145827138821648, "loss": 0.2152, "step": 36124 }, { "epoch": 0.06405438399119133, "grad_norm": 0.361328125, "learning_rate": 0.0014582167879416115, "loss": 0.1623, "step": 36126 }, { "epoch": 0.06405793015650114, "grad_norm": 0.84375, "learning_rate": 0.0014581621861001106, "loss": 0.2034, "step": 36128 }, { "epoch": 0.06406147632181096, "grad_norm": 1.0234375, "learning_rate": 0.0014581075826922165, "loss": 0.189, "step": 36130 }, { "epoch": 0.06406502248712077, "grad_norm": 0.85546875, "learning_rate": 0.001458052977718168, "loss": 0.1858, "step": 36132 }, { "epoch": 0.06406856865243059, "grad_norm": 0.2216796875, "learning_rate": 0.001457998371178204, "loss": 0.1899, "step": 36134 }, { "epoch": 0.0640721148177404, "grad_norm": 0.376953125, "learning_rate": 0.0014579437630725623, "loss": 0.1974, "step": 36136 }, { "epoch": 0.06407566098305022, "grad_norm": 0.376953125, "learning_rate": 0.001457889153401483, "loss": 0.1831, "step": 36138 }, { "epoch": 0.06407920714836003, "grad_norm": 1.3828125, "learning_rate": 0.001457834542165204, "loss": 0.2341, "step": 36140 }, { "epoch": 0.06408275331366985, "grad_norm": 0.39453125, "learning_rate": 0.0014577799293639649, "loss": 0.1707, "step": 36142 }, { "epoch": 0.06408629947897966, "grad_norm": 0.31640625, "learning_rate": 0.001457725314998004, "loss": 0.2103, "step": 36144 }, { "epoch": 0.06408984564428948, "grad_norm": 0.41796875, "learning_rate": 0.0014576706990675609, "loss": 0.1736, "step": 36146 }, { "epoch": 0.06409339180959929, "grad_norm": 0.2373046875, "learning_rate": 0.0014576160815728731, "loss": 0.1757, "step": 36148 }, { "epoch": 0.0640969379749091, "grad_norm": 1.4609375, "learning_rate": 0.0014575614625141808, "loss": 0.201, "step": 36150 }, { "epoch": 0.06410048414021892, "grad_norm": 0.41796875, "learning_rate": 0.0014575068418917225, "loss": 0.155, "step": 36152 }, { "epoch": 0.06410403030552873, "grad_norm": 0.78515625, "learning_rate": 0.001457452219705737, "loss": 0.2339, "step": 36154 }, { "epoch": 0.06410757647083855, "grad_norm": 0.7109375, "learning_rate": 0.0014573975959564628, "loss": 0.2077, "step": 36156 }, { "epoch": 0.06411112263614836, "grad_norm": 0.4296875, "learning_rate": 0.0014573429706441392, "loss": 0.1272, "step": 36158 }, { "epoch": 0.06411466880145818, "grad_norm": 0.46484375, "learning_rate": 0.0014572883437690045, "loss": 0.2097, "step": 36160 }, { "epoch": 0.06411821496676799, "grad_norm": 0.306640625, "learning_rate": 0.0014572337153312988, "loss": 0.2857, "step": 36162 }, { "epoch": 0.06412176113207782, "grad_norm": 0.36328125, "learning_rate": 0.0014571790853312601, "loss": 0.2497, "step": 36164 }, { "epoch": 0.06412530729738763, "grad_norm": 0.3125, "learning_rate": 0.0014571244537691277, "loss": 0.1233, "step": 36166 }, { "epoch": 0.06412885346269745, "grad_norm": 0.51953125, "learning_rate": 0.0014570698206451402, "loss": 0.2454, "step": 36168 }, { "epoch": 0.06413239962800726, "grad_norm": 0.291015625, "learning_rate": 0.001457015185959537, "loss": 0.2182, "step": 36170 }, { "epoch": 0.06413594579331708, "grad_norm": 0.7734375, "learning_rate": 0.0014569605497125565, "loss": 0.2086, "step": 36172 }, { "epoch": 0.06413949195862689, "grad_norm": 0.40625, "learning_rate": 0.0014569059119044381, "loss": 0.2193, "step": 36174 }, { "epoch": 0.06414303812393671, "grad_norm": 0.365234375, "learning_rate": 0.0014568512725354203, "loss": 0.2536, "step": 36176 }, { "epoch": 0.06414658428924652, "grad_norm": 0.46484375, "learning_rate": 0.0014567966316057423, "loss": 0.2184, "step": 36178 }, { "epoch": 0.06415013045455634, "grad_norm": 0.314453125, "learning_rate": 0.0014567419891156432, "loss": 0.1838, "step": 36180 }, { "epoch": 0.06415367661986615, "grad_norm": 0.234375, "learning_rate": 0.0014566873450653618, "loss": 0.1881, "step": 36182 }, { "epoch": 0.06415722278517597, "grad_norm": 0.43359375, "learning_rate": 0.0014566326994551373, "loss": 0.2014, "step": 36184 }, { "epoch": 0.06416076895048578, "grad_norm": 0.55859375, "learning_rate": 0.0014565780522852085, "loss": 0.1902, "step": 36186 }, { "epoch": 0.0641643151157956, "grad_norm": 0.71875, "learning_rate": 0.0014565234035558143, "loss": 0.1709, "step": 36188 }, { "epoch": 0.06416786128110541, "grad_norm": 0.361328125, "learning_rate": 0.001456468753267194, "loss": 0.2233, "step": 36190 }, { "epoch": 0.06417140744641522, "grad_norm": 0.30078125, "learning_rate": 0.0014564141014195863, "loss": 0.1721, "step": 36192 }, { "epoch": 0.06417495361172504, "grad_norm": 0.251953125, "learning_rate": 0.0014563594480132302, "loss": 0.1611, "step": 36194 }, { "epoch": 0.06417849977703485, "grad_norm": 0.52734375, "learning_rate": 0.0014563047930483655, "loss": 0.4987, "step": 36196 }, { "epoch": 0.06418204594234467, "grad_norm": 0.98828125, "learning_rate": 0.0014562501365252298, "loss": 0.1836, "step": 36198 }, { "epoch": 0.06418559210765448, "grad_norm": 0.388671875, "learning_rate": 0.0014561954784440636, "loss": 0.1689, "step": 36200 }, { "epoch": 0.0641891382729643, "grad_norm": 1.4765625, "learning_rate": 0.0014561408188051047, "loss": 0.2725, "step": 36202 }, { "epoch": 0.06419268443827411, "grad_norm": 0.458984375, "learning_rate": 0.0014560861576085934, "loss": 0.1701, "step": 36204 }, { "epoch": 0.06419623060358393, "grad_norm": 0.98828125, "learning_rate": 0.0014560314948547674, "loss": 0.1656, "step": 36206 }, { "epoch": 0.06419977676889374, "grad_norm": 0.1923828125, "learning_rate": 0.0014559768305438669, "loss": 0.1546, "step": 36208 }, { "epoch": 0.06420332293420357, "grad_norm": 0.32421875, "learning_rate": 0.0014559221646761303, "loss": 0.1867, "step": 36210 }, { "epoch": 0.06420686909951338, "grad_norm": 0.337890625, "learning_rate": 0.0014558674972517968, "loss": 0.1567, "step": 36212 }, { "epoch": 0.0642104152648232, "grad_norm": 0.419921875, "learning_rate": 0.001455812828271106, "loss": 0.3284, "step": 36214 }, { "epoch": 0.06421396143013301, "grad_norm": 0.6875, "learning_rate": 0.0014557581577342962, "loss": 0.2036, "step": 36216 }, { "epoch": 0.06421750759544283, "grad_norm": 0.392578125, "learning_rate": 0.0014557034856416068, "loss": 0.1953, "step": 36218 }, { "epoch": 0.06422105376075264, "grad_norm": 0.55859375, "learning_rate": 0.0014556488119932772, "loss": 0.2172, "step": 36220 }, { "epoch": 0.06422459992606246, "grad_norm": 0.2333984375, "learning_rate": 0.0014555941367895464, "loss": 0.2309, "step": 36222 }, { "epoch": 0.06422814609137227, "grad_norm": 0.3046875, "learning_rate": 0.0014555394600306532, "loss": 0.1579, "step": 36224 }, { "epoch": 0.06423169225668209, "grad_norm": 0.55078125, "learning_rate": 0.001455484781716837, "loss": 0.1974, "step": 36226 }, { "epoch": 0.0642352384219919, "grad_norm": 0.439453125, "learning_rate": 0.0014554301018483368, "loss": 0.129, "step": 36228 }, { "epoch": 0.06423878458730171, "grad_norm": 0.578125, "learning_rate": 0.0014553754204253916, "loss": 0.2316, "step": 36230 }, { "epoch": 0.06424233075261153, "grad_norm": 0.19140625, "learning_rate": 0.0014553207374482411, "loss": 0.2083, "step": 36232 }, { "epoch": 0.06424587691792134, "grad_norm": 0.7890625, "learning_rate": 0.001455266052917124, "loss": 0.2225, "step": 36234 }, { "epoch": 0.06424942308323116, "grad_norm": 0.451171875, "learning_rate": 0.0014552113668322795, "loss": 0.1507, "step": 36236 }, { "epoch": 0.06425296924854097, "grad_norm": 0.29296875, "learning_rate": 0.0014551566791939465, "loss": 0.1725, "step": 36238 }, { "epoch": 0.06425651541385079, "grad_norm": 0.39453125, "learning_rate": 0.001455101990002365, "loss": 0.32, "step": 36240 }, { "epoch": 0.0642600615791606, "grad_norm": 0.91015625, "learning_rate": 0.0014550472992577732, "loss": 0.2742, "step": 36242 }, { "epoch": 0.06426360774447042, "grad_norm": 5.21875, "learning_rate": 0.0014549926069604112, "loss": 0.3501, "step": 36244 }, { "epoch": 0.06426715390978023, "grad_norm": 0.86328125, "learning_rate": 0.0014549379131105176, "loss": 0.1706, "step": 36246 }, { "epoch": 0.06427070007509005, "grad_norm": 7.4375, "learning_rate": 0.0014548832177083317, "loss": 0.1495, "step": 36248 }, { "epoch": 0.06427424624039986, "grad_norm": 0.357421875, "learning_rate": 0.0014548285207540929, "loss": 0.2269, "step": 36250 }, { "epoch": 0.06427779240570967, "grad_norm": 0.388671875, "learning_rate": 0.0014547738222480398, "loss": 0.1996, "step": 36252 }, { "epoch": 0.06428133857101949, "grad_norm": 0.498046875, "learning_rate": 0.0014547191221904125, "loss": 0.1657, "step": 36254 }, { "epoch": 0.06428488473632932, "grad_norm": 0.5703125, "learning_rate": 0.0014546644205814495, "loss": 0.2608, "step": 36256 }, { "epoch": 0.06428843090163913, "grad_norm": 0.443359375, "learning_rate": 0.0014546097174213907, "loss": 0.1732, "step": 36258 }, { "epoch": 0.06429197706694895, "grad_norm": 0.7109375, "learning_rate": 0.0014545550127104746, "loss": 0.1905, "step": 36260 }, { "epoch": 0.06429552323225876, "grad_norm": 0.240234375, "learning_rate": 0.0014545003064489407, "loss": 0.1904, "step": 36262 }, { "epoch": 0.06429906939756858, "grad_norm": 0.333984375, "learning_rate": 0.0014544455986370288, "loss": 0.1509, "step": 36264 }, { "epoch": 0.06430261556287839, "grad_norm": 0.306640625, "learning_rate": 0.0014543908892749777, "loss": 0.1942, "step": 36266 }, { "epoch": 0.0643061617281882, "grad_norm": 0.53125, "learning_rate": 0.0014543361783630265, "loss": 0.1972, "step": 36268 }, { "epoch": 0.06430970789349802, "grad_norm": 0.369140625, "learning_rate": 0.0014542814659014149, "loss": 0.1959, "step": 36270 }, { "epoch": 0.06431325405880783, "grad_norm": 1.4921875, "learning_rate": 0.0014542267518903819, "loss": 0.2879, "step": 36272 }, { "epoch": 0.06431680022411765, "grad_norm": 1.0625, "learning_rate": 0.0014541720363301666, "loss": 0.1796, "step": 36274 }, { "epoch": 0.06432034638942746, "grad_norm": 0.4453125, "learning_rate": 0.0014541173192210088, "loss": 0.1574, "step": 36276 }, { "epoch": 0.06432389255473728, "grad_norm": 0.294921875, "learning_rate": 0.0014540626005631477, "loss": 0.1249, "step": 36278 }, { "epoch": 0.06432743872004709, "grad_norm": 0.84375, "learning_rate": 0.0014540078803568222, "loss": 0.2021, "step": 36280 }, { "epoch": 0.0643309848853569, "grad_norm": 1.0625, "learning_rate": 0.001453953158602272, "loss": 0.1457, "step": 36282 }, { "epoch": 0.06433453105066672, "grad_norm": 0.3828125, "learning_rate": 0.001453898435299736, "loss": 0.2111, "step": 36284 }, { "epoch": 0.06433807721597654, "grad_norm": 0.236328125, "learning_rate": 0.001453843710449454, "loss": 0.188, "step": 36286 }, { "epoch": 0.06434162338128635, "grad_norm": 0.2451171875, "learning_rate": 0.0014537889840516652, "loss": 0.1982, "step": 36288 }, { "epoch": 0.06434516954659616, "grad_norm": 1.09375, "learning_rate": 0.0014537342561066087, "loss": 0.1983, "step": 36290 }, { "epoch": 0.06434871571190598, "grad_norm": 0.54296875, "learning_rate": 0.0014536795266145242, "loss": 0.2552, "step": 36292 }, { "epoch": 0.0643522618772158, "grad_norm": 0.49609375, "learning_rate": 0.0014536247955756509, "loss": 0.2157, "step": 36294 }, { "epoch": 0.06435580804252561, "grad_norm": 0.35546875, "learning_rate": 0.001453570062990228, "loss": 0.1916, "step": 36296 }, { "epoch": 0.06435935420783542, "grad_norm": 0.84375, "learning_rate": 0.0014535153288584952, "loss": 0.2325, "step": 36298 }, { "epoch": 0.06436290037314525, "grad_norm": 0.72265625, "learning_rate": 0.0014534605931806916, "loss": 0.1888, "step": 36300 }, { "epoch": 0.06436644653845507, "grad_norm": 0.2275390625, "learning_rate": 0.0014534058559570568, "loss": 0.1459, "step": 36302 }, { "epoch": 0.06436999270376488, "grad_norm": 0.412109375, "learning_rate": 0.00145335111718783, "loss": 0.3498, "step": 36304 }, { "epoch": 0.0643735388690747, "grad_norm": 0.7265625, "learning_rate": 0.0014532963768732505, "loss": 0.3083, "step": 36306 }, { "epoch": 0.06437708503438451, "grad_norm": 0.51953125, "learning_rate": 0.0014532416350135583, "loss": 0.1513, "step": 36308 }, { "epoch": 0.06438063119969432, "grad_norm": 0.80078125, "learning_rate": 0.001453186891608992, "loss": 0.1618, "step": 36310 }, { "epoch": 0.06438417736500414, "grad_norm": 0.251953125, "learning_rate": 0.0014531321466597913, "loss": 0.1669, "step": 36312 }, { "epoch": 0.06438772353031395, "grad_norm": 0.8984375, "learning_rate": 0.0014530774001661959, "loss": 0.4763, "step": 36314 }, { "epoch": 0.06439126969562377, "grad_norm": 0.2890625, "learning_rate": 0.001453022652128445, "loss": 0.1652, "step": 36316 }, { "epoch": 0.06439481586093358, "grad_norm": 0.478515625, "learning_rate": 0.0014529679025467783, "loss": 0.1722, "step": 36318 }, { "epoch": 0.0643983620262434, "grad_norm": 0.185546875, "learning_rate": 0.001452913151421435, "loss": 0.175, "step": 36320 }, { "epoch": 0.06440190819155321, "grad_norm": 1.40625, "learning_rate": 0.0014528583987526544, "loss": 0.4955, "step": 36322 }, { "epoch": 0.06440545435686303, "grad_norm": 0.283203125, "learning_rate": 0.001452803644540676, "loss": 0.21, "step": 36324 }, { "epoch": 0.06440900052217284, "grad_norm": 1.3203125, "learning_rate": 0.0014527488887857397, "loss": 0.1896, "step": 36326 }, { "epoch": 0.06441254668748266, "grad_norm": 3.234375, "learning_rate": 0.0014526941314880841, "loss": 0.3934, "step": 36328 }, { "epoch": 0.06441609285279247, "grad_norm": 0.41015625, "learning_rate": 0.00145263937264795, "loss": 0.1678, "step": 36330 }, { "epoch": 0.06441963901810228, "grad_norm": 0.341796875, "learning_rate": 0.0014525846122655755, "loss": 0.2592, "step": 36332 }, { "epoch": 0.0644231851834121, "grad_norm": 0.359375, "learning_rate": 0.001452529850341201, "loss": 0.1992, "step": 36334 }, { "epoch": 0.06442673134872191, "grad_norm": 0.67578125, "learning_rate": 0.0014524750868750657, "loss": 0.1976, "step": 36336 }, { "epoch": 0.06443027751403173, "grad_norm": 0.39453125, "learning_rate": 0.001452420321867409, "loss": 0.2053, "step": 36338 }, { "epoch": 0.06443382367934154, "grad_norm": 0.458984375, "learning_rate": 0.0014523655553184706, "loss": 0.2175, "step": 36340 }, { "epoch": 0.06443736984465136, "grad_norm": 0.234375, "learning_rate": 0.0014523107872284901, "loss": 0.1597, "step": 36342 }, { "epoch": 0.06444091600996117, "grad_norm": 2.109375, "learning_rate": 0.0014522560175977066, "loss": 0.2435, "step": 36344 }, { "epoch": 0.064444462175271, "grad_norm": 0.76171875, "learning_rate": 0.0014522012464263602, "loss": 0.2393, "step": 36346 }, { "epoch": 0.06444800834058081, "grad_norm": 0.48046875, "learning_rate": 0.0014521464737146897, "loss": 0.1559, "step": 36348 }, { "epoch": 0.06445155450589063, "grad_norm": 0.578125, "learning_rate": 0.0014520916994629352, "loss": 0.2315, "step": 36350 }, { "epoch": 0.06445510067120044, "grad_norm": 0.84375, "learning_rate": 0.0014520369236713365, "loss": 0.2131, "step": 36352 }, { "epoch": 0.06445864683651026, "grad_norm": 0.287109375, "learning_rate": 0.0014519821463401322, "loss": 0.168, "step": 36354 }, { "epoch": 0.06446219300182007, "grad_norm": 0.2578125, "learning_rate": 0.001451927367469563, "loss": 0.1601, "step": 36356 }, { "epoch": 0.06446573916712989, "grad_norm": 0.3125, "learning_rate": 0.0014518725870598676, "loss": 0.1772, "step": 36358 }, { "epoch": 0.0644692853324397, "grad_norm": 0.1630859375, "learning_rate": 0.001451817805111286, "loss": 0.1685, "step": 36360 }, { "epoch": 0.06447283149774952, "grad_norm": 0.8671875, "learning_rate": 0.0014517630216240576, "loss": 0.2324, "step": 36362 }, { "epoch": 0.06447637766305933, "grad_norm": 0.376953125, "learning_rate": 0.0014517082365984222, "loss": 0.1191, "step": 36364 }, { "epoch": 0.06447992382836915, "grad_norm": 0.67578125, "learning_rate": 0.0014516534500346192, "loss": 0.1405, "step": 36366 }, { "epoch": 0.06448346999367896, "grad_norm": 0.3203125, "learning_rate": 0.0014515986619328882, "loss": 0.1526, "step": 36368 }, { "epoch": 0.06448701615898877, "grad_norm": 0.30078125, "learning_rate": 0.0014515438722934688, "loss": 0.1906, "step": 36370 }, { "epoch": 0.06449056232429859, "grad_norm": 5.375, "learning_rate": 0.0014514890811166013, "loss": 0.2426, "step": 36372 }, { "epoch": 0.0644941084896084, "grad_norm": 0.2138671875, "learning_rate": 0.0014514342884025242, "loss": 0.1602, "step": 36374 }, { "epoch": 0.06449765465491822, "grad_norm": 0.515625, "learning_rate": 0.0014513794941514778, "loss": 0.2355, "step": 36376 }, { "epoch": 0.06450120082022803, "grad_norm": 0.341796875, "learning_rate": 0.0014513246983637017, "loss": 0.2262, "step": 36378 }, { "epoch": 0.06450474698553785, "grad_norm": 1.265625, "learning_rate": 0.0014512699010394352, "loss": 0.2276, "step": 36380 }, { "epoch": 0.06450829315084766, "grad_norm": 0.75390625, "learning_rate": 0.0014512151021789182, "loss": 0.1989, "step": 36382 }, { "epoch": 0.06451183931615748, "grad_norm": 0.54296875, "learning_rate": 0.0014511603017823905, "loss": 0.2097, "step": 36384 }, { "epoch": 0.06451538548146729, "grad_norm": 0.83984375, "learning_rate": 0.0014511054998500915, "loss": 0.2595, "step": 36386 }, { "epoch": 0.0645189316467771, "grad_norm": 0.59765625, "learning_rate": 0.001451050696382261, "loss": 0.1755, "step": 36388 }, { "epoch": 0.06452247781208692, "grad_norm": 1.453125, "learning_rate": 0.0014509958913791385, "loss": 0.2211, "step": 36390 }, { "epoch": 0.06452602397739675, "grad_norm": 0.44140625, "learning_rate": 0.0014509410848409642, "loss": 0.224, "step": 36392 }, { "epoch": 0.06452957014270656, "grad_norm": 0.44140625, "learning_rate": 0.0014508862767679776, "loss": 0.1818, "step": 36394 }, { "epoch": 0.06453311630801638, "grad_norm": 0.341796875, "learning_rate": 0.0014508314671604176, "loss": 0.2104, "step": 36396 }, { "epoch": 0.06453666247332619, "grad_norm": 4.53125, "learning_rate": 0.0014507766560185249, "loss": 0.1676, "step": 36398 }, { "epoch": 0.064540208638636, "grad_norm": 0.5625, "learning_rate": 0.0014507218433425391, "loss": 0.1774, "step": 36400 }, { "epoch": 0.06454375480394582, "grad_norm": 0.1796875, "learning_rate": 0.0014506670291326994, "loss": 0.1746, "step": 36402 }, { "epoch": 0.06454730096925564, "grad_norm": 0.296875, "learning_rate": 0.001450612213389246, "loss": 0.1791, "step": 36404 }, { "epoch": 0.06455084713456545, "grad_norm": 0.29296875, "learning_rate": 0.0014505573961124181, "loss": 0.1908, "step": 36406 }, { "epoch": 0.06455439329987527, "grad_norm": 0.4296875, "learning_rate": 0.0014505025773024558, "loss": 0.1711, "step": 36408 }, { "epoch": 0.06455793946518508, "grad_norm": 0.27734375, "learning_rate": 0.001450447756959599, "loss": 0.188, "step": 36410 }, { "epoch": 0.0645614856304949, "grad_norm": 0.80078125, "learning_rate": 0.0014503929350840874, "loss": 0.1908, "step": 36412 }, { "epoch": 0.06456503179580471, "grad_norm": 0.8125, "learning_rate": 0.0014503381116761607, "loss": 0.2488, "step": 36414 }, { "epoch": 0.06456857796111452, "grad_norm": 0.66015625, "learning_rate": 0.0014502832867360582, "loss": 0.2516, "step": 36416 }, { "epoch": 0.06457212412642434, "grad_norm": 0.46875, "learning_rate": 0.0014502284602640203, "loss": 0.2259, "step": 36418 }, { "epoch": 0.06457567029173415, "grad_norm": 0.279296875, "learning_rate": 0.0014501736322602868, "loss": 0.1394, "step": 36420 }, { "epoch": 0.06457921645704397, "grad_norm": 0.78125, "learning_rate": 0.0014501188027250967, "loss": 0.2057, "step": 36422 }, { "epoch": 0.06458276262235378, "grad_norm": 0.20703125, "learning_rate": 0.0014500639716586907, "loss": 0.2114, "step": 36424 }, { "epoch": 0.0645863087876636, "grad_norm": 0.259765625, "learning_rate": 0.0014500091390613082, "loss": 0.1846, "step": 36426 }, { "epoch": 0.06458985495297341, "grad_norm": 0.35546875, "learning_rate": 0.001449954304933189, "loss": 0.1537, "step": 36428 }, { "epoch": 0.06459340111828323, "grad_norm": 0.27734375, "learning_rate": 0.0014498994692745727, "loss": 0.1967, "step": 36430 }, { "epoch": 0.06459694728359304, "grad_norm": 0.59375, "learning_rate": 0.0014498446320856998, "loss": 0.2512, "step": 36432 }, { "epoch": 0.06460049344890285, "grad_norm": 0.2490234375, "learning_rate": 0.0014497897933668095, "loss": 0.1404, "step": 36434 }, { "epoch": 0.06460403961421268, "grad_norm": 0.984375, "learning_rate": 0.0014497349531181415, "loss": 0.196, "step": 36436 }, { "epoch": 0.0646075857795225, "grad_norm": 0.4140625, "learning_rate": 0.0014496801113399363, "loss": 0.2646, "step": 36438 }, { "epoch": 0.06461113194483231, "grad_norm": 0.283203125, "learning_rate": 0.0014496252680324332, "loss": 0.1575, "step": 36440 }, { "epoch": 0.06461467811014213, "grad_norm": 0.244140625, "learning_rate": 0.0014495704231958727, "loss": 0.1946, "step": 36442 }, { "epoch": 0.06461822427545194, "grad_norm": 2.046875, "learning_rate": 0.0014495155768304937, "loss": 0.2554, "step": 36444 }, { "epoch": 0.06462177044076176, "grad_norm": 0.41796875, "learning_rate": 0.001449460728936537, "loss": 0.2381, "step": 36446 }, { "epoch": 0.06462531660607157, "grad_norm": 1.5390625, "learning_rate": 0.001449405879514242, "loss": 0.2096, "step": 36448 }, { "epoch": 0.06462886277138138, "grad_norm": 0.396484375, "learning_rate": 0.0014493510285638488, "loss": 0.2699, "step": 36450 }, { "epoch": 0.0646324089366912, "grad_norm": 0.77734375, "learning_rate": 0.0014492961760855967, "loss": 0.1699, "step": 36452 }, { "epoch": 0.06463595510200101, "grad_norm": 0.67578125, "learning_rate": 0.0014492413220797262, "loss": 0.2214, "step": 36454 }, { "epoch": 0.06463950126731083, "grad_norm": 0.515625, "learning_rate": 0.001449186466546477, "loss": 0.1441, "step": 36456 }, { "epoch": 0.06464304743262064, "grad_norm": 0.7890625, "learning_rate": 0.0014491316094860892, "loss": 0.2654, "step": 36458 }, { "epoch": 0.06464659359793046, "grad_norm": 0.6484375, "learning_rate": 0.0014490767508988022, "loss": 0.3156, "step": 36460 }, { "epoch": 0.06465013976324027, "grad_norm": 0.3046875, "learning_rate": 0.0014490218907848567, "loss": 0.1824, "step": 36462 }, { "epoch": 0.06465368592855009, "grad_norm": 0.57421875, "learning_rate": 0.0014489670291444917, "loss": 0.2224, "step": 36464 }, { "epoch": 0.0646572320938599, "grad_norm": 0.54296875, "learning_rate": 0.0014489121659779479, "loss": 0.1638, "step": 36466 }, { "epoch": 0.06466077825916972, "grad_norm": 0.51171875, "learning_rate": 0.0014488573012854651, "loss": 0.1622, "step": 36468 }, { "epoch": 0.06466432442447953, "grad_norm": 0.2890625, "learning_rate": 0.0014488024350672829, "loss": 0.2092, "step": 36470 }, { "epoch": 0.06466787058978934, "grad_norm": 0.6171875, "learning_rate": 0.0014487475673236414, "loss": 0.1636, "step": 36472 }, { "epoch": 0.06467141675509916, "grad_norm": 2.296875, "learning_rate": 0.0014486926980547808, "loss": 0.2008, "step": 36474 }, { "epoch": 0.06467496292040897, "grad_norm": 0.392578125, "learning_rate": 0.0014486378272609405, "loss": 0.2171, "step": 36476 }, { "epoch": 0.06467850908571879, "grad_norm": 0.5625, "learning_rate": 0.0014485829549423612, "loss": 0.2024, "step": 36478 }, { "epoch": 0.0646820552510286, "grad_norm": 0.52734375, "learning_rate": 0.0014485280810992824, "loss": 0.2262, "step": 36480 }, { "epoch": 0.06468560141633843, "grad_norm": 0.2236328125, "learning_rate": 0.0014484732057319445, "loss": 0.169, "step": 36482 }, { "epoch": 0.06468914758164825, "grad_norm": 0.6328125, "learning_rate": 0.0014484183288405868, "loss": 0.2095, "step": 36484 }, { "epoch": 0.06469269374695806, "grad_norm": 0.62109375, "learning_rate": 0.0014483634504254502, "loss": 0.1546, "step": 36486 }, { "epoch": 0.06469623991226787, "grad_norm": 0.1826171875, "learning_rate": 0.001448308570486774, "loss": 0.1562, "step": 36488 }, { "epoch": 0.06469978607757769, "grad_norm": 0.33203125, "learning_rate": 0.0014482536890247984, "loss": 0.2207, "step": 36490 }, { "epoch": 0.0647033322428875, "grad_norm": 0.81640625, "learning_rate": 0.0014481988060397633, "loss": 0.1767, "step": 36492 }, { "epoch": 0.06470687840819732, "grad_norm": 10.8125, "learning_rate": 0.001448143921531909, "loss": 0.2334, "step": 36494 }, { "epoch": 0.06471042457350713, "grad_norm": 0.6171875, "learning_rate": 0.0014480890355014753, "loss": 0.2408, "step": 36496 }, { "epoch": 0.06471397073881695, "grad_norm": 0.203125, "learning_rate": 0.0014480341479487025, "loss": 0.2687, "step": 36498 }, { "epoch": 0.06471751690412676, "grad_norm": 0.447265625, "learning_rate": 0.0014479792588738304, "loss": 0.2146, "step": 36500 }, { "epoch": 0.06472106306943658, "grad_norm": 0.546875, "learning_rate": 0.0014479243682770993, "loss": 0.1985, "step": 36502 }, { "epoch": 0.06472460923474639, "grad_norm": 0.240234375, "learning_rate": 0.001447869476158749, "loss": 0.2189, "step": 36504 }, { "epoch": 0.0647281554000562, "grad_norm": 0.83203125, "learning_rate": 0.00144781458251902, "loss": 0.5075, "step": 36506 }, { "epoch": 0.06473170156536602, "grad_norm": 0.53125, "learning_rate": 0.0014477596873581518, "loss": 0.3733, "step": 36508 }, { "epoch": 0.06473524773067584, "grad_norm": 0.7578125, "learning_rate": 0.0014477047906763848, "loss": 0.1987, "step": 36510 }, { "epoch": 0.06473879389598565, "grad_norm": 0.466796875, "learning_rate": 0.0014476498924739587, "loss": 0.2054, "step": 36512 }, { "epoch": 0.06474234006129546, "grad_norm": 0.431640625, "learning_rate": 0.0014475949927511144, "loss": 0.1846, "step": 36514 }, { "epoch": 0.06474588622660528, "grad_norm": 1.046875, "learning_rate": 0.001447540091508091, "loss": 0.2396, "step": 36516 }, { "epoch": 0.0647494323919151, "grad_norm": 1.03125, "learning_rate": 0.0014474851887451292, "loss": 0.1649, "step": 36518 }, { "epoch": 0.06475297855722491, "grad_norm": 1.3125, "learning_rate": 0.0014474302844624692, "loss": 0.3179, "step": 36520 }, { "epoch": 0.06475652472253472, "grad_norm": 0.291015625, "learning_rate": 0.0014473753786603508, "loss": 0.1754, "step": 36522 }, { "epoch": 0.06476007088784454, "grad_norm": 0.90625, "learning_rate": 0.0014473204713390147, "loss": 0.1738, "step": 36524 }, { "epoch": 0.06476361705315435, "grad_norm": 0.330078125, "learning_rate": 0.0014472655624987003, "loss": 0.2214, "step": 36526 }, { "epoch": 0.06476716321846418, "grad_norm": 0.2109375, "learning_rate": 0.001447210652139648, "loss": 0.1667, "step": 36528 }, { "epoch": 0.064770709383774, "grad_norm": 0.263671875, "learning_rate": 0.0014471557402620978, "loss": 0.2205, "step": 36530 }, { "epoch": 0.06477425554908381, "grad_norm": 0.26953125, "learning_rate": 0.0014471008268662907, "loss": 0.2237, "step": 36532 }, { "epoch": 0.06477780171439362, "grad_norm": 0.42578125, "learning_rate": 0.0014470459119524655, "loss": 0.193, "step": 36534 }, { "epoch": 0.06478134787970344, "grad_norm": 0.32421875, "learning_rate": 0.0014469909955208632, "loss": 0.223, "step": 36536 }, { "epoch": 0.06478489404501325, "grad_norm": 0.34765625, "learning_rate": 0.0014469360775717242, "loss": 0.1715, "step": 36538 }, { "epoch": 0.06478844021032307, "grad_norm": 0.63671875, "learning_rate": 0.001446881158105288, "loss": 0.2115, "step": 36540 }, { "epoch": 0.06479198637563288, "grad_norm": 1.3359375, "learning_rate": 0.0014468262371217953, "loss": 0.1433, "step": 36542 }, { "epoch": 0.0647955325409427, "grad_norm": 0.30078125, "learning_rate": 0.0014467713146214863, "loss": 0.1917, "step": 36544 }, { "epoch": 0.06479907870625251, "grad_norm": 1.890625, "learning_rate": 0.0014467163906046005, "loss": 0.3532, "step": 36546 }, { "epoch": 0.06480262487156233, "grad_norm": 0.40625, "learning_rate": 0.001446661465071379, "loss": 0.2, "step": 36548 }, { "epoch": 0.06480617103687214, "grad_norm": 0.34375, "learning_rate": 0.0014466065380220614, "loss": 0.1925, "step": 36550 }, { "epoch": 0.06480971720218195, "grad_norm": 0.7578125, "learning_rate": 0.0014465516094568883, "loss": 0.1865, "step": 36552 }, { "epoch": 0.06481326336749177, "grad_norm": 0.404296875, "learning_rate": 0.0014464966793760995, "loss": 0.1973, "step": 36554 }, { "epoch": 0.06481680953280158, "grad_norm": 0.33984375, "learning_rate": 0.0014464417477799355, "loss": 0.2452, "step": 36556 }, { "epoch": 0.0648203556981114, "grad_norm": 0.5234375, "learning_rate": 0.0014463868146686368, "loss": 0.1677, "step": 36558 }, { "epoch": 0.06482390186342121, "grad_norm": 0.271484375, "learning_rate": 0.0014463318800424431, "loss": 0.1846, "step": 36560 }, { "epoch": 0.06482744802873103, "grad_norm": 0.400390625, "learning_rate": 0.0014462769439015952, "loss": 0.1833, "step": 36562 }, { "epoch": 0.06483099419404084, "grad_norm": 0.408203125, "learning_rate": 0.0014462220062463331, "loss": 0.2178, "step": 36564 }, { "epoch": 0.06483454035935066, "grad_norm": 0.51953125, "learning_rate": 0.001446167067076897, "loss": 0.248, "step": 36566 }, { "epoch": 0.06483808652466047, "grad_norm": 0.49609375, "learning_rate": 0.001446112126393527, "loss": 0.1366, "step": 36568 }, { "epoch": 0.06484163268997029, "grad_norm": 0.37890625, "learning_rate": 0.0014460571841964638, "loss": 0.2334, "step": 36570 }, { "epoch": 0.06484517885528011, "grad_norm": 0.337890625, "learning_rate": 0.0014460022404859474, "loss": 0.197, "step": 36572 }, { "epoch": 0.06484872502058993, "grad_norm": 0.2177734375, "learning_rate": 0.001445947295262218, "loss": 0.1325, "step": 36574 }, { "epoch": 0.06485227118589974, "grad_norm": 0.314453125, "learning_rate": 0.0014458923485255167, "loss": 0.2116, "step": 36576 }, { "epoch": 0.06485581735120956, "grad_norm": 1.421875, "learning_rate": 0.0014458374002760826, "loss": 0.2492, "step": 36578 }, { "epoch": 0.06485936351651937, "grad_norm": 0.79296875, "learning_rate": 0.0014457824505141569, "loss": 0.2067, "step": 36580 }, { "epoch": 0.06486290968182919, "grad_norm": 0.35546875, "learning_rate": 0.0014457274992399796, "loss": 0.205, "step": 36582 }, { "epoch": 0.064866455847139, "grad_norm": 0.72265625, "learning_rate": 0.001445672546453791, "loss": 0.4659, "step": 36584 }, { "epoch": 0.06487000201244882, "grad_norm": 1.65625, "learning_rate": 0.0014456175921558313, "loss": 0.2308, "step": 36586 }, { "epoch": 0.06487354817775863, "grad_norm": 1.46875, "learning_rate": 0.001445562636346341, "loss": 0.2502, "step": 36588 }, { "epoch": 0.06487709434306844, "grad_norm": 0.78125, "learning_rate": 0.0014455076790255608, "loss": 0.1896, "step": 36590 }, { "epoch": 0.06488064050837826, "grad_norm": 1.2890625, "learning_rate": 0.0014454527201937305, "loss": 0.2547, "step": 36592 }, { "epoch": 0.06488418667368807, "grad_norm": 7.84375, "learning_rate": 0.0014453977598510904, "loss": 0.2256, "step": 36594 }, { "epoch": 0.06488773283899789, "grad_norm": 0.734375, "learning_rate": 0.0014453427979978816, "loss": 0.2695, "step": 36596 }, { "epoch": 0.0648912790043077, "grad_norm": 4.5, "learning_rate": 0.0014452878346343434, "loss": 0.3166, "step": 36598 }, { "epoch": 0.06489482516961752, "grad_norm": 0.53125, "learning_rate": 0.0014452328697607173, "loss": 0.3663, "step": 36600 }, { "epoch": 0.06489837133492733, "grad_norm": 0.57421875, "learning_rate": 0.0014451779033772434, "loss": 0.1823, "step": 36602 }, { "epoch": 0.06490191750023715, "grad_norm": 0.43359375, "learning_rate": 0.0014451229354841613, "loss": 0.1404, "step": 36604 }, { "epoch": 0.06490546366554696, "grad_norm": 0.365234375, "learning_rate": 0.0014450679660817122, "loss": 0.16, "step": 36606 }, { "epoch": 0.06490900983085678, "grad_norm": 0.18359375, "learning_rate": 0.001445012995170136, "loss": 0.1443, "step": 36608 }, { "epoch": 0.06491255599616659, "grad_norm": 2.9375, "learning_rate": 0.0014449580227496738, "loss": 0.4874, "step": 36610 }, { "epoch": 0.0649161021614764, "grad_norm": 0.62890625, "learning_rate": 0.0014449030488205652, "loss": 0.1897, "step": 36612 }, { "epoch": 0.06491964832678622, "grad_norm": 0.671875, "learning_rate": 0.001444848073383051, "loss": 0.1926, "step": 36614 }, { "epoch": 0.06492319449209603, "grad_norm": 0.404296875, "learning_rate": 0.0014447930964373717, "loss": 0.2293, "step": 36616 }, { "epoch": 0.06492674065740586, "grad_norm": 0.486328125, "learning_rate": 0.0014447381179837678, "loss": 0.254, "step": 36618 }, { "epoch": 0.06493028682271568, "grad_norm": 0.77734375, "learning_rate": 0.00144468313802248, "loss": 0.1651, "step": 36620 }, { "epoch": 0.06493383298802549, "grad_norm": 0.67578125, "learning_rate": 0.001444628156553748, "loss": 0.2224, "step": 36622 }, { "epoch": 0.0649373791533353, "grad_norm": 0.1806640625, "learning_rate": 0.0014445731735778123, "loss": 0.1942, "step": 36624 }, { "epoch": 0.06494092531864512, "grad_norm": 0.443359375, "learning_rate": 0.001444518189094914, "loss": 0.2067, "step": 36626 }, { "epoch": 0.06494447148395494, "grad_norm": 0.451171875, "learning_rate": 0.0014444632031052932, "loss": 0.1854, "step": 36628 }, { "epoch": 0.06494801764926475, "grad_norm": 0.33203125, "learning_rate": 0.0014444082156091908, "loss": 0.1836, "step": 36630 }, { "epoch": 0.06495156381457456, "grad_norm": 0.400390625, "learning_rate": 0.0014443532266068461, "loss": 0.2847, "step": 36632 }, { "epoch": 0.06495510997988438, "grad_norm": 0.859375, "learning_rate": 0.001444298236098501, "loss": 0.3273, "step": 36634 }, { "epoch": 0.0649586561451942, "grad_norm": 0.384765625, "learning_rate": 0.0014442432440843955, "loss": 0.1722, "step": 36636 }, { "epoch": 0.06496220231050401, "grad_norm": 0.5, "learning_rate": 0.0014441882505647698, "loss": 0.2176, "step": 36638 }, { "epoch": 0.06496574847581382, "grad_norm": 0.5, "learning_rate": 0.0014441332555398644, "loss": 0.212, "step": 36640 }, { "epoch": 0.06496929464112364, "grad_norm": 0.4296875, "learning_rate": 0.0014440782590099204, "loss": 0.2109, "step": 36642 }, { "epoch": 0.06497284080643345, "grad_norm": 0.310546875, "learning_rate": 0.0014440232609751777, "loss": 0.2014, "step": 36644 }, { "epoch": 0.06497638697174327, "grad_norm": 0.890625, "learning_rate": 0.0014439682614358774, "loss": 0.2327, "step": 36646 }, { "epoch": 0.06497993313705308, "grad_norm": 0.46875, "learning_rate": 0.0014439132603922594, "loss": 0.2039, "step": 36648 }, { "epoch": 0.0649834793023629, "grad_norm": 0.337890625, "learning_rate": 0.0014438582578445648, "loss": 0.3073, "step": 36650 }, { "epoch": 0.06498702546767271, "grad_norm": 0.5546875, "learning_rate": 0.001443803253793034, "loss": 0.1985, "step": 36652 }, { "epoch": 0.06499057163298252, "grad_norm": 0.455078125, "learning_rate": 0.0014437482482379071, "loss": 0.2182, "step": 36654 }, { "epoch": 0.06499411779829234, "grad_norm": 0.470703125, "learning_rate": 0.0014436932411794254, "loss": 0.2987, "step": 36656 }, { "epoch": 0.06499766396360215, "grad_norm": 0.44921875, "learning_rate": 0.001443638232617829, "loss": 0.1842, "step": 36658 }, { "epoch": 0.06500121012891197, "grad_norm": 0.376953125, "learning_rate": 0.0014435832225533586, "loss": 0.1771, "step": 36660 }, { "epoch": 0.06500475629422178, "grad_norm": 0.431640625, "learning_rate": 0.0014435282109862546, "loss": 0.2434, "step": 36662 }, { "epoch": 0.06500830245953161, "grad_norm": 0.470703125, "learning_rate": 0.0014434731979167577, "loss": 0.1737, "step": 36664 }, { "epoch": 0.06501184862484143, "grad_norm": 0.333984375, "learning_rate": 0.0014434181833451088, "loss": 0.1519, "step": 36666 }, { "epoch": 0.06501539479015124, "grad_norm": 0.61328125, "learning_rate": 0.0014433631672715482, "loss": 0.1741, "step": 36668 }, { "epoch": 0.06501894095546105, "grad_norm": 0.38671875, "learning_rate": 0.0014433081496963165, "loss": 0.4655, "step": 36670 }, { "epoch": 0.06502248712077087, "grad_norm": 0.66015625, "learning_rate": 0.0014432531306196542, "loss": 0.1809, "step": 36672 }, { "epoch": 0.06502603328608068, "grad_norm": 0.443359375, "learning_rate": 0.0014431981100418024, "loss": 0.1371, "step": 36674 }, { "epoch": 0.0650295794513905, "grad_norm": 0.2421875, "learning_rate": 0.0014431430879630013, "loss": 0.3104, "step": 36676 }, { "epoch": 0.06503312561670031, "grad_norm": 0.203125, "learning_rate": 0.0014430880643834915, "loss": 0.1735, "step": 36678 }, { "epoch": 0.06503667178201013, "grad_norm": 0.703125, "learning_rate": 0.001443033039303514, "loss": 0.1618, "step": 36680 }, { "epoch": 0.06504021794731994, "grad_norm": 0.3515625, "learning_rate": 0.001442978012723309, "loss": 0.1513, "step": 36682 }, { "epoch": 0.06504376411262976, "grad_norm": 0.53125, "learning_rate": 0.0014429229846431176, "loss": 0.1749, "step": 36684 }, { "epoch": 0.06504731027793957, "grad_norm": 0.37109375, "learning_rate": 0.0014428679550631803, "loss": 0.2065, "step": 36686 }, { "epoch": 0.06505085644324939, "grad_norm": 1.3828125, "learning_rate": 0.0014428129239837375, "loss": 0.2029, "step": 36688 }, { "epoch": 0.0650544026085592, "grad_norm": 0.8125, "learning_rate": 0.0014427578914050303, "loss": 0.2002, "step": 36690 }, { "epoch": 0.06505794877386901, "grad_norm": 2.140625, "learning_rate": 0.0014427028573272991, "loss": 0.1988, "step": 36692 }, { "epoch": 0.06506149493917883, "grad_norm": 0.50390625, "learning_rate": 0.0014426478217507848, "loss": 0.1601, "step": 36694 }, { "epoch": 0.06506504110448864, "grad_norm": 0.5078125, "learning_rate": 0.001442592784675728, "loss": 0.1962, "step": 36696 }, { "epoch": 0.06506858726979846, "grad_norm": 0.9140625, "learning_rate": 0.001442537746102369, "loss": 0.1833, "step": 36698 }, { "epoch": 0.06507213343510827, "grad_norm": 0.63671875, "learning_rate": 0.0014424827060309491, "loss": 0.2309, "step": 36700 }, { "epoch": 0.06507567960041809, "grad_norm": 0.8125, "learning_rate": 0.0014424276644617088, "loss": 0.2105, "step": 36702 }, { "epoch": 0.0650792257657279, "grad_norm": 0.96875, "learning_rate": 0.0014423726213948886, "loss": 0.2095, "step": 36704 }, { "epoch": 0.06508277193103772, "grad_norm": 0.349609375, "learning_rate": 0.0014423175768307295, "loss": 0.2679, "step": 36706 }, { "epoch": 0.06508631809634755, "grad_norm": 0.828125, "learning_rate": 0.0014422625307694724, "loss": 0.2106, "step": 36708 }, { "epoch": 0.06508986426165736, "grad_norm": 0.427734375, "learning_rate": 0.0014422074832113577, "loss": 0.1973, "step": 36710 }, { "epoch": 0.06509341042696717, "grad_norm": 0.3359375, "learning_rate": 0.0014421524341566262, "loss": 0.1878, "step": 36712 }, { "epoch": 0.06509695659227699, "grad_norm": 0.53125, "learning_rate": 0.0014420973836055187, "loss": 0.1534, "step": 36714 }, { "epoch": 0.0651005027575868, "grad_norm": 0.35546875, "learning_rate": 0.001442042331558276, "loss": 0.1745, "step": 36716 }, { "epoch": 0.06510404892289662, "grad_norm": 0.4296875, "learning_rate": 0.001441987278015139, "loss": 0.146, "step": 36718 }, { "epoch": 0.06510759508820643, "grad_norm": 0.56640625, "learning_rate": 0.0014419322229763478, "loss": 0.1967, "step": 36720 }, { "epoch": 0.06511114125351625, "grad_norm": 0.56640625, "learning_rate": 0.001441877166442144, "loss": 0.1887, "step": 36722 }, { "epoch": 0.06511468741882606, "grad_norm": 0.296875, "learning_rate": 0.0014418221084127679, "loss": 0.1995, "step": 36724 }, { "epoch": 0.06511823358413588, "grad_norm": 0.3984375, "learning_rate": 0.0014417670488884604, "loss": 0.1698, "step": 36726 }, { "epoch": 0.06512177974944569, "grad_norm": 0.73046875, "learning_rate": 0.0014417119878694627, "loss": 0.1583, "step": 36728 }, { "epoch": 0.0651253259147555, "grad_norm": 1.28125, "learning_rate": 0.001441656925356015, "loss": 0.2448, "step": 36730 }, { "epoch": 0.06512887208006532, "grad_norm": 0.6875, "learning_rate": 0.0014416018613483588, "loss": 0.2002, "step": 36732 }, { "epoch": 0.06513241824537513, "grad_norm": 0.240234375, "learning_rate": 0.001441546795846734, "loss": 0.1711, "step": 36734 }, { "epoch": 0.06513596441068495, "grad_norm": 0.28125, "learning_rate": 0.001441491728851382, "loss": 0.1842, "step": 36736 }, { "epoch": 0.06513951057599476, "grad_norm": 1.109375, "learning_rate": 0.0014414366603625434, "loss": 0.4712, "step": 36738 }, { "epoch": 0.06514305674130458, "grad_norm": 0.49609375, "learning_rate": 0.0014413815903804596, "loss": 0.1786, "step": 36740 }, { "epoch": 0.06514660290661439, "grad_norm": 0.23828125, "learning_rate": 0.0014413265189053706, "loss": 0.1591, "step": 36742 }, { "epoch": 0.06515014907192421, "grad_norm": 0.36328125, "learning_rate": 0.0014412714459375179, "loss": 0.2035, "step": 36744 }, { "epoch": 0.06515369523723402, "grad_norm": 0.28515625, "learning_rate": 0.0014412163714771417, "loss": 0.2669, "step": 36746 }, { "epoch": 0.06515724140254384, "grad_norm": 0.44140625, "learning_rate": 0.0014411612955244838, "loss": 0.2392, "step": 36748 }, { "epoch": 0.06516078756785365, "grad_norm": 0.369140625, "learning_rate": 0.0014411062180797846, "loss": 0.2053, "step": 36750 }, { "epoch": 0.06516433373316347, "grad_norm": 0.71484375, "learning_rate": 0.001441051139143285, "loss": 0.2135, "step": 36752 }, { "epoch": 0.0651678798984733, "grad_norm": 0.8828125, "learning_rate": 0.0014409960587152253, "loss": 0.1991, "step": 36754 }, { "epoch": 0.06517142606378311, "grad_norm": 0.5, "learning_rate": 0.0014409409767958475, "loss": 0.2116, "step": 36756 }, { "epoch": 0.06517497222909292, "grad_norm": 0.70703125, "learning_rate": 0.0014408858933853913, "loss": 0.1963, "step": 36758 }, { "epoch": 0.06517851839440274, "grad_norm": 4.75, "learning_rate": 0.0014408308084840986, "loss": 0.2669, "step": 36760 }, { "epoch": 0.06518206455971255, "grad_norm": 0.859375, "learning_rate": 0.0014407757220922099, "loss": 0.1617, "step": 36762 }, { "epoch": 0.06518561072502237, "grad_norm": 0.345703125, "learning_rate": 0.0014407206342099662, "loss": 0.1233, "step": 36764 }, { "epoch": 0.06518915689033218, "grad_norm": 2.421875, "learning_rate": 0.001440665544837608, "loss": 0.2675, "step": 36766 }, { "epoch": 0.065192703055642, "grad_norm": 1.0234375, "learning_rate": 0.0014406104539753768, "loss": 0.2098, "step": 36768 }, { "epoch": 0.06519624922095181, "grad_norm": 0.24609375, "learning_rate": 0.0014405553616235137, "loss": 0.1872, "step": 36770 }, { "epoch": 0.06519979538626162, "grad_norm": 1.34375, "learning_rate": 0.0014405002677822588, "loss": 0.2578, "step": 36772 }, { "epoch": 0.06520334155157144, "grad_norm": 0.7734375, "learning_rate": 0.0014404451724518534, "loss": 0.1846, "step": 36774 }, { "epoch": 0.06520688771688125, "grad_norm": 0.46484375, "learning_rate": 0.0014403900756325388, "loss": 0.2092, "step": 36776 }, { "epoch": 0.06521043388219107, "grad_norm": 0.6640625, "learning_rate": 0.001440334977324556, "loss": 0.4931, "step": 36778 }, { "epoch": 0.06521398004750088, "grad_norm": 1.296875, "learning_rate": 0.0014402798775281452, "loss": 0.3459, "step": 36780 }, { "epoch": 0.0652175262128107, "grad_norm": 1.328125, "learning_rate": 0.001440224776243548, "loss": 0.2499, "step": 36782 }, { "epoch": 0.06522107237812051, "grad_norm": 0.37890625, "learning_rate": 0.0014401696734710054, "loss": 0.151, "step": 36784 }, { "epoch": 0.06522461854343033, "grad_norm": 1.09375, "learning_rate": 0.0014401145692107583, "loss": 0.3122, "step": 36786 }, { "epoch": 0.06522816470874014, "grad_norm": 0.6796875, "learning_rate": 0.0014400594634630475, "loss": 0.2105, "step": 36788 }, { "epoch": 0.06523171087404996, "grad_norm": 0.43359375, "learning_rate": 0.0014400043562281142, "loss": 0.1764, "step": 36790 }, { "epoch": 0.06523525703935977, "grad_norm": 0.224609375, "learning_rate": 0.0014399492475061994, "loss": 0.1613, "step": 36792 }, { "epoch": 0.06523880320466958, "grad_norm": 0.3359375, "learning_rate": 0.001439894137297544, "loss": 0.1534, "step": 36794 }, { "epoch": 0.0652423493699794, "grad_norm": 8.3125, "learning_rate": 0.001439839025602389, "loss": 0.3257, "step": 36796 }, { "epoch": 0.06524589553528921, "grad_norm": 0.287109375, "learning_rate": 0.0014397839124209754, "loss": 0.1682, "step": 36798 }, { "epoch": 0.06524944170059904, "grad_norm": 0.61328125, "learning_rate": 0.0014397287977535448, "loss": 0.1645, "step": 36800 }, { "epoch": 0.06525298786590886, "grad_norm": 0.4453125, "learning_rate": 0.0014396736816003374, "loss": 0.1693, "step": 36802 }, { "epoch": 0.06525653403121867, "grad_norm": 0.388671875, "learning_rate": 0.0014396185639615946, "loss": 0.1807, "step": 36804 }, { "epoch": 0.06526008019652849, "grad_norm": 0.52734375, "learning_rate": 0.0014395634448375577, "loss": 0.2584, "step": 36806 }, { "epoch": 0.0652636263618383, "grad_norm": 0.703125, "learning_rate": 0.0014395083242284674, "loss": 0.3236, "step": 36808 }, { "epoch": 0.06526717252714812, "grad_norm": 1.203125, "learning_rate": 0.0014394532021345652, "loss": 0.1885, "step": 36810 }, { "epoch": 0.06527071869245793, "grad_norm": 0.5390625, "learning_rate": 0.0014393980785560915, "loss": 0.2197, "step": 36812 }, { "epoch": 0.06527426485776774, "grad_norm": 0.451171875, "learning_rate": 0.0014393429534932875, "loss": 0.1711, "step": 36814 }, { "epoch": 0.06527781102307756, "grad_norm": 0.8671875, "learning_rate": 0.0014392878269463949, "loss": 0.1565, "step": 36816 }, { "epoch": 0.06528135718838737, "grad_norm": 0.65625, "learning_rate": 0.0014392326989156547, "loss": 0.1561, "step": 36818 }, { "epoch": 0.06528490335369719, "grad_norm": 0.2021484375, "learning_rate": 0.0014391775694013072, "loss": 0.4676, "step": 36820 }, { "epoch": 0.065288449519007, "grad_norm": 0.447265625, "learning_rate": 0.0014391224384035944, "loss": 0.2118, "step": 36822 }, { "epoch": 0.06529199568431682, "grad_norm": 2.140625, "learning_rate": 0.0014390673059227568, "loss": 0.2482, "step": 36824 }, { "epoch": 0.06529554184962663, "grad_norm": 0.3046875, "learning_rate": 0.001439012171959036, "loss": 0.1639, "step": 36826 }, { "epoch": 0.06529908801493645, "grad_norm": 0.5234375, "learning_rate": 0.0014389570365126725, "loss": 0.2031, "step": 36828 }, { "epoch": 0.06530263418024626, "grad_norm": 0.298828125, "learning_rate": 0.0014389018995839083, "loss": 0.1613, "step": 36830 }, { "epoch": 0.06530618034555608, "grad_norm": 0.76171875, "learning_rate": 0.0014388467611729835, "loss": 0.1766, "step": 36832 }, { "epoch": 0.06530972651086589, "grad_norm": 0.765625, "learning_rate": 0.0014387916212801402, "loss": 0.1868, "step": 36834 }, { "epoch": 0.0653132726761757, "grad_norm": 0.142578125, "learning_rate": 0.001438736479905619, "loss": 0.1627, "step": 36836 }, { "epoch": 0.06531681884148552, "grad_norm": 0.255859375, "learning_rate": 0.0014386813370496612, "loss": 0.2737, "step": 36838 }, { "epoch": 0.06532036500679533, "grad_norm": 0.57421875, "learning_rate": 0.0014386261927125078, "loss": 0.225, "step": 36840 }, { "epoch": 0.06532391117210515, "grad_norm": 0.56640625, "learning_rate": 0.0014385710468944005, "loss": 0.2013, "step": 36842 }, { "epoch": 0.06532745733741498, "grad_norm": 1.296875, "learning_rate": 0.00143851589959558, "loss": 0.2708, "step": 36844 }, { "epoch": 0.06533100350272479, "grad_norm": 1.8515625, "learning_rate": 0.0014384607508162875, "loss": 0.2835, "step": 36846 }, { "epoch": 0.0653345496680346, "grad_norm": 0.31640625, "learning_rate": 0.0014384056005567643, "loss": 0.178, "step": 36848 }, { "epoch": 0.06533809583334442, "grad_norm": 0.310546875, "learning_rate": 0.0014383504488172515, "loss": 0.1696, "step": 36850 }, { "epoch": 0.06534164199865423, "grad_norm": 0.69140625, "learning_rate": 0.0014382952955979901, "loss": 0.214, "step": 36852 }, { "epoch": 0.06534518816396405, "grad_norm": 0.47265625, "learning_rate": 0.0014382401408992221, "loss": 0.2086, "step": 36854 }, { "epoch": 0.06534873432927386, "grad_norm": 0.359375, "learning_rate": 0.0014381849847211882, "loss": 0.3033, "step": 36856 }, { "epoch": 0.06535228049458368, "grad_norm": 0.43359375, "learning_rate": 0.0014381298270641292, "loss": 0.2393, "step": 36858 }, { "epoch": 0.06535582665989349, "grad_norm": 0.61328125, "learning_rate": 0.001438074667928287, "loss": 0.4743, "step": 36860 }, { "epoch": 0.06535937282520331, "grad_norm": 1.59375, "learning_rate": 0.0014380195073139027, "loss": 0.2464, "step": 36862 }, { "epoch": 0.06536291899051312, "grad_norm": 0.515625, "learning_rate": 0.0014379643452212172, "loss": 0.2763, "step": 36864 }, { "epoch": 0.06536646515582294, "grad_norm": 0.359375, "learning_rate": 0.0014379091816504718, "loss": 0.1868, "step": 36866 }, { "epoch": 0.06537001132113275, "grad_norm": 0.59765625, "learning_rate": 0.0014378540166019083, "loss": 0.3379, "step": 36868 }, { "epoch": 0.06537355748644257, "grad_norm": 0.2216796875, "learning_rate": 0.0014377988500757675, "loss": 0.17, "step": 36870 }, { "epoch": 0.06537710365175238, "grad_norm": 1.3046875, "learning_rate": 0.0014377436820722906, "loss": 0.2509, "step": 36872 }, { "epoch": 0.0653806498170622, "grad_norm": 0.361328125, "learning_rate": 0.0014376885125917192, "loss": 0.1757, "step": 36874 }, { "epoch": 0.06538419598237201, "grad_norm": 0.62890625, "learning_rate": 0.0014376333416342944, "loss": 0.1473, "step": 36876 }, { "epoch": 0.06538774214768182, "grad_norm": 0.33203125, "learning_rate": 0.0014375781692002572, "loss": 0.1444, "step": 36878 }, { "epoch": 0.06539128831299164, "grad_norm": 0.58203125, "learning_rate": 0.0014375229952898493, "loss": 0.2071, "step": 36880 }, { "epoch": 0.06539483447830145, "grad_norm": 1.1796875, "learning_rate": 0.0014374678199033121, "loss": 0.1963, "step": 36882 }, { "epoch": 0.06539838064361127, "grad_norm": 0.462890625, "learning_rate": 0.0014374126430408867, "loss": 0.2466, "step": 36884 }, { "epoch": 0.06540192680892108, "grad_norm": 0.48046875, "learning_rate": 0.001437357464702814, "loss": 0.1869, "step": 36886 }, { "epoch": 0.0654054729742309, "grad_norm": 0.490234375, "learning_rate": 0.001437302284889336, "loss": 0.1961, "step": 36888 }, { "epoch": 0.06540901913954072, "grad_norm": 0.78515625, "learning_rate": 0.0014372471036006936, "loss": 0.2019, "step": 36890 }, { "epoch": 0.06541256530485054, "grad_norm": 1.78125, "learning_rate": 0.0014371919208371285, "loss": 0.2876, "step": 36892 }, { "epoch": 0.06541611147016035, "grad_norm": 0.6875, "learning_rate": 0.0014371367365988814, "loss": 0.3217, "step": 36894 }, { "epoch": 0.06541965763547017, "grad_norm": 0.283203125, "learning_rate": 0.0014370815508861943, "loss": 0.2015, "step": 36896 }, { "epoch": 0.06542320380077998, "grad_norm": 0.2412109375, "learning_rate": 0.0014370263636993084, "loss": 0.1598, "step": 36898 }, { "epoch": 0.0654267499660898, "grad_norm": 0.78515625, "learning_rate": 0.0014369711750384648, "loss": 0.219, "step": 36900 }, { "epoch": 0.06543029613139961, "grad_norm": 1.328125, "learning_rate": 0.0014369159849039052, "loss": 0.2115, "step": 36902 }, { "epoch": 0.06543384229670943, "grad_norm": 0.515625, "learning_rate": 0.0014368607932958707, "loss": 0.1687, "step": 36904 }, { "epoch": 0.06543738846201924, "grad_norm": 0.490234375, "learning_rate": 0.0014368056002146024, "loss": 0.2331, "step": 36906 }, { "epoch": 0.06544093462732906, "grad_norm": 0.31640625, "learning_rate": 0.0014367504056603424, "loss": 0.2033, "step": 36908 }, { "epoch": 0.06544448079263887, "grad_norm": 0.8125, "learning_rate": 0.0014366952096333317, "loss": 0.1894, "step": 36910 }, { "epoch": 0.06544802695794869, "grad_norm": 0.267578125, "learning_rate": 0.0014366400121338116, "loss": 0.166, "step": 36912 }, { "epoch": 0.0654515731232585, "grad_norm": 0.427734375, "learning_rate": 0.001436584813162024, "loss": 0.1709, "step": 36914 }, { "epoch": 0.06545511928856831, "grad_norm": 0.3046875, "learning_rate": 0.0014365296127182095, "loss": 0.2494, "step": 36916 }, { "epoch": 0.06545866545387813, "grad_norm": 0.287109375, "learning_rate": 0.0014364744108026101, "loss": 0.1956, "step": 36918 }, { "epoch": 0.06546221161918794, "grad_norm": 0.2294921875, "learning_rate": 0.0014364192074154674, "loss": 0.1928, "step": 36920 }, { "epoch": 0.06546575778449776, "grad_norm": 1.1796875, "learning_rate": 0.001436364002557022, "loss": 0.2667, "step": 36922 }, { "epoch": 0.06546930394980757, "grad_norm": 0.451171875, "learning_rate": 0.001436308796227516, "loss": 0.1691, "step": 36924 }, { "epoch": 0.06547285011511739, "grad_norm": 1.5546875, "learning_rate": 0.0014362535884271906, "loss": 0.2395, "step": 36926 }, { "epoch": 0.0654763962804272, "grad_norm": 0.71875, "learning_rate": 0.0014361983791562874, "loss": 0.1559, "step": 36928 }, { "epoch": 0.06547994244573702, "grad_norm": 0.236328125, "learning_rate": 0.001436143168415048, "loss": 0.2033, "step": 36930 }, { "epoch": 0.06548348861104683, "grad_norm": 0.2470703125, "learning_rate": 0.0014360879562037134, "loss": 0.4137, "step": 36932 }, { "epoch": 0.06548703477635665, "grad_norm": 0.8984375, "learning_rate": 0.0014360327425225253, "loss": 0.2932, "step": 36934 }, { "epoch": 0.06549058094166647, "grad_norm": 0.63671875, "learning_rate": 0.001435977527371725, "loss": 0.1701, "step": 36936 }, { "epoch": 0.06549412710697629, "grad_norm": 0.1796875, "learning_rate": 0.0014359223107515548, "loss": 0.1234, "step": 36938 }, { "epoch": 0.0654976732722861, "grad_norm": 0.427734375, "learning_rate": 0.0014358670926622548, "loss": 0.1607, "step": 36940 }, { "epoch": 0.06550121943759592, "grad_norm": 0.390625, "learning_rate": 0.0014358118731040677, "loss": 0.198, "step": 36942 }, { "epoch": 0.06550476560290573, "grad_norm": 1.484375, "learning_rate": 0.0014357566520772343, "loss": 0.4045, "step": 36944 }, { "epoch": 0.06550831176821555, "grad_norm": 0.71484375, "learning_rate": 0.0014357014295819964, "loss": 0.2255, "step": 36946 }, { "epoch": 0.06551185793352536, "grad_norm": 0.6171875, "learning_rate": 0.0014356462056185956, "loss": 0.2362, "step": 36948 }, { "epoch": 0.06551540409883518, "grad_norm": 0.2021484375, "learning_rate": 0.001435590980187273, "loss": 0.162, "step": 36950 }, { "epoch": 0.06551895026414499, "grad_norm": 0.47265625, "learning_rate": 0.0014355357532882705, "loss": 0.1692, "step": 36952 }, { "epoch": 0.0655224964294548, "grad_norm": 1.765625, "learning_rate": 0.0014354805249218296, "loss": 0.2169, "step": 36954 }, { "epoch": 0.06552604259476462, "grad_norm": 0.404296875, "learning_rate": 0.0014354252950881917, "loss": 0.1652, "step": 36956 }, { "epoch": 0.06552958876007443, "grad_norm": 0.404296875, "learning_rate": 0.0014353700637875983, "loss": 0.1906, "step": 36958 }, { "epoch": 0.06553313492538425, "grad_norm": 0.6484375, "learning_rate": 0.001435314831020291, "loss": 0.1449, "step": 36960 }, { "epoch": 0.06553668109069406, "grad_norm": 0.275390625, "learning_rate": 0.0014352595967865116, "loss": 0.1546, "step": 36962 }, { "epoch": 0.06554022725600388, "grad_norm": 0.40234375, "learning_rate": 0.0014352043610865014, "loss": 0.1678, "step": 36964 }, { "epoch": 0.06554377342131369, "grad_norm": 0.37109375, "learning_rate": 0.001435149123920502, "loss": 0.1878, "step": 36966 }, { "epoch": 0.0655473195866235, "grad_norm": 0.375, "learning_rate": 0.001435093885288755, "loss": 0.2004, "step": 36968 }, { "epoch": 0.06555086575193332, "grad_norm": 0.126953125, "learning_rate": 0.001435038645191502, "loss": 0.1305, "step": 36970 }, { "epoch": 0.06555441191724314, "grad_norm": 0.1787109375, "learning_rate": 0.0014349834036289845, "loss": 0.1715, "step": 36972 }, { "epoch": 0.06555795808255295, "grad_norm": 0.1953125, "learning_rate": 0.0014349281606014445, "loss": 0.1893, "step": 36974 }, { "epoch": 0.06556150424786276, "grad_norm": 0.484375, "learning_rate": 0.0014348729161091233, "loss": 0.1976, "step": 36976 }, { "epoch": 0.06556505041317258, "grad_norm": 0.388671875, "learning_rate": 0.0014348176701522621, "loss": 0.1367, "step": 36978 }, { "epoch": 0.06556859657848241, "grad_norm": 0.45703125, "learning_rate": 0.001434762422731103, "loss": 0.1574, "step": 36980 }, { "epoch": 0.06557214274379222, "grad_norm": 0.5390625, "learning_rate": 0.0014347071738458877, "loss": 0.1931, "step": 36982 }, { "epoch": 0.06557568890910204, "grad_norm": 0.8046875, "learning_rate": 0.0014346519234968574, "loss": 0.1649, "step": 36984 }, { "epoch": 0.06557923507441185, "grad_norm": 1.7421875, "learning_rate": 0.001434596671684254, "loss": 0.2059, "step": 36986 }, { "epoch": 0.06558278123972167, "grad_norm": 1.3046875, "learning_rate": 0.001434541418408319, "loss": 0.1895, "step": 36988 }, { "epoch": 0.06558632740503148, "grad_norm": 0.345703125, "learning_rate": 0.0014344861636692945, "loss": 0.2404, "step": 36990 }, { "epoch": 0.0655898735703413, "grad_norm": 0.6796875, "learning_rate": 0.001434430907467422, "loss": 0.1777, "step": 36992 }, { "epoch": 0.06559341973565111, "grad_norm": 0.7265625, "learning_rate": 0.0014343756498029425, "loss": 0.2195, "step": 36994 }, { "epoch": 0.06559696590096092, "grad_norm": 0.353515625, "learning_rate": 0.0014343203906760982, "loss": 0.1507, "step": 36996 }, { "epoch": 0.06560051206627074, "grad_norm": 0.625, "learning_rate": 0.0014342651300871308, "loss": 0.3201, "step": 36998 }, { "epoch": 0.06560405823158055, "grad_norm": 0.51953125, "learning_rate": 0.0014342098680362816, "loss": 0.2513, "step": 37000 }, { "epoch": 0.06560760439689037, "grad_norm": 0.400390625, "learning_rate": 0.0014341546045237932, "loss": 0.1966, "step": 37002 }, { "epoch": 0.06561115056220018, "grad_norm": 0.349609375, "learning_rate": 0.001434099339549906, "loss": 0.2417, "step": 37004 }, { "epoch": 0.06561469672751, "grad_norm": 0.72265625, "learning_rate": 0.0014340440731148627, "loss": 0.1518, "step": 37006 }, { "epoch": 0.06561824289281981, "grad_norm": 0.76171875, "learning_rate": 0.0014339888052189045, "loss": 0.2141, "step": 37008 }, { "epoch": 0.06562178905812963, "grad_norm": 0.9453125, "learning_rate": 0.0014339335358622737, "loss": 0.1896, "step": 37010 }, { "epoch": 0.06562533522343944, "grad_norm": 0.796875, "learning_rate": 0.0014338782650452114, "loss": 0.1915, "step": 37012 }, { "epoch": 0.06562888138874926, "grad_norm": 0.94921875, "learning_rate": 0.0014338229927679592, "loss": 0.2066, "step": 37014 }, { "epoch": 0.06563242755405907, "grad_norm": 0.45703125, "learning_rate": 0.0014337677190307593, "loss": 0.269, "step": 37016 }, { "epoch": 0.06563597371936888, "grad_norm": 0.392578125, "learning_rate": 0.001433712443833853, "loss": 0.2019, "step": 37018 }, { "epoch": 0.0656395198846787, "grad_norm": 0.328125, "learning_rate": 0.0014336571671774827, "loss": 0.1379, "step": 37020 }, { "epoch": 0.06564306604998851, "grad_norm": 0.30078125, "learning_rate": 0.0014336018890618895, "loss": 0.1924, "step": 37022 }, { "epoch": 0.06564661221529833, "grad_norm": 0.46875, "learning_rate": 0.0014335466094873158, "loss": 0.2258, "step": 37024 }, { "epoch": 0.06565015838060816, "grad_norm": 0.88671875, "learning_rate": 0.0014334913284540026, "loss": 0.2199, "step": 37026 }, { "epoch": 0.06565370454591797, "grad_norm": 1.7109375, "learning_rate": 0.0014334360459621923, "loss": 0.2587, "step": 37028 }, { "epoch": 0.06565725071122779, "grad_norm": 0.349609375, "learning_rate": 0.0014333807620121265, "loss": 0.1502, "step": 37030 }, { "epoch": 0.0656607968765376, "grad_norm": 0.48828125, "learning_rate": 0.0014333254766040464, "loss": 0.2223, "step": 37032 }, { "epoch": 0.06566434304184741, "grad_norm": 0.220703125, "learning_rate": 0.0014332701897381945, "loss": 0.1599, "step": 37034 }, { "epoch": 0.06566788920715723, "grad_norm": 0.52734375, "learning_rate": 0.0014332149014148127, "loss": 0.1919, "step": 37036 }, { "epoch": 0.06567143537246704, "grad_norm": 0.76953125, "learning_rate": 0.001433159611634142, "loss": 0.1781, "step": 37038 }, { "epoch": 0.06567498153777686, "grad_norm": 0.328125, "learning_rate": 0.001433104320396425, "loss": 0.2281, "step": 37040 }, { "epoch": 0.06567852770308667, "grad_norm": 0.271484375, "learning_rate": 0.0014330490277019033, "loss": 0.1492, "step": 37042 }, { "epoch": 0.06568207386839649, "grad_norm": 0.498046875, "learning_rate": 0.0014329937335508182, "loss": 0.1522, "step": 37044 }, { "epoch": 0.0656856200337063, "grad_norm": 1.078125, "learning_rate": 0.0014329384379434124, "loss": 0.2447, "step": 37046 }, { "epoch": 0.06568916619901612, "grad_norm": 0.7265625, "learning_rate": 0.0014328831408799267, "loss": 0.1964, "step": 37048 }, { "epoch": 0.06569271236432593, "grad_norm": 0.46875, "learning_rate": 0.0014328278423606042, "loss": 0.202, "step": 37050 }, { "epoch": 0.06569625852963575, "grad_norm": 0.302734375, "learning_rate": 0.001432772542385686, "loss": 0.13, "step": 37052 }, { "epoch": 0.06569980469494556, "grad_norm": 0.26171875, "learning_rate": 0.0014327172409554132, "loss": 0.1608, "step": 37054 }, { "epoch": 0.06570335086025537, "grad_norm": 1.3125, "learning_rate": 0.0014326619380700292, "loss": 0.1521, "step": 37056 }, { "epoch": 0.06570689702556519, "grad_norm": 0.30078125, "learning_rate": 0.0014326066337297745, "loss": 0.2051, "step": 37058 }, { "epoch": 0.065710443190875, "grad_norm": 0.53515625, "learning_rate": 0.0014325513279348921, "loss": 0.1873, "step": 37060 }, { "epoch": 0.06571398935618482, "grad_norm": 0.455078125, "learning_rate": 0.001432496020685623, "loss": 0.1577, "step": 37062 }, { "epoch": 0.06571753552149463, "grad_norm": 0.3671875, "learning_rate": 0.0014324407119822098, "loss": 0.1653, "step": 37064 }, { "epoch": 0.06572108168680445, "grad_norm": 0.546875, "learning_rate": 0.0014323854018248939, "loss": 0.1857, "step": 37066 }, { "epoch": 0.06572462785211426, "grad_norm": 0.35546875, "learning_rate": 0.0014323300902139174, "loss": 0.1571, "step": 37068 }, { "epoch": 0.06572817401742408, "grad_norm": 0.255859375, "learning_rate": 0.001432274777149522, "loss": 0.158, "step": 37070 }, { "epoch": 0.0657317201827339, "grad_norm": 0.357421875, "learning_rate": 0.0014322194626319499, "loss": 0.1847, "step": 37072 }, { "epoch": 0.06573526634804372, "grad_norm": 0.54296875, "learning_rate": 0.0014321641466614424, "loss": 0.2224, "step": 37074 }, { "epoch": 0.06573881251335353, "grad_norm": 0.4375, "learning_rate": 0.0014321088292382422, "loss": 0.1258, "step": 37076 }, { "epoch": 0.06574235867866335, "grad_norm": 0.388671875, "learning_rate": 0.001432053510362591, "loss": 0.1971, "step": 37078 }, { "epoch": 0.06574590484397316, "grad_norm": 1.703125, "learning_rate": 0.0014319981900347302, "loss": 0.1584, "step": 37080 }, { "epoch": 0.06574945100928298, "grad_norm": 0.66796875, "learning_rate": 0.0014319428682549023, "loss": 0.2502, "step": 37082 }, { "epoch": 0.06575299717459279, "grad_norm": 0.65625, "learning_rate": 0.0014318875450233495, "loss": 0.2016, "step": 37084 }, { "epoch": 0.0657565433399026, "grad_norm": 0.458984375, "learning_rate": 0.0014318322203403133, "loss": 0.2383, "step": 37086 }, { "epoch": 0.06576008950521242, "grad_norm": 0.271484375, "learning_rate": 0.0014317768942060354, "loss": 0.2305, "step": 37088 }, { "epoch": 0.06576363567052224, "grad_norm": 2.5, "learning_rate": 0.001431721566620758, "loss": 0.3525, "step": 37090 }, { "epoch": 0.06576718183583205, "grad_norm": 0.84375, "learning_rate": 0.0014316662375847237, "loss": 0.2073, "step": 37092 }, { "epoch": 0.06577072800114186, "grad_norm": 0.625, "learning_rate": 0.0014316109070981733, "loss": 0.1671, "step": 37094 }, { "epoch": 0.06577427416645168, "grad_norm": 0.359375, "learning_rate": 0.0014315555751613501, "loss": 0.2918, "step": 37096 }, { "epoch": 0.0657778203317615, "grad_norm": 0.52734375, "learning_rate": 0.0014315002417744946, "loss": 0.1837, "step": 37098 }, { "epoch": 0.06578136649707131, "grad_norm": 1.46875, "learning_rate": 0.0014314449069378504, "loss": 0.4547, "step": 37100 }, { "epoch": 0.06578491266238112, "grad_norm": 0.73046875, "learning_rate": 0.001431389570651658, "loss": 0.1828, "step": 37102 }, { "epoch": 0.06578845882769094, "grad_norm": 0.216796875, "learning_rate": 0.0014313342329161606, "loss": 0.1248, "step": 37104 }, { "epoch": 0.06579200499300075, "grad_norm": 0.357421875, "learning_rate": 0.0014312788937315994, "loss": 0.1561, "step": 37106 }, { "epoch": 0.06579555115831057, "grad_norm": 0.427734375, "learning_rate": 0.001431223553098217, "loss": 0.1659, "step": 37108 }, { "epoch": 0.06579909732362038, "grad_norm": 0.3359375, "learning_rate": 0.001431168211016255, "loss": 0.1749, "step": 37110 }, { "epoch": 0.0658026434889302, "grad_norm": 1.484375, "learning_rate": 0.0014311128674859557, "loss": 0.2233, "step": 37112 }, { "epoch": 0.06580618965424001, "grad_norm": 0.36328125, "learning_rate": 0.0014310575225075612, "loss": 0.1978, "step": 37114 }, { "epoch": 0.06580973581954984, "grad_norm": 0.33203125, "learning_rate": 0.0014310021760813132, "loss": 0.1675, "step": 37116 }, { "epoch": 0.06581328198485965, "grad_norm": 0.388671875, "learning_rate": 0.001430946828207454, "loss": 0.1823, "step": 37118 }, { "epoch": 0.06581682815016947, "grad_norm": 0.39453125, "learning_rate": 0.0014308914788862255, "loss": 0.1979, "step": 37120 }, { "epoch": 0.06582037431547928, "grad_norm": 0.2177734375, "learning_rate": 0.00143083612811787, "loss": 0.186, "step": 37122 }, { "epoch": 0.0658239204807891, "grad_norm": 0.6796875, "learning_rate": 0.0014307807759026297, "loss": 0.175, "step": 37124 }, { "epoch": 0.06582746664609891, "grad_norm": 0.52734375, "learning_rate": 0.0014307254222407462, "loss": 0.1963, "step": 37126 }, { "epoch": 0.06583101281140873, "grad_norm": 1.4140625, "learning_rate": 0.0014306700671324617, "loss": 0.2441, "step": 37128 }, { "epoch": 0.06583455897671854, "grad_norm": 0.55859375, "learning_rate": 0.0014306147105780185, "loss": 0.1848, "step": 37130 }, { "epoch": 0.06583810514202836, "grad_norm": 0.2890625, "learning_rate": 0.0014305593525776583, "loss": 0.2553, "step": 37132 }, { "epoch": 0.06584165130733817, "grad_norm": 0.3984375, "learning_rate": 0.0014305039931316239, "loss": 0.2166, "step": 37134 }, { "epoch": 0.06584519747264798, "grad_norm": 0.8046875, "learning_rate": 0.001430448632240157, "loss": 0.1738, "step": 37136 }, { "epoch": 0.0658487436379578, "grad_norm": 0.78515625, "learning_rate": 0.0014303932699034996, "loss": 0.2454, "step": 37138 }, { "epoch": 0.06585228980326761, "grad_norm": 1.84375, "learning_rate": 0.0014303379061218937, "loss": 0.2256, "step": 37140 }, { "epoch": 0.06585583596857743, "grad_norm": 1.1640625, "learning_rate": 0.0014302825408955822, "loss": 0.2771, "step": 37142 }, { "epoch": 0.06585938213388724, "grad_norm": 1.0, "learning_rate": 0.0014302271742248065, "loss": 0.1846, "step": 37144 }, { "epoch": 0.06586292829919706, "grad_norm": 0.43359375, "learning_rate": 0.0014301718061098087, "loss": 0.2014, "step": 37146 }, { "epoch": 0.06586647446450687, "grad_norm": 0.421875, "learning_rate": 0.0014301164365508314, "loss": 0.1954, "step": 37148 }, { "epoch": 0.06587002062981669, "grad_norm": 0.306640625, "learning_rate": 0.001430061065548117, "loss": 0.1855, "step": 37150 }, { "epoch": 0.0658735667951265, "grad_norm": 0.33203125, "learning_rate": 0.0014300056931019066, "loss": 0.1771, "step": 37152 }, { "epoch": 0.06587711296043632, "grad_norm": 0.4765625, "learning_rate": 0.0014299503192124433, "loss": 0.2113, "step": 37154 }, { "epoch": 0.06588065912574613, "grad_norm": 0.337890625, "learning_rate": 0.0014298949438799687, "loss": 0.2012, "step": 37156 }, { "epoch": 0.06588420529105594, "grad_norm": 0.53125, "learning_rate": 0.0014298395671047256, "loss": 0.1979, "step": 37158 }, { "epoch": 0.06588775145636576, "grad_norm": 0.37890625, "learning_rate": 0.0014297841888869557, "loss": 0.159, "step": 37160 }, { "epoch": 0.06589129762167559, "grad_norm": 2.203125, "learning_rate": 0.0014297288092269013, "loss": 0.1849, "step": 37162 }, { "epoch": 0.0658948437869854, "grad_norm": 0.408203125, "learning_rate": 0.0014296734281248043, "loss": 0.2035, "step": 37164 }, { "epoch": 0.06589838995229522, "grad_norm": 0.365234375, "learning_rate": 0.0014296180455809079, "loss": 0.149, "step": 37166 }, { "epoch": 0.06590193611760503, "grad_norm": 0.38671875, "learning_rate": 0.0014295626615954528, "loss": 0.2075, "step": 37168 }, { "epoch": 0.06590548228291485, "grad_norm": 0.361328125, "learning_rate": 0.0014295072761686828, "loss": 0.1954, "step": 37170 }, { "epoch": 0.06590902844822466, "grad_norm": 0.32421875, "learning_rate": 0.001429451889300839, "loss": 0.1515, "step": 37172 }, { "epoch": 0.06591257461353447, "grad_norm": 0.62109375, "learning_rate": 0.0014293965009921639, "loss": 0.1751, "step": 37174 }, { "epoch": 0.06591612077884429, "grad_norm": 0.423828125, "learning_rate": 0.0014293411112429, "loss": 0.1332, "step": 37176 }, { "epoch": 0.0659196669441541, "grad_norm": 1.328125, "learning_rate": 0.0014292857200532893, "loss": 0.2566, "step": 37178 }, { "epoch": 0.06592321310946392, "grad_norm": 1.1875, "learning_rate": 0.0014292303274235744, "loss": 0.2818, "step": 37180 }, { "epoch": 0.06592675927477373, "grad_norm": 3.515625, "learning_rate": 0.0014291749333539972, "loss": 0.4083, "step": 37182 }, { "epoch": 0.06593030544008355, "grad_norm": 0.328125, "learning_rate": 0.0014291195378448, "loss": 0.1939, "step": 37184 }, { "epoch": 0.06593385160539336, "grad_norm": 0.58984375, "learning_rate": 0.0014290641408962249, "loss": 0.2074, "step": 37186 }, { "epoch": 0.06593739777070318, "grad_norm": 0.2158203125, "learning_rate": 0.0014290087425085146, "loss": 0.1692, "step": 37188 }, { "epoch": 0.06594094393601299, "grad_norm": 1.8671875, "learning_rate": 0.001428953342681911, "loss": 0.1932, "step": 37190 }, { "epoch": 0.0659444901013228, "grad_norm": 0.71875, "learning_rate": 0.0014288979414166568, "loss": 0.2129, "step": 37192 }, { "epoch": 0.06594803626663262, "grad_norm": 0.474609375, "learning_rate": 0.001428842538712994, "loss": 0.1735, "step": 37194 }, { "epoch": 0.06595158243194243, "grad_norm": 0.4921875, "learning_rate": 0.0014287871345711647, "loss": 0.2009, "step": 37196 }, { "epoch": 0.06595512859725225, "grad_norm": 0.30078125, "learning_rate": 0.0014287317289914116, "loss": 0.1426, "step": 37198 }, { "epoch": 0.06595867476256206, "grad_norm": 2.25, "learning_rate": 0.0014286763219739772, "loss": 0.2912, "step": 37200 }, { "epoch": 0.06596222092787188, "grad_norm": 0.6875, "learning_rate": 0.001428620913519103, "loss": 0.1292, "step": 37202 }, { "epoch": 0.0659657670931817, "grad_norm": 0.55859375, "learning_rate": 0.0014285655036270321, "loss": 0.1911, "step": 37204 }, { "epoch": 0.06596931325849151, "grad_norm": 0.56640625, "learning_rate": 0.0014285100922980063, "loss": 0.2575, "step": 37206 }, { "epoch": 0.06597285942380134, "grad_norm": 0.314453125, "learning_rate": 0.0014284546795322684, "loss": 0.2502, "step": 37208 }, { "epoch": 0.06597640558911115, "grad_norm": 0.65234375, "learning_rate": 0.0014283992653300603, "loss": 0.2582, "step": 37210 }, { "epoch": 0.06597995175442097, "grad_norm": 0.3046875, "learning_rate": 0.0014283438496916249, "loss": 0.2272, "step": 37212 }, { "epoch": 0.06598349791973078, "grad_norm": 0.515625, "learning_rate": 0.0014282884326172038, "loss": 0.1343, "step": 37214 }, { "epoch": 0.0659870440850406, "grad_norm": 0.318359375, "learning_rate": 0.00142823301410704, "loss": 0.2322, "step": 37216 }, { "epoch": 0.06599059025035041, "grad_norm": 0.99609375, "learning_rate": 0.001428177594161376, "loss": 0.1733, "step": 37218 }, { "epoch": 0.06599413641566022, "grad_norm": 1.0859375, "learning_rate": 0.0014281221727804536, "loss": 0.1542, "step": 37220 }, { "epoch": 0.06599768258097004, "grad_norm": 0.357421875, "learning_rate": 0.0014280667499645152, "loss": 0.1363, "step": 37222 }, { "epoch": 0.06600122874627985, "grad_norm": 0.494140625, "learning_rate": 0.0014280113257138037, "loss": 0.2046, "step": 37224 }, { "epoch": 0.06600477491158967, "grad_norm": 0.5390625, "learning_rate": 0.0014279559000285609, "loss": 0.1261, "step": 37226 }, { "epoch": 0.06600832107689948, "grad_norm": 0.2890625, "learning_rate": 0.0014279004729090296, "loss": 0.3132, "step": 37228 }, { "epoch": 0.0660118672422093, "grad_norm": 0.40234375, "learning_rate": 0.001427845044355452, "loss": 0.2867, "step": 37230 }, { "epoch": 0.06601541340751911, "grad_norm": 0.2236328125, "learning_rate": 0.0014277896143680708, "loss": 0.2209, "step": 37232 }, { "epoch": 0.06601895957282893, "grad_norm": 0.33984375, "learning_rate": 0.001427734182947128, "loss": 0.2246, "step": 37234 }, { "epoch": 0.06602250573813874, "grad_norm": 0.4453125, "learning_rate": 0.0014276787500928667, "loss": 0.1621, "step": 37236 }, { "epoch": 0.06602605190344855, "grad_norm": 0.75, "learning_rate": 0.0014276233158055285, "loss": 0.219, "step": 37238 }, { "epoch": 0.06602959806875837, "grad_norm": 0.9140625, "learning_rate": 0.0014275678800853564, "loss": 0.3027, "step": 37240 }, { "epoch": 0.06603314423406818, "grad_norm": 0.408203125, "learning_rate": 0.0014275124429325924, "loss": 0.1264, "step": 37242 }, { "epoch": 0.066036690399378, "grad_norm": 0.421875, "learning_rate": 0.0014274570043474797, "loss": 0.2091, "step": 37244 }, { "epoch": 0.06604023656468781, "grad_norm": 2.296875, "learning_rate": 0.00142740156433026, "loss": 0.2309, "step": 37246 }, { "epoch": 0.06604378272999763, "grad_norm": 1.265625, "learning_rate": 0.0014273461228811758, "loss": 0.4331, "step": 37248 }, { "epoch": 0.06604732889530744, "grad_norm": 0.490234375, "learning_rate": 0.0014272906800004699, "loss": 0.2109, "step": 37250 }, { "epoch": 0.06605087506061727, "grad_norm": 0.6328125, "learning_rate": 0.001427235235688385, "loss": 0.241, "step": 37252 }, { "epoch": 0.06605442122592708, "grad_norm": 0.287109375, "learning_rate": 0.0014271797899451633, "loss": 0.2312, "step": 37254 }, { "epoch": 0.0660579673912369, "grad_norm": 0.5703125, "learning_rate": 0.0014271243427710469, "loss": 0.1624, "step": 37256 }, { "epoch": 0.06606151355654671, "grad_norm": 0.375, "learning_rate": 0.0014270688941662786, "loss": 0.1598, "step": 37258 }, { "epoch": 0.06606505972185653, "grad_norm": 0.66796875, "learning_rate": 0.001427013444131101, "loss": 0.2129, "step": 37260 }, { "epoch": 0.06606860588716634, "grad_norm": 1.09375, "learning_rate": 0.001426957992665757, "loss": 0.129, "step": 37262 }, { "epoch": 0.06607215205247616, "grad_norm": 3.5, "learning_rate": 0.001426902539770488, "loss": 0.2001, "step": 37264 }, { "epoch": 0.06607569821778597, "grad_norm": 0.337890625, "learning_rate": 0.0014268470854455375, "loss": 0.187, "step": 37266 }, { "epoch": 0.06607924438309579, "grad_norm": 0.26171875, "learning_rate": 0.001426791629691148, "loss": 0.4083, "step": 37268 }, { "epoch": 0.0660827905484056, "grad_norm": 1.3359375, "learning_rate": 0.0014267361725075613, "loss": 0.2454, "step": 37270 }, { "epoch": 0.06608633671371542, "grad_norm": 0.5, "learning_rate": 0.0014266807138950204, "loss": 0.2099, "step": 37272 }, { "epoch": 0.06608988287902523, "grad_norm": 0.6328125, "learning_rate": 0.0014266252538537678, "loss": 0.1557, "step": 37274 }, { "epoch": 0.06609342904433504, "grad_norm": 1.3984375, "learning_rate": 0.0014265697923840461, "loss": 0.2451, "step": 37276 }, { "epoch": 0.06609697520964486, "grad_norm": 0.59765625, "learning_rate": 0.001426514329486098, "loss": 0.1939, "step": 37278 }, { "epoch": 0.06610052137495467, "grad_norm": 0.66015625, "learning_rate": 0.0014264588651601658, "loss": 0.194, "step": 37280 }, { "epoch": 0.06610406754026449, "grad_norm": 0.33203125, "learning_rate": 0.0014264033994064922, "loss": 0.1969, "step": 37282 }, { "epoch": 0.0661076137055743, "grad_norm": 0.6640625, "learning_rate": 0.0014263479322253194, "loss": 0.1824, "step": 37284 }, { "epoch": 0.06611115987088412, "grad_norm": 0.765625, "learning_rate": 0.0014262924636168905, "loss": 0.1981, "step": 37286 }, { "epoch": 0.06611470603619393, "grad_norm": 0.30078125, "learning_rate": 0.001426236993581448, "loss": 0.1568, "step": 37288 }, { "epoch": 0.06611825220150375, "grad_norm": 0.33984375, "learning_rate": 0.0014261815221192345, "loss": 0.1602, "step": 37290 }, { "epoch": 0.06612179836681356, "grad_norm": 0.404296875, "learning_rate": 0.0014261260492304923, "loss": 0.1939, "step": 37292 }, { "epoch": 0.06612534453212338, "grad_norm": 0.66796875, "learning_rate": 0.0014260705749154644, "loss": 0.3056, "step": 37294 }, { "epoch": 0.06612889069743319, "grad_norm": 0.259765625, "learning_rate": 0.001426015099174393, "loss": 0.2168, "step": 37296 }, { "epoch": 0.06613243686274302, "grad_norm": 1.21875, "learning_rate": 0.001425959622007521, "loss": 0.1704, "step": 37298 }, { "epoch": 0.06613598302805283, "grad_norm": 0.9140625, "learning_rate": 0.0014259041434150908, "loss": 0.1694, "step": 37300 }, { "epoch": 0.06613952919336265, "grad_norm": 0.466796875, "learning_rate": 0.0014258486633973455, "loss": 0.1942, "step": 37302 }, { "epoch": 0.06614307535867246, "grad_norm": 0.279296875, "learning_rate": 0.001425793181954527, "loss": 0.1581, "step": 37304 }, { "epoch": 0.06614662152398228, "grad_norm": 0.2236328125, "learning_rate": 0.0014257376990868787, "loss": 0.1621, "step": 37306 }, { "epoch": 0.06615016768929209, "grad_norm": 0.392578125, "learning_rate": 0.0014256822147946427, "loss": 0.1962, "step": 37308 }, { "epoch": 0.0661537138546019, "grad_norm": 0.341796875, "learning_rate": 0.001425626729078062, "loss": 0.1821, "step": 37310 }, { "epoch": 0.06615726001991172, "grad_norm": 0.453125, "learning_rate": 0.0014255712419373795, "loss": 0.2438, "step": 37312 }, { "epoch": 0.06616080618522154, "grad_norm": 1.6953125, "learning_rate": 0.0014255157533728368, "loss": 0.2581, "step": 37314 }, { "epoch": 0.06616435235053135, "grad_norm": 0.291015625, "learning_rate": 0.001425460263384678, "loss": 0.1508, "step": 37316 }, { "epoch": 0.06616789851584116, "grad_norm": 0.302734375, "learning_rate": 0.0014254047719731444, "loss": 0.2272, "step": 37318 }, { "epoch": 0.06617144468115098, "grad_norm": 0.2373046875, "learning_rate": 0.0014253492791384795, "loss": 0.213, "step": 37320 }, { "epoch": 0.0661749908464608, "grad_norm": 5.46875, "learning_rate": 0.0014252937848809259, "loss": 0.3193, "step": 37322 }, { "epoch": 0.06617853701177061, "grad_norm": 0.197265625, "learning_rate": 0.0014252382892007262, "loss": 0.1669, "step": 37324 }, { "epoch": 0.06618208317708042, "grad_norm": 1.1484375, "learning_rate": 0.0014251827920981233, "loss": 0.2215, "step": 37326 }, { "epoch": 0.06618562934239024, "grad_norm": 0.25, "learning_rate": 0.0014251272935733596, "loss": 0.1623, "step": 37328 }, { "epoch": 0.06618917550770005, "grad_norm": 1.0625, "learning_rate": 0.0014250717936266782, "loss": 0.2267, "step": 37330 }, { "epoch": 0.06619272167300987, "grad_norm": 0.318359375, "learning_rate": 0.0014250162922583214, "loss": 0.21, "step": 37332 }, { "epoch": 0.06619626783831968, "grad_norm": 0.265625, "learning_rate": 0.0014249607894685323, "loss": 0.2047, "step": 37334 }, { "epoch": 0.0661998140036295, "grad_norm": 0.8984375, "learning_rate": 0.0014249052852575534, "loss": 0.2141, "step": 37336 }, { "epoch": 0.06620336016893931, "grad_norm": 0.58203125, "learning_rate": 0.0014248497796256275, "loss": 0.3066, "step": 37338 }, { "epoch": 0.06620690633424912, "grad_norm": 0.390625, "learning_rate": 0.0014247942725729972, "loss": 0.313, "step": 37340 }, { "epoch": 0.06621045249955894, "grad_norm": 0.92578125, "learning_rate": 0.0014247387640999056, "loss": 0.2108, "step": 37342 }, { "epoch": 0.06621399866486877, "grad_norm": 0.703125, "learning_rate": 0.0014246832542065952, "loss": 0.1537, "step": 37344 }, { "epoch": 0.06621754483017858, "grad_norm": 0.42578125, "learning_rate": 0.001424627742893309, "loss": 0.1979, "step": 37346 }, { "epoch": 0.0662210909954884, "grad_norm": 0.35546875, "learning_rate": 0.0014245722301602893, "loss": 0.1326, "step": 37348 }, { "epoch": 0.06622463716079821, "grad_norm": 0.21484375, "learning_rate": 0.0014245167160077796, "loss": 0.2366, "step": 37350 }, { "epoch": 0.06622818332610803, "grad_norm": 0.2412109375, "learning_rate": 0.001424461200436022, "loss": 0.3163, "step": 37352 }, { "epoch": 0.06623172949141784, "grad_norm": 0.453125, "learning_rate": 0.0014244056834452595, "loss": 0.1565, "step": 37354 }, { "epoch": 0.06623527565672765, "grad_norm": 0.640625, "learning_rate": 0.001424350165035735, "loss": 0.1595, "step": 37356 }, { "epoch": 0.06623882182203747, "grad_norm": 0.28125, "learning_rate": 0.0014242946452076915, "loss": 0.1767, "step": 37358 }, { "epoch": 0.06624236798734728, "grad_norm": 0.75390625, "learning_rate": 0.0014242391239613714, "loss": 0.1787, "step": 37360 }, { "epoch": 0.0662459141526571, "grad_norm": 0.546875, "learning_rate": 0.0014241836012970177, "loss": 0.1628, "step": 37362 }, { "epoch": 0.06624946031796691, "grad_norm": 0.294921875, "learning_rate": 0.0014241280772148734, "loss": 0.1385, "step": 37364 }, { "epoch": 0.06625300648327673, "grad_norm": 1.34375, "learning_rate": 0.0014240725517151813, "loss": 0.1858, "step": 37366 }, { "epoch": 0.06625655264858654, "grad_norm": 1.3125, "learning_rate": 0.0014240170247981838, "loss": 0.1873, "step": 37368 }, { "epoch": 0.06626009881389636, "grad_norm": 1.5, "learning_rate": 0.0014239614964641242, "loss": 0.2223, "step": 37370 }, { "epoch": 0.06626364497920617, "grad_norm": 0.5625, "learning_rate": 0.0014239059667132453, "loss": 0.1777, "step": 37372 }, { "epoch": 0.06626719114451599, "grad_norm": 0.7421875, "learning_rate": 0.0014238504355457895, "loss": 0.2262, "step": 37374 }, { "epoch": 0.0662707373098258, "grad_norm": 0.171875, "learning_rate": 0.0014237949029620002, "loss": 0.2039, "step": 37376 }, { "epoch": 0.06627428347513561, "grad_norm": 1.1328125, "learning_rate": 0.00142373936896212, "loss": 0.1545, "step": 37378 }, { "epoch": 0.06627782964044543, "grad_norm": 1.5234375, "learning_rate": 0.001423683833546392, "loss": 0.1821, "step": 37380 }, { "epoch": 0.06628137580575524, "grad_norm": 0.271484375, "learning_rate": 0.001423628296715059, "loss": 0.1692, "step": 37382 }, { "epoch": 0.06628492197106506, "grad_norm": 0.5234375, "learning_rate": 0.0014235727584683635, "loss": 0.2125, "step": 37384 }, { "epoch": 0.06628846813637487, "grad_norm": 0.373046875, "learning_rate": 0.001423517218806549, "loss": 0.2159, "step": 37386 }, { "epoch": 0.0662920143016847, "grad_norm": 0.28515625, "learning_rate": 0.0014234616777298581, "loss": 0.1527, "step": 37388 }, { "epoch": 0.06629556046699452, "grad_norm": 0.31640625, "learning_rate": 0.0014234061352385336, "loss": 0.161, "step": 37390 }, { "epoch": 0.06629910663230433, "grad_norm": 0.2177734375, "learning_rate": 0.0014233505913328187, "loss": 0.1688, "step": 37392 }, { "epoch": 0.06630265279761414, "grad_norm": 0.3515625, "learning_rate": 0.0014232950460129556, "loss": 0.2113, "step": 37394 }, { "epoch": 0.06630619896292396, "grad_norm": 0.32421875, "learning_rate": 0.0014232394992791885, "loss": 0.1484, "step": 37396 }, { "epoch": 0.06630974512823377, "grad_norm": 0.2138671875, "learning_rate": 0.0014231839511317588, "loss": 0.1443, "step": 37398 }, { "epoch": 0.06631329129354359, "grad_norm": 0.70703125, "learning_rate": 0.0014231284015709107, "loss": 0.4504, "step": 37400 }, { "epoch": 0.0663168374588534, "grad_norm": 0.23828125, "learning_rate": 0.0014230728505968868, "loss": 0.1718, "step": 37402 }, { "epoch": 0.06632038362416322, "grad_norm": 0.515625, "learning_rate": 0.0014230172982099296, "loss": 0.1779, "step": 37404 }, { "epoch": 0.06632392978947303, "grad_norm": 0.439453125, "learning_rate": 0.0014229617444102825, "loss": 0.2164, "step": 37406 }, { "epoch": 0.06632747595478285, "grad_norm": 0.318359375, "learning_rate": 0.0014229061891981884, "loss": 0.1896, "step": 37408 }, { "epoch": 0.06633102212009266, "grad_norm": 0.52734375, "learning_rate": 0.0014228506325738902, "loss": 0.2448, "step": 37410 }, { "epoch": 0.06633456828540248, "grad_norm": 0.40625, "learning_rate": 0.0014227950745376308, "loss": 0.1804, "step": 37412 }, { "epoch": 0.06633811445071229, "grad_norm": 0.58984375, "learning_rate": 0.001422739515089653, "loss": 0.2256, "step": 37414 }, { "epoch": 0.0663416606160221, "grad_norm": 0.392578125, "learning_rate": 0.0014226839542302006, "loss": 0.1826, "step": 37416 }, { "epoch": 0.06634520678133192, "grad_norm": 5.625, "learning_rate": 0.0014226283919595154, "loss": 0.2268, "step": 37418 }, { "epoch": 0.06634875294664173, "grad_norm": 0.388671875, "learning_rate": 0.0014225728282778413, "loss": 0.1687, "step": 37420 }, { "epoch": 0.06635229911195155, "grad_norm": 0.359375, "learning_rate": 0.0014225172631854208, "loss": 0.1465, "step": 37422 }, { "epoch": 0.06635584527726136, "grad_norm": 0.18359375, "learning_rate": 0.0014224616966824976, "loss": 0.2361, "step": 37424 }, { "epoch": 0.06635939144257118, "grad_norm": 0.80859375, "learning_rate": 0.001422406128769314, "loss": 0.2917, "step": 37426 }, { "epoch": 0.06636293760788099, "grad_norm": 0.400390625, "learning_rate": 0.0014223505594461133, "loss": 0.2081, "step": 37428 }, { "epoch": 0.0663664837731908, "grad_norm": 0.9453125, "learning_rate": 0.0014222949887131384, "loss": 0.2403, "step": 37430 }, { "epoch": 0.06637002993850062, "grad_norm": 1.859375, "learning_rate": 0.0014222394165706326, "loss": 0.2442, "step": 37432 }, { "epoch": 0.06637357610381045, "grad_norm": 0.53515625, "learning_rate": 0.0014221838430188386, "loss": 0.1891, "step": 37434 }, { "epoch": 0.06637712226912026, "grad_norm": 0.453125, "learning_rate": 0.001422128268058, "loss": 0.2332, "step": 37436 }, { "epoch": 0.06638066843443008, "grad_norm": 0.47265625, "learning_rate": 0.0014220726916883586, "loss": 0.1994, "step": 37438 }, { "epoch": 0.0663842145997399, "grad_norm": 0.240234375, "learning_rate": 0.001422017113910159, "loss": 0.1556, "step": 37440 }, { "epoch": 0.06638776076504971, "grad_norm": 0.349609375, "learning_rate": 0.0014219615347236436, "loss": 0.1421, "step": 37442 }, { "epoch": 0.06639130693035952, "grad_norm": 0.51953125, "learning_rate": 0.0014219059541290553, "loss": 0.1607, "step": 37444 }, { "epoch": 0.06639485309566934, "grad_norm": 0.306640625, "learning_rate": 0.0014218503721266376, "loss": 0.1928, "step": 37446 }, { "epoch": 0.06639839926097915, "grad_norm": 1.8359375, "learning_rate": 0.001421794788716633, "loss": 0.3405, "step": 37448 }, { "epoch": 0.06640194542628897, "grad_norm": 0.333984375, "learning_rate": 0.0014217392038992848, "loss": 0.1559, "step": 37450 }, { "epoch": 0.06640549159159878, "grad_norm": 1.1796875, "learning_rate": 0.0014216836176748364, "loss": 0.2223, "step": 37452 }, { "epoch": 0.0664090377569086, "grad_norm": 0.427734375, "learning_rate": 0.001421628030043531, "loss": 0.1661, "step": 37454 }, { "epoch": 0.06641258392221841, "grad_norm": 0.59765625, "learning_rate": 0.001421572441005611, "loss": 0.2503, "step": 37456 }, { "epoch": 0.06641613008752822, "grad_norm": 0.345703125, "learning_rate": 0.0014215168505613202, "loss": 0.1865, "step": 37458 }, { "epoch": 0.06641967625283804, "grad_norm": 0.578125, "learning_rate": 0.0014214612587109012, "loss": 0.1688, "step": 37460 }, { "epoch": 0.06642322241814785, "grad_norm": 0.30078125, "learning_rate": 0.0014214056654545976, "loss": 0.1882, "step": 37462 }, { "epoch": 0.06642676858345767, "grad_norm": 0.3828125, "learning_rate": 0.0014213500707926524, "loss": 0.1713, "step": 37464 }, { "epoch": 0.06643031474876748, "grad_norm": 0.94140625, "learning_rate": 0.0014212944747253086, "loss": 0.2066, "step": 37466 }, { "epoch": 0.0664338609140773, "grad_norm": 1.2734375, "learning_rate": 0.001421238877252809, "loss": 0.169, "step": 37468 }, { "epoch": 0.06643740707938711, "grad_norm": 0.40234375, "learning_rate": 0.0014211832783753975, "loss": 0.2077, "step": 37470 }, { "epoch": 0.06644095324469693, "grad_norm": 0.3671875, "learning_rate": 0.0014211276780933166, "loss": 0.2005, "step": 37472 }, { "epoch": 0.06644449941000674, "grad_norm": 0.3515625, "learning_rate": 0.0014210720764068104, "loss": 0.1488, "step": 37474 }, { "epoch": 0.06644804557531656, "grad_norm": 0.369140625, "learning_rate": 0.0014210164733161209, "loss": 0.3038, "step": 37476 }, { "epoch": 0.06645159174062637, "grad_norm": 0.2412109375, "learning_rate": 0.001420960868821492, "loss": 0.2109, "step": 37478 }, { "epoch": 0.0664551379059362, "grad_norm": 0.294921875, "learning_rate": 0.0014209052629231668, "loss": 0.1579, "step": 37480 }, { "epoch": 0.06645868407124601, "grad_norm": 0.318359375, "learning_rate": 0.0014208496556213882, "loss": 0.1618, "step": 37482 }, { "epoch": 0.06646223023655583, "grad_norm": 0.1953125, "learning_rate": 0.0014207940469163995, "loss": 0.1736, "step": 37484 }, { "epoch": 0.06646577640186564, "grad_norm": 0.2470703125, "learning_rate": 0.0014207384368084444, "loss": 0.2026, "step": 37486 }, { "epoch": 0.06646932256717546, "grad_norm": 0.25, "learning_rate": 0.001420682825297765, "loss": 0.1649, "step": 37488 }, { "epoch": 0.06647286873248527, "grad_norm": 0.515625, "learning_rate": 0.0014206272123846058, "loss": 0.2174, "step": 37490 }, { "epoch": 0.06647641489779509, "grad_norm": 0.447265625, "learning_rate": 0.0014205715980692092, "loss": 0.2247, "step": 37492 }, { "epoch": 0.0664799610631049, "grad_norm": 0.45703125, "learning_rate": 0.0014205159823518188, "loss": 0.2282, "step": 37494 }, { "epoch": 0.06648350722841471, "grad_norm": 1.6875, "learning_rate": 0.0014204603652326773, "loss": 0.2191, "step": 37496 }, { "epoch": 0.06648705339372453, "grad_norm": 0.3671875, "learning_rate": 0.0014204047467120285, "loss": 0.1725, "step": 37498 }, { "epoch": 0.06649059955903434, "grad_norm": 0.5546875, "learning_rate": 0.0014203491267901157, "loss": 0.1582, "step": 37500 }, { "epoch": 0.06649414572434416, "grad_norm": 0.296875, "learning_rate": 0.0014202935054671818, "loss": 0.2006, "step": 37502 }, { "epoch": 0.06649769188965397, "grad_norm": 3.1875, "learning_rate": 0.0014202378827434697, "loss": 0.3244, "step": 37504 }, { "epoch": 0.06650123805496379, "grad_norm": 0.400390625, "learning_rate": 0.0014201822586192238, "loss": 0.2808, "step": 37506 }, { "epoch": 0.0665047842202736, "grad_norm": 0.44140625, "learning_rate": 0.0014201266330946861, "loss": 0.2018, "step": 37508 }, { "epoch": 0.06650833038558342, "grad_norm": 1.7109375, "learning_rate": 0.0014200710061701009, "loss": 0.3386, "step": 37510 }, { "epoch": 0.06651187655089323, "grad_norm": 0.310546875, "learning_rate": 0.0014200153778457106, "loss": 0.2099, "step": 37512 }, { "epoch": 0.06651542271620305, "grad_norm": 0.490234375, "learning_rate": 0.0014199597481217591, "loss": 0.1743, "step": 37514 }, { "epoch": 0.06651896888151286, "grad_norm": 5.25, "learning_rate": 0.0014199041169984897, "loss": 0.4069, "step": 37516 }, { "epoch": 0.06652251504682268, "grad_norm": 0.58203125, "learning_rate": 0.0014198484844761454, "loss": 0.2001, "step": 37518 }, { "epoch": 0.06652606121213249, "grad_norm": 0.5546875, "learning_rate": 0.0014197928505549696, "loss": 0.171, "step": 37520 }, { "epoch": 0.0665296073774423, "grad_norm": 0.82421875, "learning_rate": 0.0014197372152352057, "loss": 0.184, "step": 37522 }, { "epoch": 0.06653315354275213, "grad_norm": 0.48046875, "learning_rate": 0.001419681578517097, "loss": 0.2236, "step": 37524 }, { "epoch": 0.06653669970806195, "grad_norm": 0.314453125, "learning_rate": 0.0014196259404008864, "loss": 0.2118, "step": 37526 }, { "epoch": 0.06654024587337176, "grad_norm": 0.25390625, "learning_rate": 0.0014195703008868178, "loss": 0.1564, "step": 37528 }, { "epoch": 0.06654379203868158, "grad_norm": 0.54296875, "learning_rate": 0.0014195146599751342, "loss": 0.1861, "step": 37530 }, { "epoch": 0.06654733820399139, "grad_norm": 0.6484375, "learning_rate": 0.0014194590176660796, "loss": 0.2699, "step": 37532 }, { "epoch": 0.0665508843693012, "grad_norm": 0.458984375, "learning_rate": 0.0014194033739598963, "loss": 0.2387, "step": 37534 }, { "epoch": 0.06655443053461102, "grad_norm": 0.48046875, "learning_rate": 0.0014193477288568285, "loss": 0.208, "step": 37536 }, { "epoch": 0.06655797669992083, "grad_norm": 1.65625, "learning_rate": 0.001419292082357119, "loss": 0.2327, "step": 37538 }, { "epoch": 0.06656152286523065, "grad_norm": 1.1640625, "learning_rate": 0.0014192364344610114, "loss": 0.1486, "step": 37540 }, { "epoch": 0.06656506903054046, "grad_norm": 4.625, "learning_rate": 0.001419180785168749, "loss": 0.1827, "step": 37542 }, { "epoch": 0.06656861519585028, "grad_norm": 0.2578125, "learning_rate": 0.0014191251344805756, "loss": 0.1442, "step": 37544 }, { "epoch": 0.06657216136116009, "grad_norm": 1.7890625, "learning_rate": 0.001419069482396734, "loss": 0.191, "step": 37546 }, { "epoch": 0.06657570752646991, "grad_norm": 0.609375, "learning_rate": 0.0014190138289174679, "loss": 0.1833, "step": 37548 }, { "epoch": 0.06657925369177972, "grad_norm": 0.43359375, "learning_rate": 0.0014189581740430203, "loss": 0.198, "step": 37550 }, { "epoch": 0.06658279985708954, "grad_norm": 0.51953125, "learning_rate": 0.0014189025177736355, "loss": 0.2372, "step": 37552 }, { "epoch": 0.06658634602239935, "grad_norm": 0.1767578125, "learning_rate": 0.0014188468601095561, "loss": 0.1634, "step": 37554 }, { "epoch": 0.06658989218770917, "grad_norm": 0.44140625, "learning_rate": 0.0014187912010510256, "loss": 0.2132, "step": 37556 }, { "epoch": 0.06659343835301898, "grad_norm": 0.392578125, "learning_rate": 0.0014187355405982877, "loss": 0.3887, "step": 37558 }, { "epoch": 0.0665969845183288, "grad_norm": 0.828125, "learning_rate": 0.0014186798787515856, "loss": 0.2035, "step": 37560 }, { "epoch": 0.06660053068363861, "grad_norm": 0.65234375, "learning_rate": 0.0014186242155111628, "loss": 0.2017, "step": 37562 }, { "epoch": 0.06660407684894842, "grad_norm": 0.3046875, "learning_rate": 0.0014185685508772627, "loss": 0.1746, "step": 37564 }, { "epoch": 0.06660762301425824, "grad_norm": 0.29296875, "learning_rate": 0.001418512884850129, "loss": 0.3041, "step": 37566 }, { "epoch": 0.06661116917956805, "grad_norm": 0.51953125, "learning_rate": 0.0014184572174300047, "loss": 0.2298, "step": 37568 }, { "epoch": 0.06661471534487788, "grad_norm": 1.2421875, "learning_rate": 0.0014184015486171338, "loss": 0.3456, "step": 37570 }, { "epoch": 0.0666182615101877, "grad_norm": 0.56640625, "learning_rate": 0.0014183458784117592, "loss": 0.2807, "step": 37572 }, { "epoch": 0.06662180767549751, "grad_norm": 0.53125, "learning_rate": 0.0014182902068141249, "loss": 0.1707, "step": 37574 }, { "epoch": 0.06662535384080732, "grad_norm": 1.4140625, "learning_rate": 0.0014182345338244742, "loss": 0.2557, "step": 37576 }, { "epoch": 0.06662890000611714, "grad_norm": 0.7109375, "learning_rate": 0.0014181788594430501, "loss": 0.2071, "step": 37578 }, { "epoch": 0.06663244617142695, "grad_norm": 0.326171875, "learning_rate": 0.001418123183670097, "loss": 0.1587, "step": 37580 }, { "epoch": 0.06663599233673677, "grad_norm": 2.09375, "learning_rate": 0.0014180675065058573, "loss": 0.2365, "step": 37582 }, { "epoch": 0.06663953850204658, "grad_norm": 0.365234375, "learning_rate": 0.0014180118279505753, "loss": 0.1756, "step": 37584 }, { "epoch": 0.0666430846673564, "grad_norm": 0.73828125, "learning_rate": 0.0014179561480044942, "loss": 0.2499, "step": 37586 }, { "epoch": 0.06664663083266621, "grad_norm": 0.271484375, "learning_rate": 0.001417900466667858, "loss": 0.2207, "step": 37588 }, { "epoch": 0.06665017699797603, "grad_norm": 0.7578125, "learning_rate": 0.0014178447839409095, "loss": 0.1859, "step": 37590 }, { "epoch": 0.06665372316328584, "grad_norm": 1.625, "learning_rate": 0.0014177890998238925, "loss": 0.2766, "step": 37592 }, { "epoch": 0.06665726932859566, "grad_norm": 0.53125, "learning_rate": 0.0014177334143170507, "loss": 0.1801, "step": 37594 }, { "epoch": 0.06666081549390547, "grad_norm": 0.734375, "learning_rate": 0.0014176777274206279, "loss": 0.2163, "step": 37596 }, { "epoch": 0.06666436165921528, "grad_norm": 0.2314453125, "learning_rate": 0.0014176220391348667, "loss": 0.2238, "step": 37598 }, { "epoch": 0.0666679078245251, "grad_norm": 0.6171875, "learning_rate": 0.0014175663494600114, "loss": 0.1737, "step": 37600 }, { "epoch": 0.06667145398983491, "grad_norm": 1.0, "learning_rate": 0.001417510658396305, "loss": 0.2874, "step": 37602 }, { "epoch": 0.06667500015514473, "grad_norm": 0.26171875, "learning_rate": 0.0014174549659439917, "loss": 0.151, "step": 37604 }, { "epoch": 0.06667854632045454, "grad_norm": 0.5078125, "learning_rate": 0.0014173992721033149, "loss": 0.1412, "step": 37606 }, { "epoch": 0.06668209248576436, "grad_norm": 0.5625, "learning_rate": 0.001417343576874518, "loss": 0.2405, "step": 37608 }, { "epoch": 0.06668563865107417, "grad_norm": 0.984375, "learning_rate": 0.0014172878802578446, "loss": 0.1942, "step": 37610 }, { "epoch": 0.06668918481638399, "grad_norm": 0.466796875, "learning_rate": 0.001417232182253538, "loss": 0.1877, "step": 37612 }, { "epoch": 0.0666927309816938, "grad_norm": 0.60546875, "learning_rate": 0.0014171764828618425, "loss": 0.1726, "step": 37614 }, { "epoch": 0.06669627714700363, "grad_norm": 0.314453125, "learning_rate": 0.0014171207820830012, "loss": 0.1234, "step": 37616 }, { "epoch": 0.06669982331231344, "grad_norm": 0.328125, "learning_rate": 0.0014170650799172578, "loss": 0.1873, "step": 37618 }, { "epoch": 0.06670336947762326, "grad_norm": 2.71875, "learning_rate": 0.0014170093763648556, "loss": 0.3767, "step": 37620 }, { "epoch": 0.06670691564293307, "grad_norm": 0.341796875, "learning_rate": 0.0014169536714260392, "loss": 0.1729, "step": 37622 }, { "epoch": 0.06671046180824289, "grad_norm": 0.3984375, "learning_rate": 0.001416897965101051, "loss": 0.1846, "step": 37624 }, { "epoch": 0.0667140079735527, "grad_norm": 0.34375, "learning_rate": 0.0014168422573901354, "loss": 0.1853, "step": 37626 }, { "epoch": 0.06671755413886252, "grad_norm": 0.5703125, "learning_rate": 0.0014167865482935357, "loss": 0.1324, "step": 37628 }, { "epoch": 0.06672110030417233, "grad_norm": 0.275390625, "learning_rate": 0.0014167308378114961, "loss": 0.2048, "step": 37630 }, { "epoch": 0.06672464646948215, "grad_norm": 0.1279296875, "learning_rate": 0.0014166751259442592, "loss": 0.1168, "step": 37632 }, { "epoch": 0.06672819263479196, "grad_norm": 0.828125, "learning_rate": 0.0014166194126920696, "loss": 0.2124, "step": 37634 }, { "epoch": 0.06673173880010178, "grad_norm": 0.63671875, "learning_rate": 0.0014165636980551701, "loss": 0.1795, "step": 37636 }, { "epoch": 0.06673528496541159, "grad_norm": 0.30859375, "learning_rate": 0.0014165079820338054, "loss": 0.207, "step": 37638 }, { "epoch": 0.0667388311307214, "grad_norm": 0.921875, "learning_rate": 0.0014164522646282183, "loss": 0.1883, "step": 37640 }, { "epoch": 0.06674237729603122, "grad_norm": 0.48046875, "learning_rate": 0.0014163965458386532, "loss": 0.221, "step": 37642 }, { "epoch": 0.06674592346134103, "grad_norm": 1.6171875, "learning_rate": 0.001416340825665353, "loss": 0.1809, "step": 37644 }, { "epoch": 0.06674946962665085, "grad_norm": 0.71484375, "learning_rate": 0.0014162851041085618, "loss": 0.2412, "step": 37646 }, { "epoch": 0.06675301579196066, "grad_norm": 0.9453125, "learning_rate": 0.0014162293811685236, "loss": 0.2272, "step": 37648 }, { "epoch": 0.06675656195727048, "grad_norm": 0.55078125, "learning_rate": 0.0014161736568454818, "loss": 0.174, "step": 37650 }, { "epoch": 0.06676010812258029, "grad_norm": 0.8984375, "learning_rate": 0.0014161179311396796, "loss": 0.1594, "step": 37652 }, { "epoch": 0.0667636542878901, "grad_norm": 0.671875, "learning_rate": 0.0014160622040513618, "loss": 0.1876, "step": 37654 }, { "epoch": 0.06676720045319992, "grad_norm": 0.6640625, "learning_rate": 0.001416006475580771, "loss": 0.2195, "step": 37656 }, { "epoch": 0.06677074661850974, "grad_norm": 0.291015625, "learning_rate": 0.0014159507457281516, "loss": 0.2887, "step": 37658 }, { "epoch": 0.06677429278381956, "grad_norm": 0.59765625, "learning_rate": 0.001415895014493747, "loss": 0.1456, "step": 37660 }, { "epoch": 0.06677783894912938, "grad_norm": 0.279296875, "learning_rate": 0.0014158392818778014, "loss": 0.2402, "step": 37662 }, { "epoch": 0.06678138511443919, "grad_norm": 1.1328125, "learning_rate": 0.001415783547880558, "loss": 0.443, "step": 37664 }, { "epoch": 0.06678493127974901, "grad_norm": 0.2265625, "learning_rate": 0.0014157278125022613, "loss": 0.3335, "step": 37666 }, { "epoch": 0.06678847744505882, "grad_norm": 0.392578125, "learning_rate": 0.001415672075743154, "loss": 0.1977, "step": 37668 }, { "epoch": 0.06679202361036864, "grad_norm": 0.373046875, "learning_rate": 0.0014156163376034805, "loss": 0.1847, "step": 37670 }, { "epoch": 0.06679556977567845, "grad_norm": 0.26171875, "learning_rate": 0.0014155605980834844, "loss": 0.2925, "step": 37672 }, { "epoch": 0.06679911594098827, "grad_norm": 0.470703125, "learning_rate": 0.00141550485718341, "loss": 0.2119, "step": 37674 }, { "epoch": 0.06680266210629808, "grad_norm": 0.87890625, "learning_rate": 0.0014154491149035002, "loss": 0.2032, "step": 37676 }, { "epoch": 0.0668062082716079, "grad_norm": 0.458984375, "learning_rate": 0.0014153933712439993, "loss": 0.2193, "step": 37678 }, { "epoch": 0.06680975443691771, "grad_norm": 0.310546875, "learning_rate": 0.001415337626205151, "loss": 0.206, "step": 37680 }, { "epoch": 0.06681330060222752, "grad_norm": 1.703125, "learning_rate": 0.001415281879787199, "loss": 0.2849, "step": 37682 }, { "epoch": 0.06681684676753734, "grad_norm": 0.2890625, "learning_rate": 0.0014152261319903873, "loss": 0.194, "step": 37684 }, { "epoch": 0.06682039293284715, "grad_norm": 0.87890625, "learning_rate": 0.0014151703828149595, "loss": 0.2511, "step": 37686 }, { "epoch": 0.06682393909815697, "grad_norm": 0.40625, "learning_rate": 0.00141511463226116, "loss": 0.1932, "step": 37688 }, { "epoch": 0.06682748526346678, "grad_norm": 0.734375, "learning_rate": 0.0014150588803292315, "loss": 0.206, "step": 37690 }, { "epoch": 0.0668310314287766, "grad_norm": 0.447265625, "learning_rate": 0.0014150031270194188, "loss": 0.2043, "step": 37692 }, { "epoch": 0.06683457759408641, "grad_norm": 0.33984375, "learning_rate": 0.0014149473723319651, "loss": 0.2395, "step": 37694 }, { "epoch": 0.06683812375939623, "grad_norm": 0.259765625, "learning_rate": 0.0014148916162671147, "loss": 0.1422, "step": 37696 }, { "epoch": 0.06684166992470604, "grad_norm": 0.427734375, "learning_rate": 0.0014148358588251114, "loss": 0.2655, "step": 37698 }, { "epoch": 0.06684521609001585, "grad_norm": 0.376953125, "learning_rate": 0.0014147801000061989, "loss": 0.1459, "step": 37700 }, { "epoch": 0.06684876225532567, "grad_norm": 0.322265625, "learning_rate": 0.001414724339810621, "loss": 0.2364, "step": 37702 }, { "epoch": 0.06685230842063548, "grad_norm": 1.515625, "learning_rate": 0.001414668578238622, "loss": 0.2242, "step": 37704 }, { "epoch": 0.06685585458594531, "grad_norm": 0.39453125, "learning_rate": 0.001414612815290445, "loss": 0.1553, "step": 37706 }, { "epoch": 0.06685940075125513, "grad_norm": 1.7265625, "learning_rate": 0.0014145570509663343, "loss": 0.2396, "step": 37708 }, { "epoch": 0.06686294691656494, "grad_norm": 0.9765625, "learning_rate": 0.0014145012852665338, "loss": 0.1837, "step": 37710 }, { "epoch": 0.06686649308187476, "grad_norm": 1.75, "learning_rate": 0.0014144455181912877, "loss": 0.5242, "step": 37712 }, { "epoch": 0.06687003924718457, "grad_norm": 1.453125, "learning_rate": 0.0014143897497408391, "loss": 0.2209, "step": 37714 }, { "epoch": 0.06687358541249439, "grad_norm": 0.2265625, "learning_rate": 0.0014143339799154325, "loss": 0.1891, "step": 37716 }, { "epoch": 0.0668771315778042, "grad_norm": 0.84375, "learning_rate": 0.001414278208715312, "loss": 0.2593, "step": 37718 }, { "epoch": 0.06688067774311401, "grad_norm": 0.345703125, "learning_rate": 0.0014142224361407207, "loss": 0.1632, "step": 37720 }, { "epoch": 0.06688422390842383, "grad_norm": 0.302734375, "learning_rate": 0.0014141666621919034, "loss": 0.2295, "step": 37722 }, { "epoch": 0.06688777007373364, "grad_norm": 1.6796875, "learning_rate": 0.0014141108868691032, "loss": 0.2882, "step": 37724 }, { "epoch": 0.06689131623904346, "grad_norm": 0.3046875, "learning_rate": 0.0014140551101725644, "loss": 0.2254, "step": 37726 }, { "epoch": 0.06689486240435327, "grad_norm": 0.376953125, "learning_rate": 0.0014139993321025314, "loss": 0.2393, "step": 37728 }, { "epoch": 0.06689840856966309, "grad_norm": 0.63671875, "learning_rate": 0.0014139435526592475, "loss": 0.1908, "step": 37730 }, { "epoch": 0.0669019547349729, "grad_norm": 0.380859375, "learning_rate": 0.0014138877718429571, "loss": 0.2203, "step": 37732 }, { "epoch": 0.06690550090028272, "grad_norm": 0.56640625, "learning_rate": 0.0014138319896539033, "loss": 0.3896, "step": 37734 }, { "epoch": 0.06690904706559253, "grad_norm": 0.349609375, "learning_rate": 0.0014137762060923314, "loss": 0.2426, "step": 37736 }, { "epoch": 0.06691259323090235, "grad_norm": 2.625, "learning_rate": 0.0014137204211584842, "loss": 0.282, "step": 37738 }, { "epoch": 0.06691613939621216, "grad_norm": 0.78515625, "learning_rate": 0.0014136646348526063, "loss": 0.1812, "step": 37740 }, { "epoch": 0.06691968556152197, "grad_norm": 0.361328125, "learning_rate": 0.0014136088471749413, "loss": 0.2213, "step": 37742 }, { "epoch": 0.06692323172683179, "grad_norm": 1.2109375, "learning_rate": 0.0014135530581257337, "loss": 0.1624, "step": 37744 }, { "epoch": 0.0669267778921416, "grad_norm": 0.494140625, "learning_rate": 0.0014134972677052266, "loss": 0.1981, "step": 37746 }, { "epoch": 0.06693032405745142, "grad_norm": 0.578125, "learning_rate": 0.001413441475913665, "loss": 0.2082, "step": 37748 }, { "epoch": 0.06693387022276123, "grad_norm": 0.95703125, "learning_rate": 0.0014133856827512922, "loss": 0.4842, "step": 37750 }, { "epoch": 0.06693741638807106, "grad_norm": 0.44921875, "learning_rate": 0.0014133298882183527, "loss": 0.1709, "step": 37752 }, { "epoch": 0.06694096255338088, "grad_norm": 0.40234375, "learning_rate": 0.00141327409231509, "loss": 0.171, "step": 37754 }, { "epoch": 0.06694450871869069, "grad_norm": 0.75390625, "learning_rate": 0.0014132182950417488, "loss": 0.1958, "step": 37756 }, { "epoch": 0.0669480548840005, "grad_norm": 2.828125, "learning_rate": 0.0014131624963985723, "loss": 0.3072, "step": 37758 }, { "epoch": 0.06695160104931032, "grad_norm": 0.251953125, "learning_rate": 0.001413106696385805, "loss": 0.176, "step": 37760 }, { "epoch": 0.06695514721462013, "grad_norm": 0.5859375, "learning_rate": 0.0014130508950036913, "loss": 0.2251, "step": 37762 }, { "epoch": 0.06695869337992995, "grad_norm": 0.490234375, "learning_rate": 0.0014129950922524743, "loss": 0.2637, "step": 37764 }, { "epoch": 0.06696223954523976, "grad_norm": 0.6484375, "learning_rate": 0.001412939288132399, "loss": 0.1544, "step": 37766 }, { "epoch": 0.06696578571054958, "grad_norm": 0.447265625, "learning_rate": 0.0014128834826437088, "loss": 0.1755, "step": 37768 }, { "epoch": 0.06696933187585939, "grad_norm": 0.439453125, "learning_rate": 0.0014128276757866477, "loss": 0.2043, "step": 37770 }, { "epoch": 0.0669728780411692, "grad_norm": 0.2392578125, "learning_rate": 0.0014127718675614606, "loss": 0.2637, "step": 37772 }, { "epoch": 0.06697642420647902, "grad_norm": 0.28125, "learning_rate": 0.0014127160579683908, "loss": 0.1467, "step": 37774 }, { "epoch": 0.06697997037178884, "grad_norm": 0.34765625, "learning_rate": 0.0014126602470076826, "loss": 0.1942, "step": 37776 }, { "epoch": 0.06698351653709865, "grad_norm": 0.5703125, "learning_rate": 0.0014126044346795801, "loss": 0.2474, "step": 37778 }, { "epoch": 0.06698706270240846, "grad_norm": 1.2265625, "learning_rate": 0.0014125486209843276, "loss": 0.1811, "step": 37780 }, { "epoch": 0.06699060886771828, "grad_norm": 0.85546875, "learning_rate": 0.0014124928059221686, "loss": 0.2249, "step": 37782 }, { "epoch": 0.0669941550330281, "grad_norm": 0.82421875, "learning_rate": 0.0014124369894933477, "loss": 0.198, "step": 37784 }, { "epoch": 0.06699770119833791, "grad_norm": 0.294921875, "learning_rate": 0.001412381171698109, "loss": 0.1259, "step": 37786 }, { "epoch": 0.06700124736364772, "grad_norm": 0.859375, "learning_rate": 0.0014123253525366963, "loss": 0.1821, "step": 37788 }, { "epoch": 0.06700479352895754, "grad_norm": 0.46484375, "learning_rate": 0.0014122695320093542, "loss": 0.1887, "step": 37790 }, { "epoch": 0.06700833969426735, "grad_norm": 0.54296875, "learning_rate": 0.0014122137101163265, "loss": 0.1649, "step": 37792 }, { "epoch": 0.06701188585957717, "grad_norm": 0.9921875, "learning_rate": 0.0014121578868578574, "loss": 0.2041, "step": 37794 }, { "epoch": 0.067015432024887, "grad_norm": 1.546875, "learning_rate": 0.0014121020622341908, "loss": 0.2977, "step": 37796 }, { "epoch": 0.06701897819019681, "grad_norm": 0.314453125, "learning_rate": 0.0014120462362455714, "loss": 0.1509, "step": 37798 }, { "epoch": 0.06702252435550662, "grad_norm": 0.181640625, "learning_rate": 0.0014119904088922429, "loss": 0.1859, "step": 37800 }, { "epoch": 0.06702607052081644, "grad_norm": 1.03125, "learning_rate": 0.0014119345801744494, "loss": 0.1934, "step": 37802 }, { "epoch": 0.06702961668612625, "grad_norm": 0.5, "learning_rate": 0.0014118787500924355, "loss": 0.1915, "step": 37804 }, { "epoch": 0.06703316285143607, "grad_norm": 0.314453125, "learning_rate": 0.0014118229186464448, "loss": 0.1977, "step": 37806 }, { "epoch": 0.06703670901674588, "grad_norm": 1.1171875, "learning_rate": 0.0014117670858367218, "loss": 0.1561, "step": 37808 }, { "epoch": 0.0670402551820557, "grad_norm": 0.5625, "learning_rate": 0.001411711251663511, "loss": 0.2105, "step": 37810 }, { "epoch": 0.06704380134736551, "grad_norm": 0.61328125, "learning_rate": 0.0014116554161270561, "loss": 0.2465, "step": 37812 }, { "epoch": 0.06704734751267533, "grad_norm": 1.265625, "learning_rate": 0.0014115995792276014, "loss": 0.4522, "step": 37814 }, { "epoch": 0.06705089367798514, "grad_norm": 0.466796875, "learning_rate": 0.0014115437409653912, "loss": 0.1883, "step": 37816 }, { "epoch": 0.06705443984329496, "grad_norm": 0.546875, "learning_rate": 0.0014114879013406695, "loss": 0.1698, "step": 37818 }, { "epoch": 0.06705798600860477, "grad_norm": 0.419921875, "learning_rate": 0.0014114320603536808, "loss": 0.1622, "step": 37820 }, { "epoch": 0.06706153217391458, "grad_norm": 0.453125, "learning_rate": 0.001411376218004669, "loss": 0.1824, "step": 37822 }, { "epoch": 0.0670650783392244, "grad_norm": 1.0, "learning_rate": 0.0014113203742938786, "loss": 0.2754, "step": 37824 }, { "epoch": 0.06706862450453421, "grad_norm": 0.345703125, "learning_rate": 0.0014112645292215537, "loss": 0.1593, "step": 37826 }, { "epoch": 0.06707217066984403, "grad_norm": 0.8203125, "learning_rate": 0.0014112086827879388, "loss": 0.1871, "step": 37828 }, { "epoch": 0.06707571683515384, "grad_norm": 0.2353515625, "learning_rate": 0.0014111528349932774, "loss": 0.1927, "step": 37830 }, { "epoch": 0.06707926300046366, "grad_norm": 0.78515625, "learning_rate": 0.0014110969858378145, "loss": 0.272, "step": 37832 }, { "epoch": 0.06708280916577347, "grad_norm": 2.09375, "learning_rate": 0.001411041135321794, "loss": 0.2612, "step": 37834 }, { "epoch": 0.06708635533108329, "grad_norm": 0.212890625, "learning_rate": 0.0014109852834454605, "loss": 0.1881, "step": 37836 }, { "epoch": 0.0670899014963931, "grad_norm": 0.486328125, "learning_rate": 0.001410929430209058, "loss": 0.1776, "step": 37838 }, { "epoch": 0.06709344766170292, "grad_norm": 0.28515625, "learning_rate": 0.0014108735756128304, "loss": 0.139, "step": 37840 }, { "epoch": 0.06709699382701274, "grad_norm": 0.7890625, "learning_rate": 0.001410817719657023, "loss": 0.1707, "step": 37842 }, { "epoch": 0.06710053999232256, "grad_norm": 0.38671875, "learning_rate": 0.0014107618623418789, "loss": 0.2294, "step": 37844 }, { "epoch": 0.06710408615763237, "grad_norm": 0.30078125, "learning_rate": 0.0014107060036676429, "loss": 0.1751, "step": 37846 }, { "epoch": 0.06710763232294219, "grad_norm": 0.37109375, "learning_rate": 0.0014106501436345594, "loss": 0.2709, "step": 37848 }, { "epoch": 0.067111178488252, "grad_norm": 1.6171875, "learning_rate": 0.0014105942822428728, "loss": 0.4506, "step": 37850 }, { "epoch": 0.06711472465356182, "grad_norm": 0.36328125, "learning_rate": 0.001410538419492827, "loss": 0.1542, "step": 37852 }, { "epoch": 0.06711827081887163, "grad_norm": 0.3515625, "learning_rate": 0.0014104825553846667, "loss": 0.311, "step": 37854 }, { "epoch": 0.06712181698418145, "grad_norm": 0.283203125, "learning_rate": 0.001410426689918636, "loss": 0.1437, "step": 37856 }, { "epoch": 0.06712536314949126, "grad_norm": 1.7890625, "learning_rate": 0.0014103708230949793, "loss": 0.1941, "step": 37858 }, { "epoch": 0.06712890931480107, "grad_norm": 0.87109375, "learning_rate": 0.0014103149549139409, "loss": 0.2397, "step": 37860 }, { "epoch": 0.06713245548011089, "grad_norm": 0.1845703125, "learning_rate": 0.0014102590853757651, "loss": 0.1483, "step": 37862 }, { "epoch": 0.0671360016454207, "grad_norm": 1.96875, "learning_rate": 0.0014102032144806965, "loss": 0.1891, "step": 37864 }, { "epoch": 0.06713954781073052, "grad_norm": 0.1474609375, "learning_rate": 0.0014101473422289787, "loss": 0.1202, "step": 37866 }, { "epoch": 0.06714309397604033, "grad_norm": 0.9921875, "learning_rate": 0.0014100914686208572, "loss": 0.2429, "step": 37868 }, { "epoch": 0.06714664014135015, "grad_norm": 0.38671875, "learning_rate": 0.0014100355936565753, "loss": 0.1764, "step": 37870 }, { "epoch": 0.06715018630665996, "grad_norm": 0.44140625, "learning_rate": 0.0014099797173363782, "loss": 0.1865, "step": 37872 }, { "epoch": 0.06715373247196978, "grad_norm": 0.46875, "learning_rate": 0.0014099238396605097, "loss": 0.1758, "step": 37874 }, { "epoch": 0.06715727863727959, "grad_norm": 0.734375, "learning_rate": 0.0014098679606292142, "loss": 0.1934, "step": 37876 }, { "epoch": 0.0671608248025894, "grad_norm": 0.44921875, "learning_rate": 0.0014098120802427362, "loss": 0.1662, "step": 37878 }, { "epoch": 0.06716437096789922, "grad_norm": 0.6484375, "learning_rate": 0.0014097561985013203, "loss": 0.1884, "step": 37880 }, { "epoch": 0.06716791713320903, "grad_norm": 0.64453125, "learning_rate": 0.0014097003154052107, "loss": 0.1588, "step": 37882 }, { "epoch": 0.06717146329851885, "grad_norm": 1.0390625, "learning_rate": 0.0014096444309546516, "loss": 0.2228, "step": 37884 }, { "epoch": 0.06717500946382866, "grad_norm": 0.578125, "learning_rate": 0.0014095885451498877, "loss": 0.1757, "step": 37886 }, { "epoch": 0.06717855562913849, "grad_norm": 0.2138671875, "learning_rate": 0.0014095326579911635, "loss": 0.1384, "step": 37888 }, { "epoch": 0.0671821017944483, "grad_norm": 0.35546875, "learning_rate": 0.0014094767694787234, "loss": 0.1763, "step": 37890 }, { "epoch": 0.06718564795975812, "grad_norm": 0.328125, "learning_rate": 0.0014094208796128114, "loss": 0.2442, "step": 37892 }, { "epoch": 0.06718919412506794, "grad_norm": 1.0, "learning_rate": 0.0014093649883936724, "loss": 0.1778, "step": 37894 }, { "epoch": 0.06719274029037775, "grad_norm": 0.228515625, "learning_rate": 0.0014093090958215504, "loss": 0.1752, "step": 37896 }, { "epoch": 0.06719628645568756, "grad_norm": 0.41796875, "learning_rate": 0.0014092532018966902, "loss": 0.2082, "step": 37898 }, { "epoch": 0.06719983262099738, "grad_norm": 0.5, "learning_rate": 0.0014091973066193359, "loss": 0.1871, "step": 37900 }, { "epoch": 0.0672033787863072, "grad_norm": 0.2890625, "learning_rate": 0.0014091414099897323, "loss": 0.1953, "step": 37902 }, { "epoch": 0.06720692495161701, "grad_norm": 1.71875, "learning_rate": 0.001409085512008124, "loss": 0.2654, "step": 37904 }, { "epoch": 0.06721047111692682, "grad_norm": 0.53125, "learning_rate": 0.0014090296126747551, "loss": 0.2025, "step": 37906 }, { "epoch": 0.06721401728223664, "grad_norm": 0.1982421875, "learning_rate": 0.00140897371198987, "loss": 0.1808, "step": 37908 }, { "epoch": 0.06721756344754645, "grad_norm": 0.52734375, "learning_rate": 0.0014089178099537137, "loss": 0.1908, "step": 37910 }, { "epoch": 0.06722110961285627, "grad_norm": 0.5859375, "learning_rate": 0.00140886190656653, "loss": 0.1686, "step": 37912 }, { "epoch": 0.06722465577816608, "grad_norm": 0.640625, "learning_rate": 0.001408806001828564, "loss": 0.1784, "step": 37914 }, { "epoch": 0.0672282019434759, "grad_norm": 0.2578125, "learning_rate": 0.0014087500957400598, "loss": 0.1764, "step": 37916 }, { "epoch": 0.06723174810878571, "grad_norm": 1.484375, "learning_rate": 0.0014086941883012618, "loss": 0.2464, "step": 37918 }, { "epoch": 0.06723529427409553, "grad_norm": 0.35546875, "learning_rate": 0.0014086382795124152, "loss": 0.1793, "step": 37920 }, { "epoch": 0.06723884043940534, "grad_norm": 0.20703125, "learning_rate": 0.0014085823693737636, "loss": 0.177, "step": 37922 }, { "epoch": 0.06724238660471515, "grad_norm": 0.7890625, "learning_rate": 0.0014085264578855522, "loss": 0.1918, "step": 37924 }, { "epoch": 0.06724593277002497, "grad_norm": 0.390625, "learning_rate": 0.0014084705450480254, "loss": 0.1857, "step": 37926 }, { "epoch": 0.06724947893533478, "grad_norm": 0.46875, "learning_rate": 0.001408414630861427, "loss": 0.2024, "step": 37928 }, { "epoch": 0.0672530251006446, "grad_norm": 0.251953125, "learning_rate": 0.0014083587153260028, "loss": 0.1362, "step": 37930 }, { "epoch": 0.06725657126595443, "grad_norm": 1.1875, "learning_rate": 0.0014083027984419967, "loss": 0.2532, "step": 37932 }, { "epoch": 0.06726011743126424, "grad_norm": 1.1171875, "learning_rate": 0.0014082468802096531, "loss": 0.1993, "step": 37934 }, { "epoch": 0.06726366359657406, "grad_norm": 0.2216796875, "learning_rate": 0.0014081909606292165, "loss": 0.1701, "step": 37936 }, { "epoch": 0.06726720976188387, "grad_norm": 0.63671875, "learning_rate": 0.0014081350397009316, "loss": 0.1771, "step": 37938 }, { "epoch": 0.06727075592719368, "grad_norm": 0.267578125, "learning_rate": 0.0014080791174250434, "loss": 0.15, "step": 37940 }, { "epoch": 0.0672743020925035, "grad_norm": 0.4375, "learning_rate": 0.0014080231938017958, "loss": 0.1878, "step": 37942 }, { "epoch": 0.06727784825781331, "grad_norm": 0.478515625, "learning_rate": 0.0014079672688314339, "loss": 0.3073, "step": 37944 }, { "epoch": 0.06728139442312313, "grad_norm": 0.41796875, "learning_rate": 0.0014079113425142019, "loss": 0.1538, "step": 37946 }, { "epoch": 0.06728494058843294, "grad_norm": 0.328125, "learning_rate": 0.0014078554148503446, "loss": 0.189, "step": 37948 }, { "epoch": 0.06728848675374276, "grad_norm": 0.34765625, "learning_rate": 0.0014077994858401066, "loss": 0.2102, "step": 37950 }, { "epoch": 0.06729203291905257, "grad_norm": 0.259765625, "learning_rate": 0.001407743555483732, "loss": 0.1926, "step": 37952 }, { "epoch": 0.06729557908436239, "grad_norm": 0.9296875, "learning_rate": 0.0014076876237814665, "loss": 0.2533, "step": 37954 }, { "epoch": 0.0672991252496722, "grad_norm": 1.21875, "learning_rate": 0.0014076316907335536, "loss": 0.2249, "step": 37956 }, { "epoch": 0.06730267141498202, "grad_norm": 0.3671875, "learning_rate": 0.0014075757563402388, "loss": 0.234, "step": 37958 }, { "epoch": 0.06730621758029183, "grad_norm": 0.224609375, "learning_rate": 0.0014075198206017659, "loss": 0.2358, "step": 37960 }, { "epoch": 0.06730976374560164, "grad_norm": 0.34375, "learning_rate": 0.00140746388351838, "loss": 0.2069, "step": 37962 }, { "epoch": 0.06731330991091146, "grad_norm": 0.2890625, "learning_rate": 0.001407407945090326, "loss": 0.1968, "step": 37964 }, { "epoch": 0.06731685607622127, "grad_norm": 0.30078125, "learning_rate": 0.001407352005317848, "loss": 0.2091, "step": 37966 }, { "epoch": 0.06732040224153109, "grad_norm": 0.2470703125, "learning_rate": 0.0014072960642011905, "loss": 0.1699, "step": 37968 }, { "epoch": 0.0673239484068409, "grad_norm": 0.37890625, "learning_rate": 0.001407240121740599, "loss": 0.159, "step": 37970 }, { "epoch": 0.06732749457215072, "grad_norm": 0.255859375, "learning_rate": 0.0014071841779363173, "loss": 0.1777, "step": 37972 }, { "epoch": 0.06733104073746053, "grad_norm": 0.201171875, "learning_rate": 0.0014071282327885908, "loss": 0.1944, "step": 37974 }, { "epoch": 0.06733458690277035, "grad_norm": 0.466796875, "learning_rate": 0.0014070722862976636, "loss": 0.2113, "step": 37976 }, { "epoch": 0.06733813306808017, "grad_norm": 0.203125, "learning_rate": 0.001407016338463781, "loss": 0.231, "step": 37978 }, { "epoch": 0.06734167923338999, "grad_norm": 0.3046875, "learning_rate": 0.0014069603892871865, "loss": 0.2113, "step": 37980 }, { "epoch": 0.0673452253986998, "grad_norm": 1.59375, "learning_rate": 0.001406904438768126, "loss": 0.2414, "step": 37982 }, { "epoch": 0.06734877156400962, "grad_norm": 0.2734375, "learning_rate": 0.0014068484869068439, "loss": 0.2097, "step": 37984 }, { "epoch": 0.06735231772931943, "grad_norm": 0.53125, "learning_rate": 0.0014067925337035846, "loss": 0.2418, "step": 37986 }, { "epoch": 0.06735586389462925, "grad_norm": 0.2197265625, "learning_rate": 0.001406736579158593, "loss": 0.1535, "step": 37988 }, { "epoch": 0.06735941005993906, "grad_norm": 1.2890625, "learning_rate": 0.0014066806232721136, "loss": 0.3729, "step": 37990 }, { "epoch": 0.06736295622524888, "grad_norm": 0.24609375, "learning_rate": 0.0014066246660443913, "loss": 0.1553, "step": 37992 }, { "epoch": 0.06736650239055869, "grad_norm": 0.30078125, "learning_rate": 0.0014065687074756713, "loss": 0.2318, "step": 37994 }, { "epoch": 0.0673700485558685, "grad_norm": 0.453125, "learning_rate": 0.0014065127475661973, "loss": 0.1901, "step": 37996 }, { "epoch": 0.06737359472117832, "grad_norm": 0.57421875, "learning_rate": 0.0014064567863162148, "loss": 0.1613, "step": 37998 }, { "epoch": 0.06737714088648813, "grad_norm": 0.1796875, "learning_rate": 0.0014064008237259682, "loss": 0.1549, "step": 38000 }, { "epoch": 0.06738068705179795, "grad_norm": 0.6015625, "learning_rate": 0.0014063448597957024, "loss": 0.146, "step": 38002 }, { "epoch": 0.06738423321710776, "grad_norm": 0.201171875, "learning_rate": 0.0014062888945256624, "loss": 0.1683, "step": 38004 }, { "epoch": 0.06738777938241758, "grad_norm": 0.169921875, "learning_rate": 0.0014062329279160926, "loss": 0.2238, "step": 38006 }, { "epoch": 0.0673913255477274, "grad_norm": 0.404296875, "learning_rate": 0.0014061769599672377, "loss": 0.2846, "step": 38008 }, { "epoch": 0.06739487171303721, "grad_norm": 0.1748046875, "learning_rate": 0.0014061209906793429, "loss": 0.5551, "step": 38010 }, { "epoch": 0.06739841787834702, "grad_norm": 0.255859375, "learning_rate": 0.0014060650200526524, "loss": 0.2026, "step": 38012 }, { "epoch": 0.06740196404365684, "grad_norm": 0.345703125, "learning_rate": 0.0014060090480874112, "loss": 0.1679, "step": 38014 }, { "epoch": 0.06740551020896665, "grad_norm": 1.8359375, "learning_rate": 0.0014059530747838641, "loss": 0.1761, "step": 38016 }, { "epoch": 0.06740905637427647, "grad_norm": 1.71875, "learning_rate": 0.0014058971001422564, "loss": 0.3153, "step": 38018 }, { "epoch": 0.06741260253958628, "grad_norm": 0.431640625, "learning_rate": 0.0014058411241628325, "loss": 0.1588, "step": 38020 }, { "epoch": 0.0674161487048961, "grad_norm": 0.24609375, "learning_rate": 0.0014057851468458366, "loss": 0.1653, "step": 38022 }, { "epoch": 0.06741969487020592, "grad_norm": 0.466796875, "learning_rate": 0.0014057291681915148, "loss": 0.1444, "step": 38024 }, { "epoch": 0.06742324103551574, "grad_norm": 0.3125, "learning_rate": 0.0014056731882001107, "loss": 0.2766, "step": 38026 }, { "epoch": 0.06742678720082555, "grad_norm": 0.3671875, "learning_rate": 0.00140561720687187, "loss": 0.1719, "step": 38028 }, { "epoch": 0.06743033336613537, "grad_norm": 0.89453125, "learning_rate": 0.0014055612242070369, "loss": 0.2115, "step": 38030 }, { "epoch": 0.06743387953144518, "grad_norm": 1.3203125, "learning_rate": 0.0014055052402058563, "loss": 0.3211, "step": 38032 }, { "epoch": 0.067437425696755, "grad_norm": 0.1171875, "learning_rate": 0.0014054492548685735, "loss": 0.1905, "step": 38034 }, { "epoch": 0.06744097186206481, "grad_norm": 1.0078125, "learning_rate": 0.0014053932681954333, "loss": 0.1506, "step": 38036 }, { "epoch": 0.06744451802737463, "grad_norm": 1.1015625, "learning_rate": 0.0014053372801866798, "loss": 0.4415, "step": 38038 }, { "epoch": 0.06744806419268444, "grad_norm": 0.326171875, "learning_rate": 0.001405281290842559, "loss": 0.2139, "step": 38040 }, { "epoch": 0.06745161035799425, "grad_norm": 0.671875, "learning_rate": 0.0014052253001633152, "loss": 0.1976, "step": 38042 }, { "epoch": 0.06745515652330407, "grad_norm": 0.341796875, "learning_rate": 0.0014051693081491925, "loss": 0.2087, "step": 38044 }, { "epoch": 0.06745870268861388, "grad_norm": 0.294921875, "learning_rate": 0.001405113314800437, "loss": 0.148, "step": 38046 }, { "epoch": 0.0674622488539237, "grad_norm": 0.84765625, "learning_rate": 0.001405057320117293, "loss": 0.2992, "step": 38048 }, { "epoch": 0.06746579501923351, "grad_norm": 0.208984375, "learning_rate": 0.0014050013241000054, "loss": 0.1409, "step": 38050 }, { "epoch": 0.06746934118454333, "grad_norm": 1.0390625, "learning_rate": 0.0014049453267488195, "loss": 0.1579, "step": 38052 }, { "epoch": 0.06747288734985314, "grad_norm": 0.4609375, "learning_rate": 0.0014048893280639795, "loss": 0.1945, "step": 38054 }, { "epoch": 0.06747643351516296, "grad_norm": 0.4453125, "learning_rate": 0.0014048333280457309, "loss": 0.1646, "step": 38056 }, { "epoch": 0.06747997968047277, "grad_norm": 0.275390625, "learning_rate": 0.0014047773266943181, "loss": 0.2174, "step": 38058 }, { "epoch": 0.06748352584578259, "grad_norm": 0.2431640625, "learning_rate": 0.0014047213240099869, "loss": 0.1712, "step": 38060 }, { "epoch": 0.0674870720110924, "grad_norm": 1.2421875, "learning_rate": 0.0014046653199929812, "loss": 0.2986, "step": 38062 }, { "epoch": 0.06749061817640221, "grad_norm": 0.443359375, "learning_rate": 0.0014046093146435463, "loss": 0.1652, "step": 38064 }, { "epoch": 0.06749416434171203, "grad_norm": 0.484375, "learning_rate": 0.0014045533079619272, "loss": 0.1476, "step": 38066 }, { "epoch": 0.06749771050702186, "grad_norm": 0.4140625, "learning_rate": 0.0014044972999483689, "loss": 0.2063, "step": 38068 }, { "epoch": 0.06750125667233167, "grad_norm": 0.34765625, "learning_rate": 0.0014044412906031166, "loss": 0.2072, "step": 38070 }, { "epoch": 0.06750480283764149, "grad_norm": 0.3515625, "learning_rate": 0.0014043852799264145, "loss": 0.1803, "step": 38072 }, { "epoch": 0.0675083490029513, "grad_norm": 0.703125, "learning_rate": 0.001404329267918508, "loss": 0.2009, "step": 38074 }, { "epoch": 0.06751189516826112, "grad_norm": 0.75390625, "learning_rate": 0.0014042732545796425, "loss": 0.1728, "step": 38076 }, { "epoch": 0.06751544133357093, "grad_norm": 0.7265625, "learning_rate": 0.0014042172399100622, "loss": 0.1489, "step": 38078 }, { "epoch": 0.06751898749888074, "grad_norm": 0.2734375, "learning_rate": 0.0014041612239100124, "loss": 0.2742, "step": 38080 }, { "epoch": 0.06752253366419056, "grad_norm": 0.37109375, "learning_rate": 0.001404105206579738, "loss": 0.167, "step": 38082 }, { "epoch": 0.06752607982950037, "grad_norm": 0.72265625, "learning_rate": 0.0014040491879194843, "loss": 0.2185, "step": 38084 }, { "epoch": 0.06752962599481019, "grad_norm": 0.921875, "learning_rate": 0.001403993167929496, "loss": 0.1936, "step": 38086 }, { "epoch": 0.06753317216012, "grad_norm": 0.32421875, "learning_rate": 0.0014039371466100182, "loss": 0.1794, "step": 38088 }, { "epoch": 0.06753671832542982, "grad_norm": 0.24609375, "learning_rate": 0.0014038811239612957, "loss": 0.1902, "step": 38090 }, { "epoch": 0.06754026449073963, "grad_norm": 0.68359375, "learning_rate": 0.0014038250999835735, "loss": 0.1845, "step": 38092 }, { "epoch": 0.06754381065604945, "grad_norm": 0.390625, "learning_rate": 0.0014037690746770972, "loss": 0.1408, "step": 38094 }, { "epoch": 0.06754735682135926, "grad_norm": 0.408203125, "learning_rate": 0.0014037130480421113, "loss": 0.2018, "step": 38096 }, { "epoch": 0.06755090298666908, "grad_norm": 0.47265625, "learning_rate": 0.001403657020078861, "loss": 0.2839, "step": 38098 }, { "epoch": 0.06755444915197889, "grad_norm": 0.283203125, "learning_rate": 0.0014036009907875914, "loss": 0.1785, "step": 38100 }, { "epoch": 0.0675579953172887, "grad_norm": 0.4453125, "learning_rate": 0.001403544960168547, "loss": 0.2157, "step": 38102 }, { "epoch": 0.06756154148259852, "grad_norm": 0.6484375, "learning_rate": 0.0014034889282219735, "loss": 0.3959, "step": 38104 }, { "epoch": 0.06756508764790833, "grad_norm": 0.515625, "learning_rate": 0.0014034328949481156, "loss": 0.1897, "step": 38106 }, { "epoch": 0.06756863381321815, "grad_norm": 0.416015625, "learning_rate": 0.0014033768603472186, "loss": 0.1691, "step": 38108 }, { "epoch": 0.06757217997852796, "grad_norm": 0.30859375, "learning_rate": 0.0014033208244195274, "loss": 0.1608, "step": 38110 }, { "epoch": 0.06757572614383778, "grad_norm": 2.875, "learning_rate": 0.0014032647871652871, "loss": 0.288, "step": 38112 }, { "epoch": 0.0675792723091476, "grad_norm": 0.5234375, "learning_rate": 0.001403208748584743, "loss": 0.1209, "step": 38114 }, { "epoch": 0.06758281847445742, "grad_norm": 0.66796875, "learning_rate": 0.0014031527086781398, "loss": 0.2391, "step": 38116 }, { "epoch": 0.06758636463976724, "grad_norm": 0.27734375, "learning_rate": 0.0014030966674457228, "loss": 0.1977, "step": 38118 }, { "epoch": 0.06758991080507705, "grad_norm": 0.365234375, "learning_rate": 0.0014030406248877368, "loss": 0.1569, "step": 38120 }, { "epoch": 0.06759345697038686, "grad_norm": 1.0390625, "learning_rate": 0.0014029845810044272, "loss": 0.2053, "step": 38122 }, { "epoch": 0.06759700313569668, "grad_norm": 0.359375, "learning_rate": 0.0014029285357960392, "loss": 0.1207, "step": 38124 }, { "epoch": 0.0676005493010065, "grad_norm": 0.259765625, "learning_rate": 0.0014028724892628176, "loss": 0.165, "step": 38126 }, { "epoch": 0.06760409546631631, "grad_norm": 0.51953125, "learning_rate": 0.0014028164414050075, "loss": 0.2324, "step": 38128 }, { "epoch": 0.06760764163162612, "grad_norm": 0.625, "learning_rate": 0.0014027603922228547, "loss": 0.1968, "step": 38130 }, { "epoch": 0.06761118779693594, "grad_norm": 0.70703125, "learning_rate": 0.0014027043417166034, "loss": 0.2596, "step": 38132 }, { "epoch": 0.06761473396224575, "grad_norm": 0.412109375, "learning_rate": 0.0014026482898864994, "loss": 0.2205, "step": 38134 }, { "epoch": 0.06761828012755557, "grad_norm": 0.5234375, "learning_rate": 0.0014025922367327873, "loss": 0.2112, "step": 38136 }, { "epoch": 0.06762182629286538, "grad_norm": 0.6953125, "learning_rate": 0.0014025361822557127, "loss": 0.1881, "step": 38138 }, { "epoch": 0.0676253724581752, "grad_norm": 0.609375, "learning_rate": 0.0014024801264555205, "loss": 0.3466, "step": 38140 }, { "epoch": 0.06762891862348501, "grad_norm": 0.302734375, "learning_rate": 0.001402424069332456, "loss": 0.2739, "step": 38142 }, { "epoch": 0.06763246478879482, "grad_norm": 1.03125, "learning_rate": 0.0014023680108867643, "loss": 0.1499, "step": 38144 }, { "epoch": 0.06763601095410464, "grad_norm": 1.0078125, "learning_rate": 0.0014023119511186904, "loss": 0.3911, "step": 38146 }, { "epoch": 0.06763955711941445, "grad_norm": 1.1875, "learning_rate": 0.0014022558900284796, "loss": 0.1605, "step": 38148 }, { "epoch": 0.06764310328472427, "grad_norm": 0.72265625, "learning_rate": 0.0014021998276163771, "loss": 0.1519, "step": 38150 }, { "epoch": 0.06764664945003408, "grad_norm": 0.46484375, "learning_rate": 0.0014021437638826286, "loss": 0.1927, "step": 38152 }, { "epoch": 0.0676501956153439, "grad_norm": 1.7578125, "learning_rate": 0.0014020876988274783, "loss": 0.2798, "step": 38154 }, { "epoch": 0.06765374178065371, "grad_norm": 0.6015625, "learning_rate": 0.001402031632451172, "loss": 0.2424, "step": 38156 }, { "epoch": 0.06765728794596353, "grad_norm": 1.03125, "learning_rate": 0.0014019755647539547, "loss": 0.1948, "step": 38158 }, { "epoch": 0.06766083411127335, "grad_norm": 0.439453125, "learning_rate": 0.0014019194957360717, "loss": 0.1654, "step": 38160 }, { "epoch": 0.06766438027658317, "grad_norm": 1.3671875, "learning_rate": 0.0014018634253977684, "loss": 0.2451, "step": 38162 }, { "epoch": 0.06766792644189298, "grad_norm": 1.3125, "learning_rate": 0.0014018073537392895, "loss": 0.274, "step": 38164 }, { "epoch": 0.0676714726072028, "grad_norm": 0.40625, "learning_rate": 0.001401751280760881, "loss": 0.1472, "step": 38166 }, { "epoch": 0.06767501877251261, "grad_norm": 0.25390625, "learning_rate": 0.001401695206462787, "loss": 0.361, "step": 38168 }, { "epoch": 0.06767856493782243, "grad_norm": 0.251953125, "learning_rate": 0.0014016391308452538, "loss": 0.1655, "step": 38170 }, { "epoch": 0.06768211110313224, "grad_norm": 0.474609375, "learning_rate": 0.0014015830539085266, "loss": 0.227, "step": 38172 }, { "epoch": 0.06768565726844206, "grad_norm": 0.2001953125, "learning_rate": 0.0014015269756528501, "loss": 0.1828, "step": 38174 }, { "epoch": 0.06768920343375187, "grad_norm": 0.298828125, "learning_rate": 0.0014014708960784694, "loss": 0.1842, "step": 38176 }, { "epoch": 0.06769274959906169, "grad_norm": 0.34375, "learning_rate": 0.0014014148151856305, "loss": 0.2086, "step": 38178 }, { "epoch": 0.0676962957643715, "grad_norm": 0.57421875, "learning_rate": 0.001401358732974578, "loss": 0.1567, "step": 38180 }, { "epoch": 0.06769984192968131, "grad_norm": 0.65625, "learning_rate": 0.0014013026494455575, "loss": 0.1712, "step": 38182 }, { "epoch": 0.06770338809499113, "grad_norm": 0.2265625, "learning_rate": 0.0014012465645988144, "loss": 0.2317, "step": 38184 }, { "epoch": 0.06770693426030094, "grad_norm": 1.59375, "learning_rate": 0.0014011904784345937, "loss": 0.2767, "step": 38186 }, { "epoch": 0.06771048042561076, "grad_norm": 0.72265625, "learning_rate": 0.0014011343909531406, "loss": 0.2158, "step": 38188 }, { "epoch": 0.06771402659092057, "grad_norm": 2.140625, "learning_rate": 0.0014010783021547008, "loss": 0.2056, "step": 38190 }, { "epoch": 0.06771757275623039, "grad_norm": 0.75390625, "learning_rate": 0.0014010222120395194, "loss": 0.1804, "step": 38192 }, { "epoch": 0.0677211189215402, "grad_norm": 0.84375, "learning_rate": 0.0014009661206078419, "loss": 0.2379, "step": 38194 }, { "epoch": 0.06772466508685002, "grad_norm": 0.490234375, "learning_rate": 0.0014009100278599134, "loss": 0.3141, "step": 38196 }, { "epoch": 0.06772821125215983, "grad_norm": 0.36328125, "learning_rate": 0.0014008539337959788, "loss": 0.1913, "step": 38198 }, { "epoch": 0.06773175741746965, "grad_norm": 0.376953125, "learning_rate": 0.0014007978384162844, "loss": 0.2258, "step": 38200 }, { "epoch": 0.06773530358277946, "grad_norm": 0.447265625, "learning_rate": 0.0014007417417210744, "loss": 0.2604, "step": 38202 }, { "epoch": 0.06773884974808929, "grad_norm": 0.291015625, "learning_rate": 0.001400685643710595, "loss": 0.2234, "step": 38204 }, { "epoch": 0.0677423959133991, "grad_norm": 0.57421875, "learning_rate": 0.001400629544385091, "loss": 0.1863, "step": 38206 }, { "epoch": 0.06774594207870892, "grad_norm": 0.306640625, "learning_rate": 0.0014005734437448083, "loss": 0.1819, "step": 38208 }, { "epoch": 0.06774948824401873, "grad_norm": 0.921875, "learning_rate": 0.0014005173417899921, "loss": 0.1733, "step": 38210 }, { "epoch": 0.06775303440932855, "grad_norm": 1.8125, "learning_rate": 0.0014004612385208875, "loss": 0.222, "step": 38212 }, { "epoch": 0.06775658057463836, "grad_norm": 0.765625, "learning_rate": 0.0014004051339377398, "loss": 0.1889, "step": 38214 }, { "epoch": 0.06776012673994818, "grad_norm": 0.462890625, "learning_rate": 0.0014003490280407945, "loss": 0.2515, "step": 38216 }, { "epoch": 0.06776367290525799, "grad_norm": 0.5703125, "learning_rate": 0.001400292920830297, "loss": 0.1981, "step": 38218 }, { "epoch": 0.0677672190705678, "grad_norm": 1.515625, "learning_rate": 0.001400236812306493, "loss": 0.2434, "step": 38220 }, { "epoch": 0.06777076523587762, "grad_norm": 0.734375, "learning_rate": 0.0014001807024696272, "loss": 0.1548, "step": 38222 }, { "epoch": 0.06777431140118743, "grad_norm": 0.9609375, "learning_rate": 0.0014001245913199457, "loss": 0.3761, "step": 38224 }, { "epoch": 0.06777785756649725, "grad_norm": 0.26953125, "learning_rate": 0.0014000684788576935, "loss": 0.1539, "step": 38226 }, { "epoch": 0.06778140373180706, "grad_norm": 0.9765625, "learning_rate": 0.001400012365083116, "loss": 0.3315, "step": 38228 }, { "epoch": 0.06778494989711688, "grad_norm": 0.2890625, "learning_rate": 0.0013999562499964586, "loss": 0.1445, "step": 38230 }, { "epoch": 0.06778849606242669, "grad_norm": 0.73828125, "learning_rate": 0.001399900133597967, "loss": 0.1767, "step": 38232 }, { "epoch": 0.06779204222773651, "grad_norm": 0.40234375, "learning_rate": 0.0013998440158878862, "loss": 0.184, "step": 38234 }, { "epoch": 0.06779558839304632, "grad_norm": 1.125, "learning_rate": 0.0013997878968664622, "loss": 0.4462, "step": 38236 }, { "epoch": 0.06779913455835614, "grad_norm": 1.546875, "learning_rate": 0.0013997317765339396, "loss": 0.171, "step": 38238 }, { "epoch": 0.06780268072366595, "grad_norm": 0.578125, "learning_rate": 0.0013996756548905647, "loss": 0.2151, "step": 38240 }, { "epoch": 0.06780622688897577, "grad_norm": 0.625, "learning_rate": 0.0013996195319365824, "loss": 0.4022, "step": 38242 }, { "epoch": 0.06780977305428558, "grad_norm": 0.51171875, "learning_rate": 0.0013995634076722384, "loss": 0.1521, "step": 38244 }, { "epoch": 0.0678133192195954, "grad_norm": 0.4765625, "learning_rate": 0.0013995072820977778, "loss": 0.1848, "step": 38246 }, { "epoch": 0.06781686538490521, "grad_norm": 0.2421875, "learning_rate": 0.0013994511552134466, "loss": 0.177, "step": 38248 }, { "epoch": 0.06782041155021504, "grad_norm": 0.1982421875, "learning_rate": 0.0013993950270194898, "loss": 0.1775, "step": 38250 }, { "epoch": 0.06782395771552485, "grad_norm": 0.33203125, "learning_rate": 0.0013993388975161532, "loss": 0.1328, "step": 38252 }, { "epoch": 0.06782750388083467, "grad_norm": 0.6796875, "learning_rate": 0.001399282766703682, "loss": 0.2512, "step": 38254 }, { "epoch": 0.06783105004614448, "grad_norm": 0.3046875, "learning_rate": 0.001399226634582322, "loss": 0.1851, "step": 38256 }, { "epoch": 0.0678345962114543, "grad_norm": 1.4375, "learning_rate": 0.0013991705011523184, "loss": 0.1973, "step": 38258 }, { "epoch": 0.06783814237676411, "grad_norm": 0.361328125, "learning_rate": 0.0013991143664139167, "loss": 0.2042, "step": 38260 }, { "epoch": 0.06784168854207392, "grad_norm": 6.5625, "learning_rate": 0.0013990582303673625, "loss": 0.3079, "step": 38262 }, { "epoch": 0.06784523470738374, "grad_norm": 1.015625, "learning_rate": 0.0013990020930129015, "loss": 0.2447, "step": 38264 }, { "epoch": 0.06784878087269355, "grad_norm": 0.51171875, "learning_rate": 0.0013989459543507791, "loss": 0.1873, "step": 38266 }, { "epoch": 0.06785232703800337, "grad_norm": 0.546875, "learning_rate": 0.001398889814381241, "loss": 0.1975, "step": 38268 }, { "epoch": 0.06785587320331318, "grad_norm": 0.41796875, "learning_rate": 0.001398833673104532, "loss": 0.1761, "step": 38270 }, { "epoch": 0.067859419368623, "grad_norm": 0.482421875, "learning_rate": 0.001398777530520898, "loss": 0.2234, "step": 38272 }, { "epoch": 0.06786296553393281, "grad_norm": 0.734375, "learning_rate": 0.0013987213866305849, "loss": 0.2178, "step": 38274 }, { "epoch": 0.06786651169924263, "grad_norm": 0.2275390625, "learning_rate": 0.0013986652414338374, "loss": 0.1797, "step": 38276 }, { "epoch": 0.06787005786455244, "grad_norm": 0.2109375, "learning_rate": 0.0013986090949309022, "loss": 0.2624, "step": 38278 }, { "epoch": 0.06787360402986226, "grad_norm": 0.30078125, "learning_rate": 0.0013985529471220242, "loss": 0.1474, "step": 38280 }, { "epoch": 0.06787715019517207, "grad_norm": 0.92578125, "learning_rate": 0.0013984967980074491, "loss": 0.2142, "step": 38282 }, { "epoch": 0.06788069636048188, "grad_norm": 0.37109375, "learning_rate": 0.0013984406475874224, "loss": 0.2321, "step": 38284 }, { "epoch": 0.0678842425257917, "grad_norm": 0.2890625, "learning_rate": 0.0013983844958621895, "loss": 0.3348, "step": 38286 }, { "epoch": 0.06788778869110151, "grad_norm": 1.109375, "learning_rate": 0.0013983283428319964, "loss": 0.2325, "step": 38288 }, { "epoch": 0.06789133485641133, "grad_norm": 0.404296875, "learning_rate": 0.001398272188497088, "loss": 0.17, "step": 38290 }, { "epoch": 0.06789488102172114, "grad_norm": 0.44921875, "learning_rate": 0.0013982160328577108, "loss": 0.2045, "step": 38292 }, { "epoch": 0.06789842718703096, "grad_norm": 0.3203125, "learning_rate": 0.0013981598759141094, "loss": 0.1807, "step": 38294 }, { "epoch": 0.06790197335234079, "grad_norm": 0.89453125, "learning_rate": 0.00139810371766653, "loss": 0.252, "step": 38296 }, { "epoch": 0.0679055195176506, "grad_norm": 0.380859375, "learning_rate": 0.0013980475581152182, "loss": 0.1487, "step": 38298 }, { "epoch": 0.06790906568296042, "grad_norm": 0.3359375, "learning_rate": 0.0013979913972604196, "loss": 0.1867, "step": 38300 }, { "epoch": 0.06791261184827023, "grad_norm": 0.45703125, "learning_rate": 0.0013979352351023797, "loss": 0.2257, "step": 38302 }, { "epoch": 0.06791615801358004, "grad_norm": 0.1845703125, "learning_rate": 0.0013978790716413442, "loss": 0.1502, "step": 38304 }, { "epoch": 0.06791970417888986, "grad_norm": 0.259765625, "learning_rate": 0.0013978229068775585, "loss": 0.1935, "step": 38306 }, { "epoch": 0.06792325034419967, "grad_norm": 0.2265625, "learning_rate": 0.0013977667408112684, "loss": 0.2329, "step": 38308 }, { "epoch": 0.06792679650950949, "grad_norm": 0.3203125, "learning_rate": 0.0013977105734427197, "loss": 0.2516, "step": 38310 }, { "epoch": 0.0679303426748193, "grad_norm": 0.2490234375, "learning_rate": 0.0013976544047721578, "loss": 0.1339, "step": 38312 }, { "epoch": 0.06793388884012912, "grad_norm": 0.88671875, "learning_rate": 0.0013975982347998283, "loss": 0.2951, "step": 38314 }, { "epoch": 0.06793743500543893, "grad_norm": 0.263671875, "learning_rate": 0.0013975420635259771, "loss": 0.1864, "step": 38316 }, { "epoch": 0.06794098117074875, "grad_norm": 1.4296875, "learning_rate": 0.0013974858909508497, "loss": 0.2524, "step": 38318 }, { "epoch": 0.06794452733605856, "grad_norm": 0.275390625, "learning_rate": 0.001397429717074692, "loss": 0.1751, "step": 38320 }, { "epoch": 0.06794807350136838, "grad_norm": 0.283203125, "learning_rate": 0.0013973735418977493, "loss": 0.1674, "step": 38322 }, { "epoch": 0.06795161966667819, "grad_norm": 0.328125, "learning_rate": 0.0013973173654202675, "loss": 0.1916, "step": 38324 }, { "epoch": 0.067955165831988, "grad_norm": 1.4140625, "learning_rate": 0.0013972611876424924, "loss": 0.2069, "step": 38326 }, { "epoch": 0.06795871199729782, "grad_norm": 0.294921875, "learning_rate": 0.0013972050085646693, "loss": 0.1723, "step": 38328 }, { "epoch": 0.06796225816260763, "grad_norm": 0.81640625, "learning_rate": 0.001397148828187044, "loss": 0.2891, "step": 38330 }, { "epoch": 0.06796580432791745, "grad_norm": 0.5625, "learning_rate": 0.0013970926465098627, "loss": 0.2552, "step": 38332 }, { "epoch": 0.06796935049322726, "grad_norm": 0.59375, "learning_rate": 0.0013970364635333705, "loss": 0.2104, "step": 38334 }, { "epoch": 0.06797289665853708, "grad_norm": 0.515625, "learning_rate": 0.0013969802792578134, "loss": 0.2075, "step": 38336 }, { "epoch": 0.06797644282384689, "grad_norm": 0.26171875, "learning_rate": 0.001396924093683437, "loss": 0.1632, "step": 38338 }, { "epoch": 0.06797998898915672, "grad_norm": 1.046875, "learning_rate": 0.0013968679068104876, "loss": 0.4355, "step": 38340 }, { "epoch": 0.06798353515446653, "grad_norm": 0.37890625, "learning_rate": 0.00139681171863921, "loss": 0.1994, "step": 38342 }, { "epoch": 0.06798708131977635, "grad_norm": 0.2392578125, "learning_rate": 0.0013967555291698503, "loss": 0.1779, "step": 38344 }, { "epoch": 0.06799062748508616, "grad_norm": 0.93359375, "learning_rate": 0.0013966993384026546, "loss": 0.2307, "step": 38346 }, { "epoch": 0.06799417365039598, "grad_norm": 0.306640625, "learning_rate": 0.0013966431463378678, "loss": 0.2312, "step": 38348 }, { "epoch": 0.06799771981570579, "grad_norm": 0.29296875, "learning_rate": 0.0013965869529757365, "loss": 0.2151, "step": 38350 }, { "epoch": 0.06800126598101561, "grad_norm": 0.234375, "learning_rate": 0.001396530758316506, "loss": 0.2118, "step": 38352 }, { "epoch": 0.06800481214632542, "grad_norm": 0.4375, "learning_rate": 0.0013964745623604225, "loss": 0.2759, "step": 38354 }, { "epoch": 0.06800835831163524, "grad_norm": 0.7734375, "learning_rate": 0.0013964183651077311, "loss": 0.2909, "step": 38356 }, { "epoch": 0.06801190447694505, "grad_norm": 0.2255859375, "learning_rate": 0.0013963621665586783, "loss": 0.2046, "step": 38358 }, { "epoch": 0.06801545064225487, "grad_norm": 1.1953125, "learning_rate": 0.0013963059667135094, "loss": 0.238, "step": 38360 }, { "epoch": 0.06801899680756468, "grad_norm": 2.3125, "learning_rate": 0.0013962497655724702, "loss": 0.3522, "step": 38362 }, { "epoch": 0.0680225429728745, "grad_norm": 0.3984375, "learning_rate": 0.0013961935631358066, "loss": 0.1794, "step": 38364 }, { "epoch": 0.06802608913818431, "grad_norm": 0.2890625, "learning_rate": 0.0013961373594037645, "loss": 0.1895, "step": 38366 }, { "epoch": 0.06802963530349412, "grad_norm": 0.2109375, "learning_rate": 0.0013960811543765896, "loss": 0.134, "step": 38368 }, { "epoch": 0.06803318146880394, "grad_norm": 0.234375, "learning_rate": 0.0013960249480545277, "loss": 0.2408, "step": 38370 }, { "epoch": 0.06803672763411375, "grad_norm": 0.318359375, "learning_rate": 0.0013959687404378248, "loss": 0.1731, "step": 38372 }, { "epoch": 0.06804027379942357, "grad_norm": 0.37109375, "learning_rate": 0.0013959125315267265, "loss": 0.2128, "step": 38374 }, { "epoch": 0.06804381996473338, "grad_norm": 0.279296875, "learning_rate": 0.0013958563213214783, "loss": 0.1669, "step": 38376 }, { "epoch": 0.0680473661300432, "grad_norm": 0.431640625, "learning_rate": 0.001395800109822327, "loss": 0.161, "step": 38378 }, { "epoch": 0.06805091229535301, "grad_norm": 0.447265625, "learning_rate": 0.0013957438970295178, "loss": 0.2179, "step": 38380 }, { "epoch": 0.06805445846066283, "grad_norm": 0.31640625, "learning_rate": 0.0013956876829432962, "loss": 0.2019, "step": 38382 }, { "epoch": 0.06805800462597264, "grad_norm": 0.50390625, "learning_rate": 0.0013956314675639084, "loss": 0.2087, "step": 38384 }, { "epoch": 0.06806155079128247, "grad_norm": 0.298828125, "learning_rate": 0.0013955752508916003, "loss": 0.1963, "step": 38386 }, { "epoch": 0.06806509695659228, "grad_norm": 0.5703125, "learning_rate": 0.001395519032926618, "loss": 0.1453, "step": 38388 }, { "epoch": 0.0680686431219021, "grad_norm": 0.453125, "learning_rate": 0.001395462813669207, "loss": 0.1736, "step": 38390 }, { "epoch": 0.06807218928721191, "grad_norm": 0.4453125, "learning_rate": 0.0013954065931196132, "loss": 0.1956, "step": 38392 }, { "epoch": 0.06807573545252173, "grad_norm": 1.078125, "learning_rate": 0.0013953503712780828, "loss": 0.1756, "step": 38394 }, { "epoch": 0.06807928161783154, "grad_norm": 0.71875, "learning_rate": 0.001395294148144861, "loss": 0.1888, "step": 38396 }, { "epoch": 0.06808282778314136, "grad_norm": 1.7734375, "learning_rate": 0.0013952379237201944, "loss": 0.2469, "step": 38398 }, { "epoch": 0.06808637394845117, "grad_norm": 0.3515625, "learning_rate": 0.0013951816980043285, "loss": 0.1884, "step": 38400 }, { "epoch": 0.06808992011376098, "grad_norm": 0.6171875, "learning_rate": 0.0013951254709975095, "loss": 0.1928, "step": 38402 }, { "epoch": 0.0680934662790708, "grad_norm": 0.578125, "learning_rate": 0.0013950692426999832, "loss": 0.1648, "step": 38404 }, { "epoch": 0.06809701244438061, "grad_norm": 1.453125, "learning_rate": 0.001395013013111995, "loss": 0.3896, "step": 38406 }, { "epoch": 0.06810055860969043, "grad_norm": 0.310546875, "learning_rate": 0.0013949567822337916, "loss": 0.2046, "step": 38408 }, { "epoch": 0.06810410477500024, "grad_norm": 0.6328125, "learning_rate": 0.0013949005500656184, "loss": 0.1951, "step": 38410 }, { "epoch": 0.06810765094031006, "grad_norm": 0.625, "learning_rate": 0.0013948443166077215, "loss": 0.2136, "step": 38412 }, { "epoch": 0.06811119710561987, "grad_norm": 0.6875, "learning_rate": 0.0013947880818603472, "loss": 0.1817, "step": 38414 }, { "epoch": 0.06811474327092969, "grad_norm": 0.78125, "learning_rate": 0.0013947318458237407, "loss": 0.3948, "step": 38416 }, { "epoch": 0.0681182894362395, "grad_norm": 0.2021484375, "learning_rate": 0.0013946756084981483, "loss": 0.1687, "step": 38418 }, { "epoch": 0.06812183560154932, "grad_norm": 0.478515625, "learning_rate": 0.001394619369883816, "loss": 0.1481, "step": 38420 }, { "epoch": 0.06812538176685913, "grad_norm": 0.46875, "learning_rate": 0.0013945631299809899, "loss": 0.1857, "step": 38422 }, { "epoch": 0.06812892793216895, "grad_norm": 0.97265625, "learning_rate": 0.0013945068887899156, "loss": 0.2757, "step": 38424 }, { "epoch": 0.06813247409747876, "grad_norm": 0.396484375, "learning_rate": 0.001394450646310839, "loss": 0.2142, "step": 38426 }, { "epoch": 0.06813602026278857, "grad_norm": 0.34375, "learning_rate": 0.0013943944025440066, "loss": 0.1548, "step": 38428 }, { "epoch": 0.06813956642809839, "grad_norm": 0.1513671875, "learning_rate": 0.0013943381574896638, "loss": 0.1506, "step": 38430 }, { "epoch": 0.06814311259340822, "grad_norm": 0.40234375, "learning_rate": 0.0013942819111480574, "loss": 0.1351, "step": 38432 }, { "epoch": 0.06814665875871803, "grad_norm": 0.369140625, "learning_rate": 0.0013942256635194326, "loss": 0.1807, "step": 38434 }, { "epoch": 0.06815020492402785, "grad_norm": 4.09375, "learning_rate": 0.0013941694146040356, "loss": 0.2372, "step": 38436 }, { "epoch": 0.06815375108933766, "grad_norm": 0.365234375, "learning_rate": 0.0013941131644021124, "loss": 0.1709, "step": 38438 }, { "epoch": 0.06815729725464748, "grad_norm": 0.59765625, "learning_rate": 0.0013940569129139093, "loss": 0.218, "step": 38440 }, { "epoch": 0.06816084341995729, "grad_norm": 0.5859375, "learning_rate": 0.0013940006601396717, "loss": 0.2435, "step": 38442 }, { "epoch": 0.0681643895852671, "grad_norm": 0.375, "learning_rate": 0.001393944406079646, "loss": 0.2094, "step": 38444 }, { "epoch": 0.06816793575057692, "grad_norm": 2.375, "learning_rate": 0.0013938881507340787, "loss": 0.2277, "step": 38446 }, { "epoch": 0.06817148191588673, "grad_norm": 0.322265625, "learning_rate": 0.001393831894103215, "loss": 0.1835, "step": 38448 }, { "epoch": 0.06817502808119655, "grad_norm": 0.27734375, "learning_rate": 0.001393775636187301, "loss": 0.2094, "step": 38450 }, { "epoch": 0.06817857424650636, "grad_norm": 0.640625, "learning_rate": 0.0013937193769865835, "loss": 0.2516, "step": 38452 }, { "epoch": 0.06818212041181618, "grad_norm": 0.96875, "learning_rate": 0.001393663116501308, "loss": 0.2791, "step": 38454 }, { "epoch": 0.06818566657712599, "grad_norm": 0.40625, "learning_rate": 0.0013936068547317204, "loss": 0.2281, "step": 38456 }, { "epoch": 0.0681892127424358, "grad_norm": 0.51171875, "learning_rate": 0.0013935505916780671, "loss": 0.1932, "step": 38458 }, { "epoch": 0.06819275890774562, "grad_norm": 0.859375, "learning_rate": 0.0013934943273405938, "loss": 0.2413, "step": 38460 }, { "epoch": 0.06819630507305544, "grad_norm": 1.359375, "learning_rate": 0.001393438061719547, "loss": 0.2003, "step": 38462 }, { "epoch": 0.06819985123836525, "grad_norm": 0.5546875, "learning_rate": 0.0013933817948151724, "loss": 0.1785, "step": 38464 }, { "epoch": 0.06820339740367506, "grad_norm": 0.384765625, "learning_rate": 0.0013933255266277166, "loss": 0.1791, "step": 38466 }, { "epoch": 0.06820694356898488, "grad_norm": 0.88671875, "learning_rate": 0.001393269257157425, "loss": 0.3308, "step": 38468 }, { "epoch": 0.0682104897342947, "grad_norm": 0.65234375, "learning_rate": 0.0013932129864045442, "loss": 0.2037, "step": 38470 }, { "epoch": 0.06821403589960451, "grad_norm": 0.92578125, "learning_rate": 0.0013931567143693202, "loss": 0.1881, "step": 38472 }, { "epoch": 0.06821758206491432, "grad_norm": 0.357421875, "learning_rate": 0.0013931004410519987, "loss": 0.2012, "step": 38474 }, { "epoch": 0.06822112823022415, "grad_norm": 0.1708984375, "learning_rate": 0.0013930441664528263, "loss": 0.1844, "step": 38476 }, { "epoch": 0.06822467439553397, "grad_norm": 0.6640625, "learning_rate": 0.001392987890572049, "loss": 0.2094, "step": 38478 }, { "epoch": 0.06822822056084378, "grad_norm": 0.291015625, "learning_rate": 0.0013929316134099125, "loss": 0.2119, "step": 38480 }, { "epoch": 0.0682317667261536, "grad_norm": 0.71484375, "learning_rate": 0.0013928753349666638, "loss": 0.1917, "step": 38482 }, { "epoch": 0.06823531289146341, "grad_norm": 0.173828125, "learning_rate": 0.0013928190552425482, "loss": 0.1582, "step": 38484 }, { "epoch": 0.06823885905677322, "grad_norm": 0.6015625, "learning_rate": 0.0013927627742378124, "loss": 0.1718, "step": 38486 }, { "epoch": 0.06824240522208304, "grad_norm": 0.53125, "learning_rate": 0.0013927064919527022, "loss": 0.1957, "step": 38488 }, { "epoch": 0.06824595138739285, "grad_norm": 0.875, "learning_rate": 0.0013926502083874638, "loss": 0.3304, "step": 38490 }, { "epoch": 0.06824949755270267, "grad_norm": 0.244140625, "learning_rate": 0.001392593923542343, "loss": 0.1809, "step": 38492 }, { "epoch": 0.06825304371801248, "grad_norm": 1.0859375, "learning_rate": 0.0013925376374175867, "loss": 0.2992, "step": 38494 }, { "epoch": 0.0682565898833223, "grad_norm": 7.8125, "learning_rate": 0.0013924813500134409, "loss": 0.2119, "step": 38496 }, { "epoch": 0.06826013604863211, "grad_norm": 4.125, "learning_rate": 0.0013924250613301511, "loss": 0.39, "step": 38498 }, { "epoch": 0.06826368221394193, "grad_norm": 0.5078125, "learning_rate": 0.001392368771367964, "loss": 0.1599, "step": 38500 }, { "epoch": 0.06826722837925174, "grad_norm": 0.28125, "learning_rate": 0.0013923124801271261, "loss": 0.4339, "step": 38502 }, { "epoch": 0.06827077454456155, "grad_norm": 0.3125, "learning_rate": 0.0013922561876078827, "loss": 0.1554, "step": 38504 }, { "epoch": 0.06827432070987137, "grad_norm": 1.28125, "learning_rate": 0.001392199893810481, "loss": 0.3344, "step": 38506 }, { "epoch": 0.06827786687518118, "grad_norm": 1.28125, "learning_rate": 0.0013921435987351665, "loss": 0.3097, "step": 38508 }, { "epoch": 0.068281413040491, "grad_norm": 0.376953125, "learning_rate": 0.0013920873023821857, "loss": 0.2352, "step": 38510 }, { "epoch": 0.06828495920580081, "grad_norm": 0.2353515625, "learning_rate": 0.0013920310047517846, "loss": 0.1384, "step": 38512 }, { "epoch": 0.06828850537111063, "grad_norm": 0.4765625, "learning_rate": 0.0013919747058442093, "loss": 0.1821, "step": 38514 }, { "epoch": 0.06829205153642044, "grad_norm": 0.56640625, "learning_rate": 0.0013919184056597064, "loss": 0.2065, "step": 38516 }, { "epoch": 0.06829559770173026, "grad_norm": 0.50390625, "learning_rate": 0.001391862104198522, "loss": 0.2006, "step": 38518 }, { "epoch": 0.06829914386704007, "grad_norm": 0.341796875, "learning_rate": 0.001391805801460902, "loss": 0.171, "step": 38520 }, { "epoch": 0.0683026900323499, "grad_norm": 0.28515625, "learning_rate": 0.0013917494974470936, "loss": 0.2022, "step": 38522 }, { "epoch": 0.06830623619765971, "grad_norm": 0.5078125, "learning_rate": 0.0013916931921573417, "loss": 0.1755, "step": 38524 }, { "epoch": 0.06830978236296953, "grad_norm": 1.1953125, "learning_rate": 0.0013916368855918936, "loss": 0.2213, "step": 38526 }, { "epoch": 0.06831332852827934, "grad_norm": 0.318359375, "learning_rate": 0.0013915805777509952, "loss": 0.2017, "step": 38528 }, { "epoch": 0.06831687469358916, "grad_norm": 1.046875, "learning_rate": 0.0013915242686348924, "loss": 0.2866, "step": 38530 }, { "epoch": 0.06832042085889897, "grad_norm": 0.40625, "learning_rate": 0.0013914679582438321, "loss": 0.2487, "step": 38532 }, { "epoch": 0.06832396702420879, "grad_norm": 0.75, "learning_rate": 0.00139141164657806, "loss": 0.2134, "step": 38534 }, { "epoch": 0.0683275131895186, "grad_norm": 0.3125, "learning_rate": 0.0013913553336378226, "loss": 0.1881, "step": 38536 }, { "epoch": 0.06833105935482842, "grad_norm": 0.296875, "learning_rate": 0.0013912990194233663, "loss": 0.1552, "step": 38538 }, { "epoch": 0.06833460552013823, "grad_norm": 0.345703125, "learning_rate": 0.0013912427039349375, "loss": 0.205, "step": 38540 }, { "epoch": 0.06833815168544805, "grad_norm": 0.37890625, "learning_rate": 0.0013911863871727819, "loss": 0.1956, "step": 38542 }, { "epoch": 0.06834169785075786, "grad_norm": 0.1767578125, "learning_rate": 0.0013911300691371467, "loss": 0.151, "step": 38544 }, { "epoch": 0.06834524401606767, "grad_norm": 0.26953125, "learning_rate": 0.0013910737498282772, "loss": 0.1941, "step": 38546 }, { "epoch": 0.06834879018137749, "grad_norm": 0.37890625, "learning_rate": 0.0013910174292464206, "loss": 0.1779, "step": 38548 }, { "epoch": 0.0683523363466873, "grad_norm": 0.58203125, "learning_rate": 0.0013909611073918225, "loss": 0.1621, "step": 38550 }, { "epoch": 0.06835588251199712, "grad_norm": 0.359375, "learning_rate": 0.0013909047842647299, "loss": 0.1613, "step": 38552 }, { "epoch": 0.06835942867730693, "grad_norm": 0.703125, "learning_rate": 0.0013908484598653883, "loss": 0.2225, "step": 38554 }, { "epoch": 0.06836297484261675, "grad_norm": 0.369140625, "learning_rate": 0.0013907921341940446, "loss": 0.1658, "step": 38556 }, { "epoch": 0.06836652100792656, "grad_norm": 0.427734375, "learning_rate": 0.001390735807250945, "loss": 0.1558, "step": 38558 }, { "epoch": 0.06837006717323638, "grad_norm": 0.42578125, "learning_rate": 0.001390679479036336, "loss": 0.1836, "step": 38560 }, { "epoch": 0.06837361333854619, "grad_norm": 0.359375, "learning_rate": 0.0013906231495504637, "loss": 0.1618, "step": 38562 }, { "epoch": 0.068377159503856, "grad_norm": 0.74609375, "learning_rate": 0.0013905668187935748, "loss": 0.2254, "step": 38564 }, { "epoch": 0.06838070566916582, "grad_norm": 0.78515625, "learning_rate": 0.001390510486765915, "loss": 0.1782, "step": 38566 }, { "epoch": 0.06838425183447565, "grad_norm": 0.212890625, "learning_rate": 0.0013904541534677315, "loss": 0.2047, "step": 38568 }, { "epoch": 0.06838779799978546, "grad_norm": 0.53515625, "learning_rate": 0.00139039781889927, "loss": 0.2004, "step": 38570 }, { "epoch": 0.06839134416509528, "grad_norm": 0.53515625, "learning_rate": 0.0013903414830607772, "loss": 0.1965, "step": 38572 }, { "epoch": 0.06839489033040509, "grad_norm": 0.30078125, "learning_rate": 0.0013902851459524994, "loss": 0.1595, "step": 38574 }, { "epoch": 0.0683984364957149, "grad_norm": 0.279296875, "learning_rate": 0.0013902288075746828, "loss": 0.1686, "step": 38576 }, { "epoch": 0.06840198266102472, "grad_norm": 0.765625, "learning_rate": 0.001390172467927574, "loss": 0.1831, "step": 38578 }, { "epoch": 0.06840552882633454, "grad_norm": 0.294921875, "learning_rate": 0.0013901161270114197, "loss": 0.1399, "step": 38580 }, { "epoch": 0.06840907499164435, "grad_norm": 0.43359375, "learning_rate": 0.001390059784826466, "loss": 0.2341, "step": 38582 }, { "epoch": 0.06841262115695416, "grad_norm": 0.33203125, "learning_rate": 0.001390003441372959, "loss": 0.1851, "step": 38584 }, { "epoch": 0.06841616732226398, "grad_norm": 0.228515625, "learning_rate": 0.0013899470966511453, "loss": 0.2308, "step": 38586 }, { "epoch": 0.0684197134875738, "grad_norm": 0.2099609375, "learning_rate": 0.0013898907506612718, "loss": 0.1891, "step": 38588 }, { "epoch": 0.06842325965288361, "grad_norm": 0.2041015625, "learning_rate": 0.0013898344034035841, "loss": 0.1586, "step": 38590 }, { "epoch": 0.06842680581819342, "grad_norm": 0.94921875, "learning_rate": 0.0013897780548783297, "loss": 0.2378, "step": 38592 }, { "epoch": 0.06843035198350324, "grad_norm": 0.435546875, "learning_rate": 0.001389721705085754, "loss": 0.2102, "step": 38594 }, { "epoch": 0.06843389814881305, "grad_norm": 0.73046875, "learning_rate": 0.0013896653540261038, "loss": 0.112, "step": 38596 }, { "epoch": 0.06843744431412287, "grad_norm": 0.48046875, "learning_rate": 0.0013896090016996257, "loss": 0.1799, "step": 38598 }, { "epoch": 0.06844099047943268, "grad_norm": 0.36328125, "learning_rate": 0.001389552648106566, "loss": 0.2451, "step": 38600 }, { "epoch": 0.0684445366447425, "grad_norm": 0.31640625, "learning_rate": 0.0013894962932471716, "loss": 0.1903, "step": 38602 }, { "epoch": 0.06844808281005231, "grad_norm": 0.345703125, "learning_rate": 0.0013894399371216879, "loss": 0.1838, "step": 38604 }, { "epoch": 0.06845162897536212, "grad_norm": 0.470703125, "learning_rate": 0.0013893835797303621, "loss": 0.1922, "step": 38606 }, { "epoch": 0.06845517514067194, "grad_norm": 0.373046875, "learning_rate": 0.0013893272210734413, "loss": 0.3005, "step": 38608 }, { "epoch": 0.06845872130598175, "grad_norm": 1.1953125, "learning_rate": 0.001389270861151171, "loss": 0.2285, "step": 38610 }, { "epoch": 0.06846226747129157, "grad_norm": 0.796875, "learning_rate": 0.0013892144999637975, "loss": 0.2986, "step": 38612 }, { "epoch": 0.0684658136366014, "grad_norm": 0.46875, "learning_rate": 0.0013891581375115683, "loss": 0.1625, "step": 38614 }, { "epoch": 0.06846935980191121, "grad_norm": 0.90234375, "learning_rate": 0.0013891017737947288, "loss": 0.2712, "step": 38616 }, { "epoch": 0.06847290596722103, "grad_norm": 0.328125, "learning_rate": 0.0013890454088135266, "loss": 0.2401, "step": 38618 }, { "epoch": 0.06847645213253084, "grad_norm": 1.1015625, "learning_rate": 0.0013889890425682076, "loss": 0.2564, "step": 38620 }, { "epoch": 0.06847999829784066, "grad_norm": 0.37109375, "learning_rate": 0.0013889326750590187, "loss": 0.2566, "step": 38622 }, { "epoch": 0.06848354446315047, "grad_norm": 0.26171875, "learning_rate": 0.0013888763062862056, "loss": 0.1777, "step": 38624 }, { "epoch": 0.06848709062846028, "grad_norm": 0.2890625, "learning_rate": 0.0013888199362500156, "loss": 0.2204, "step": 38626 }, { "epoch": 0.0684906367937701, "grad_norm": 1.1328125, "learning_rate": 0.0013887635649506946, "loss": 0.2267, "step": 38628 }, { "epoch": 0.06849418295907991, "grad_norm": 0.9296875, "learning_rate": 0.0013887071923884898, "loss": 0.2019, "step": 38630 }, { "epoch": 0.06849772912438973, "grad_norm": 0.494140625, "learning_rate": 0.0013886508185636476, "loss": 0.2052, "step": 38632 }, { "epoch": 0.06850127528969954, "grad_norm": 0.58984375, "learning_rate": 0.0013885944434764142, "loss": 0.16, "step": 38634 }, { "epoch": 0.06850482145500936, "grad_norm": 0.57421875, "learning_rate": 0.0013885380671270363, "loss": 0.2023, "step": 38636 }, { "epoch": 0.06850836762031917, "grad_norm": 0.62109375, "learning_rate": 0.0013884816895157605, "loss": 0.171, "step": 38638 }, { "epoch": 0.06851191378562899, "grad_norm": 0.400390625, "learning_rate": 0.0013884253106428333, "loss": 0.1549, "step": 38640 }, { "epoch": 0.0685154599509388, "grad_norm": 0.5625, "learning_rate": 0.0013883689305085015, "loss": 0.3093, "step": 38642 }, { "epoch": 0.06851900611624862, "grad_norm": 0.212890625, "learning_rate": 0.0013883125491130114, "loss": 0.2038, "step": 38644 }, { "epoch": 0.06852255228155843, "grad_norm": 0.392578125, "learning_rate": 0.0013882561664566096, "loss": 0.146, "step": 38646 }, { "epoch": 0.06852609844686824, "grad_norm": 1.625, "learning_rate": 0.0013881997825395428, "loss": 0.2772, "step": 38648 }, { "epoch": 0.06852964461217806, "grad_norm": 0.50390625, "learning_rate": 0.0013881433973620576, "loss": 0.2073, "step": 38650 }, { "epoch": 0.06853319077748787, "grad_norm": 0.35546875, "learning_rate": 0.0013880870109244006, "loss": 0.2884, "step": 38652 }, { "epoch": 0.06853673694279769, "grad_norm": 0.1953125, "learning_rate": 0.0013880306232268184, "loss": 0.1797, "step": 38654 }, { "epoch": 0.0685402831081075, "grad_norm": 0.9140625, "learning_rate": 0.0013879742342695575, "loss": 0.254, "step": 38656 }, { "epoch": 0.06854382927341733, "grad_norm": 2.25, "learning_rate": 0.0013879178440528645, "loss": 0.2781, "step": 38658 }, { "epoch": 0.06854737543872715, "grad_norm": 0.484375, "learning_rate": 0.0013878614525769863, "loss": 0.208, "step": 38660 }, { "epoch": 0.06855092160403696, "grad_norm": 0.5859375, "learning_rate": 0.0013878050598421694, "loss": 0.1603, "step": 38662 }, { "epoch": 0.06855446776934677, "grad_norm": 0.36328125, "learning_rate": 0.00138774866584866, "loss": 0.2282, "step": 38664 }, { "epoch": 0.06855801393465659, "grad_norm": 0.333984375, "learning_rate": 0.0013876922705967052, "loss": 0.1922, "step": 38666 }, { "epoch": 0.0685615600999664, "grad_norm": 0.349609375, "learning_rate": 0.0013876358740865513, "loss": 0.1808, "step": 38668 }, { "epoch": 0.06856510626527622, "grad_norm": 0.47265625, "learning_rate": 0.0013875794763184455, "loss": 0.1742, "step": 38670 }, { "epoch": 0.06856865243058603, "grad_norm": 0.34375, "learning_rate": 0.001387523077292634, "loss": 0.1383, "step": 38672 }, { "epoch": 0.06857219859589585, "grad_norm": 0.3515625, "learning_rate": 0.001387466677009364, "loss": 0.1723, "step": 38674 }, { "epoch": 0.06857574476120566, "grad_norm": 0.279296875, "learning_rate": 0.0013874102754688812, "loss": 0.185, "step": 38676 }, { "epoch": 0.06857929092651548, "grad_norm": 0.427734375, "learning_rate": 0.0013873538726714328, "loss": 0.1956, "step": 38678 }, { "epoch": 0.06858283709182529, "grad_norm": 0.73046875, "learning_rate": 0.0013872974686172654, "loss": 0.1889, "step": 38680 }, { "epoch": 0.0685863832571351, "grad_norm": 1.1640625, "learning_rate": 0.0013872410633066262, "loss": 0.7169, "step": 38682 }, { "epoch": 0.06858992942244492, "grad_norm": 0.53125, "learning_rate": 0.001387184656739761, "loss": 0.168, "step": 38684 }, { "epoch": 0.06859347558775473, "grad_norm": 1.421875, "learning_rate": 0.0013871282489169173, "loss": 0.1486, "step": 38686 }, { "epoch": 0.06859702175306455, "grad_norm": 0.4140625, "learning_rate": 0.0013870718398383412, "loss": 0.1885, "step": 38688 }, { "epoch": 0.06860056791837436, "grad_norm": 0.298828125, "learning_rate": 0.0013870154295042797, "loss": 0.1668, "step": 38690 }, { "epoch": 0.06860411408368418, "grad_norm": 1.546875, "learning_rate": 0.001386959017914979, "loss": 0.3711, "step": 38692 }, { "epoch": 0.06860766024899399, "grad_norm": 0.58984375, "learning_rate": 0.0013869026050706867, "loss": 0.2045, "step": 38694 }, { "epoch": 0.06861120641430381, "grad_norm": 0.61328125, "learning_rate": 0.0013868461909716493, "loss": 0.3048, "step": 38696 }, { "epoch": 0.06861475257961362, "grad_norm": 0.306640625, "learning_rate": 0.001386789775618113, "loss": 0.1788, "step": 38698 }, { "epoch": 0.06861829874492344, "grad_norm": 0.75390625, "learning_rate": 0.0013867333590103245, "loss": 0.208, "step": 38700 }, { "epoch": 0.06862184491023325, "grad_norm": 0.365234375, "learning_rate": 0.0013866769411485312, "loss": 0.3026, "step": 38702 }, { "epoch": 0.06862539107554308, "grad_norm": 0.46875, "learning_rate": 0.0013866205220329794, "loss": 0.1746, "step": 38704 }, { "epoch": 0.0686289372408529, "grad_norm": 0.32421875, "learning_rate": 0.0013865641016639159, "loss": 0.1726, "step": 38706 }, { "epoch": 0.06863248340616271, "grad_norm": 0.5234375, "learning_rate": 0.0013865076800415874, "loss": 0.181, "step": 38708 }, { "epoch": 0.06863602957147252, "grad_norm": 0.494140625, "learning_rate": 0.001386451257166241, "loss": 0.1777, "step": 38710 }, { "epoch": 0.06863957573678234, "grad_norm": 1.53125, "learning_rate": 0.001386394833038123, "loss": 0.5013, "step": 38712 }, { "epoch": 0.06864312190209215, "grad_norm": 3.15625, "learning_rate": 0.0013863384076574802, "loss": 0.3899, "step": 38714 }, { "epoch": 0.06864666806740197, "grad_norm": 0.41796875, "learning_rate": 0.0013862819810245603, "loss": 0.3849, "step": 38716 }, { "epoch": 0.06865021423271178, "grad_norm": 0.84375, "learning_rate": 0.0013862255531396084, "loss": 0.2061, "step": 38718 }, { "epoch": 0.0686537603980216, "grad_norm": 0.466796875, "learning_rate": 0.0013861691240028727, "loss": 0.2131, "step": 38720 }, { "epoch": 0.06865730656333141, "grad_norm": 0.86328125, "learning_rate": 0.001386112693614599, "loss": 0.2721, "step": 38722 }, { "epoch": 0.06866085272864123, "grad_norm": 1.0234375, "learning_rate": 0.001386056261975035, "loss": 0.1928, "step": 38724 }, { "epoch": 0.06866439889395104, "grad_norm": 0.5078125, "learning_rate": 0.001385999829084427, "loss": 0.2387, "step": 38726 }, { "epoch": 0.06866794505926085, "grad_norm": 0.59765625, "learning_rate": 0.0013859433949430216, "loss": 0.1539, "step": 38728 }, { "epoch": 0.06867149122457067, "grad_norm": 0.83203125, "learning_rate": 0.0013858869595510662, "loss": 0.2554, "step": 38730 }, { "epoch": 0.06867503738988048, "grad_norm": 0.5546875, "learning_rate": 0.001385830522908807, "loss": 0.2179, "step": 38732 }, { "epoch": 0.0686785835551903, "grad_norm": 0.48828125, "learning_rate": 0.0013857740850164914, "loss": 0.1837, "step": 38734 }, { "epoch": 0.06868212972050011, "grad_norm": 0.36328125, "learning_rate": 0.0013857176458743662, "loss": 0.1722, "step": 38736 }, { "epoch": 0.06868567588580993, "grad_norm": 0.431640625, "learning_rate": 0.0013856612054826774, "loss": 0.2289, "step": 38738 }, { "epoch": 0.06868922205111974, "grad_norm": 0.255859375, "learning_rate": 0.001385604763841673, "loss": 0.3891, "step": 38740 }, { "epoch": 0.06869276821642956, "grad_norm": 1.828125, "learning_rate": 0.0013855483209515987, "loss": 0.2045, "step": 38742 }, { "epoch": 0.06869631438173937, "grad_norm": 0.46875, "learning_rate": 0.0013854918768127023, "loss": 0.2637, "step": 38744 }, { "epoch": 0.06869986054704919, "grad_norm": 0.52734375, "learning_rate": 0.0013854354314252297, "loss": 0.283, "step": 38746 }, { "epoch": 0.068703406712359, "grad_norm": 0.365234375, "learning_rate": 0.001385378984789429, "loss": 0.2111, "step": 38748 }, { "epoch": 0.06870695287766883, "grad_norm": 0.330078125, "learning_rate": 0.0013853225369055463, "loss": 0.1722, "step": 38750 }, { "epoch": 0.06871049904297864, "grad_norm": 1.53125, "learning_rate": 0.0013852660877738284, "loss": 0.2091, "step": 38752 }, { "epoch": 0.06871404520828846, "grad_norm": 0.373046875, "learning_rate": 0.0013852096373945222, "loss": 0.1948, "step": 38754 }, { "epoch": 0.06871759137359827, "grad_norm": 0.296875, "learning_rate": 0.001385153185767875, "loss": 0.1899, "step": 38756 }, { "epoch": 0.06872113753890809, "grad_norm": 1.2265625, "learning_rate": 0.0013850967328941331, "loss": 0.2329, "step": 38758 }, { "epoch": 0.0687246837042179, "grad_norm": 0.294921875, "learning_rate": 0.0013850402787735439, "loss": 0.1819, "step": 38760 }, { "epoch": 0.06872822986952772, "grad_norm": 0.26171875, "learning_rate": 0.001384983823406354, "loss": 0.187, "step": 38762 }, { "epoch": 0.06873177603483753, "grad_norm": 0.287109375, "learning_rate": 0.00138492736679281, "loss": 0.1824, "step": 38764 }, { "epoch": 0.06873532220014734, "grad_norm": 0.384765625, "learning_rate": 0.0013848709089331597, "loss": 0.1288, "step": 38766 }, { "epoch": 0.06873886836545716, "grad_norm": 0.75, "learning_rate": 0.0013848144498276498, "loss": 0.244, "step": 38768 }, { "epoch": 0.06874241453076697, "grad_norm": 0.1953125, "learning_rate": 0.0013847579894765265, "loss": 0.2181, "step": 38770 }, { "epoch": 0.06874596069607679, "grad_norm": 0.29296875, "learning_rate": 0.0013847015278800373, "loss": 0.2719, "step": 38772 }, { "epoch": 0.0687495068613866, "grad_norm": 0.84375, "learning_rate": 0.0013846450650384287, "loss": 0.1907, "step": 38774 }, { "epoch": 0.06875305302669642, "grad_norm": 0.6953125, "learning_rate": 0.0013845886009519483, "loss": 0.1059, "step": 38776 }, { "epoch": 0.06875659919200623, "grad_norm": 0.2451171875, "learning_rate": 0.0013845321356208424, "loss": 0.1556, "step": 38778 }, { "epoch": 0.06876014535731605, "grad_norm": 0.76953125, "learning_rate": 0.0013844756690453584, "loss": 0.1781, "step": 38780 }, { "epoch": 0.06876369152262586, "grad_norm": 0.447265625, "learning_rate": 0.001384419201225743, "loss": 0.2238, "step": 38782 }, { "epoch": 0.06876723768793568, "grad_norm": 0.296875, "learning_rate": 0.001384362732162243, "loss": 0.1513, "step": 38784 }, { "epoch": 0.06877078385324549, "grad_norm": 0.431640625, "learning_rate": 0.0013843062618551058, "loss": 0.2141, "step": 38786 }, { "epoch": 0.0687743300185553, "grad_norm": 0.26953125, "learning_rate": 0.0013842497903045782, "loss": 0.2192, "step": 38788 }, { "epoch": 0.06877787618386512, "grad_norm": 0.546875, "learning_rate": 0.0013841933175109075, "loss": 0.2019, "step": 38790 }, { "epoch": 0.06878142234917493, "grad_norm": 0.46875, "learning_rate": 0.0013841368434743399, "loss": 0.1661, "step": 38792 }, { "epoch": 0.06878496851448476, "grad_norm": 1.0078125, "learning_rate": 0.0013840803681951226, "loss": 0.268, "step": 38794 }, { "epoch": 0.06878851467979458, "grad_norm": 0.56640625, "learning_rate": 0.001384023891673503, "loss": 0.2013, "step": 38796 }, { "epoch": 0.06879206084510439, "grad_norm": 1.21875, "learning_rate": 0.001383967413909728, "loss": 0.2282, "step": 38798 }, { "epoch": 0.0687956070104142, "grad_norm": 0.3046875, "learning_rate": 0.001383910934904044, "loss": 0.1804, "step": 38800 }, { "epoch": 0.06879915317572402, "grad_norm": 0.2265625, "learning_rate": 0.001383854454656699, "loss": 0.2195, "step": 38802 }, { "epoch": 0.06880269934103384, "grad_norm": 0.396484375, "learning_rate": 0.001383797973167939, "loss": 0.2714, "step": 38804 }, { "epoch": 0.06880624550634365, "grad_norm": 0.3203125, "learning_rate": 0.0013837414904380122, "loss": 0.2148, "step": 38806 }, { "epoch": 0.06880979167165346, "grad_norm": 0.42578125, "learning_rate": 0.0013836850064671644, "loss": 0.2032, "step": 38808 }, { "epoch": 0.06881333783696328, "grad_norm": 1.609375, "learning_rate": 0.0013836285212556435, "loss": 0.1533, "step": 38810 }, { "epoch": 0.0688168840022731, "grad_norm": 0.298828125, "learning_rate": 0.001383572034803696, "loss": 0.2029, "step": 38812 }, { "epoch": 0.06882043016758291, "grad_norm": 0.67578125, "learning_rate": 0.0013835155471115692, "loss": 0.2191, "step": 38814 }, { "epoch": 0.06882397633289272, "grad_norm": 0.2578125, "learning_rate": 0.00138345905817951, "loss": 0.2209, "step": 38816 }, { "epoch": 0.06882752249820254, "grad_norm": 1.1875, "learning_rate": 0.0013834025680077655, "loss": 0.2542, "step": 38818 }, { "epoch": 0.06883106866351235, "grad_norm": 0.63671875, "learning_rate": 0.001383346076596583, "loss": 0.1697, "step": 38820 }, { "epoch": 0.06883461482882217, "grad_norm": 0.828125, "learning_rate": 0.001383289583946209, "loss": 0.2327, "step": 38822 }, { "epoch": 0.06883816099413198, "grad_norm": 0.306640625, "learning_rate": 0.0013832330900568912, "loss": 0.1478, "step": 38824 }, { "epoch": 0.0688417071594418, "grad_norm": 0.419921875, "learning_rate": 0.0013831765949288764, "loss": 0.2718, "step": 38826 }, { "epoch": 0.06884525332475161, "grad_norm": 0.359375, "learning_rate": 0.0013831200985624118, "loss": 0.1641, "step": 38828 }, { "epoch": 0.06884879949006142, "grad_norm": 0.91015625, "learning_rate": 0.001383063600957744, "loss": 0.1828, "step": 38830 }, { "epoch": 0.06885234565537124, "grad_norm": 0.5703125, "learning_rate": 0.0013830071021151205, "loss": 0.2324, "step": 38832 }, { "epoch": 0.06885589182068105, "grad_norm": 1.5, "learning_rate": 0.0013829506020347885, "loss": 0.1831, "step": 38834 }, { "epoch": 0.06885943798599087, "grad_norm": 0.310546875, "learning_rate": 0.0013828941007169948, "loss": 0.2253, "step": 38836 }, { "epoch": 0.06886298415130068, "grad_norm": 0.59765625, "learning_rate": 0.0013828375981619868, "loss": 0.2929, "step": 38838 }, { "epoch": 0.06886653031661051, "grad_norm": 0.30078125, "learning_rate": 0.001382781094370011, "loss": 0.1126, "step": 38840 }, { "epoch": 0.06887007648192033, "grad_norm": 0.51171875, "learning_rate": 0.0013827245893413154, "loss": 0.1649, "step": 38842 }, { "epoch": 0.06887362264723014, "grad_norm": 0.97265625, "learning_rate": 0.0013826680830761465, "loss": 0.2236, "step": 38844 }, { "epoch": 0.06887716881253995, "grad_norm": 0.453125, "learning_rate": 0.0013826115755747517, "loss": 0.1491, "step": 38846 }, { "epoch": 0.06888071497784977, "grad_norm": 0.46875, "learning_rate": 0.0013825550668373778, "loss": 0.1669, "step": 38848 }, { "epoch": 0.06888426114315958, "grad_norm": 0.396484375, "learning_rate": 0.0013824985568642726, "loss": 0.1996, "step": 38850 }, { "epoch": 0.0688878073084694, "grad_norm": 1.0390625, "learning_rate": 0.0013824420456556824, "loss": 0.2398, "step": 38852 }, { "epoch": 0.06889135347377921, "grad_norm": 0.400390625, "learning_rate": 0.0013823855332118547, "loss": 0.1732, "step": 38854 }, { "epoch": 0.06889489963908903, "grad_norm": 0.65234375, "learning_rate": 0.0013823290195330367, "loss": 0.2042, "step": 38856 }, { "epoch": 0.06889844580439884, "grad_norm": 0.25390625, "learning_rate": 0.001382272504619476, "loss": 0.197, "step": 38858 }, { "epoch": 0.06890199196970866, "grad_norm": 1.7109375, "learning_rate": 0.0013822159884714189, "loss": 0.2177, "step": 38860 }, { "epoch": 0.06890553813501847, "grad_norm": 0.359375, "learning_rate": 0.001382159471089113, "loss": 0.1863, "step": 38862 }, { "epoch": 0.06890908430032829, "grad_norm": 0.2578125, "learning_rate": 0.001382102952472806, "loss": 0.1719, "step": 38864 }, { "epoch": 0.0689126304656381, "grad_norm": 0.255859375, "learning_rate": 0.0013820464326227437, "loss": 0.1712, "step": 38866 }, { "epoch": 0.06891617663094791, "grad_norm": 0.470703125, "learning_rate": 0.001381989911539175, "loss": 0.4004, "step": 38868 }, { "epoch": 0.06891972279625773, "grad_norm": 0.259765625, "learning_rate": 0.0013819333892223457, "loss": 0.1878, "step": 38870 }, { "epoch": 0.06892326896156754, "grad_norm": 0.2041015625, "learning_rate": 0.0013818768656725035, "loss": 0.1938, "step": 38872 }, { "epoch": 0.06892681512687736, "grad_norm": 0.76171875, "learning_rate": 0.001381820340889896, "loss": 0.166, "step": 38874 }, { "epoch": 0.06893036129218717, "grad_norm": 0.9296875, "learning_rate": 0.0013817638148747695, "loss": 0.2672, "step": 38876 }, { "epoch": 0.06893390745749699, "grad_norm": 0.3359375, "learning_rate": 0.0013817072876273723, "loss": 0.1814, "step": 38878 }, { "epoch": 0.0689374536228068, "grad_norm": 0.216796875, "learning_rate": 0.0013816507591479508, "loss": 0.142, "step": 38880 }, { "epoch": 0.06894099978811662, "grad_norm": 0.318359375, "learning_rate": 0.0013815942294367525, "loss": 0.1413, "step": 38882 }, { "epoch": 0.06894454595342643, "grad_norm": 0.81640625, "learning_rate": 0.0013815376984940247, "loss": 0.2014, "step": 38884 }, { "epoch": 0.06894809211873626, "grad_norm": 0.365234375, "learning_rate": 0.0013814811663200141, "loss": 0.2333, "step": 38886 }, { "epoch": 0.06895163828404607, "grad_norm": 0.435546875, "learning_rate": 0.001381424632914969, "loss": 0.2058, "step": 38888 }, { "epoch": 0.06895518444935589, "grad_norm": 0.134765625, "learning_rate": 0.0013813680982791357, "loss": 0.1578, "step": 38890 }, { "epoch": 0.0689587306146657, "grad_norm": 0.451171875, "learning_rate": 0.0013813115624127618, "loss": 0.2366, "step": 38892 }, { "epoch": 0.06896227677997552, "grad_norm": 0.376953125, "learning_rate": 0.0013812550253160946, "loss": 0.1827, "step": 38894 }, { "epoch": 0.06896582294528533, "grad_norm": 1.9453125, "learning_rate": 0.0013811984869893813, "loss": 0.1987, "step": 38896 }, { "epoch": 0.06896936911059515, "grad_norm": 0.921875, "learning_rate": 0.001381141947432869, "loss": 0.2456, "step": 38898 }, { "epoch": 0.06897291527590496, "grad_norm": 0.5859375, "learning_rate": 0.0013810854066468052, "loss": 0.1645, "step": 38900 }, { "epoch": 0.06897646144121478, "grad_norm": 0.43359375, "learning_rate": 0.0013810288646314375, "loss": 0.1397, "step": 38902 }, { "epoch": 0.06898000760652459, "grad_norm": 0.37109375, "learning_rate": 0.0013809723213870125, "loss": 0.2136, "step": 38904 }, { "epoch": 0.0689835537718344, "grad_norm": 0.3046875, "learning_rate": 0.0013809157769137774, "loss": 0.1348, "step": 38906 }, { "epoch": 0.06898709993714422, "grad_norm": 2.0, "learning_rate": 0.0013808592312119802, "loss": 0.2627, "step": 38908 }, { "epoch": 0.06899064610245403, "grad_norm": 0.201171875, "learning_rate": 0.001380802684281868, "loss": 0.1499, "step": 38910 }, { "epoch": 0.06899419226776385, "grad_norm": 0.70703125, "learning_rate": 0.0013807461361236878, "loss": 0.1544, "step": 38912 }, { "epoch": 0.06899773843307366, "grad_norm": 0.6640625, "learning_rate": 0.0013806895867376867, "loss": 0.1776, "step": 38914 }, { "epoch": 0.06900128459838348, "grad_norm": 0.60546875, "learning_rate": 0.001380633036124113, "loss": 0.1805, "step": 38916 }, { "epoch": 0.06900483076369329, "grad_norm": 0.443359375, "learning_rate": 0.001380576484283213, "loss": 0.2728, "step": 38918 }, { "epoch": 0.0690083769290031, "grad_norm": 0.3515625, "learning_rate": 0.0013805199312152349, "loss": 0.1943, "step": 38920 }, { "epoch": 0.06901192309431292, "grad_norm": 0.91015625, "learning_rate": 0.0013804633769204254, "loss": 0.2868, "step": 38922 }, { "epoch": 0.06901546925962274, "grad_norm": 0.71875, "learning_rate": 0.001380406821399032, "loss": 0.1875, "step": 38924 }, { "epoch": 0.06901901542493255, "grad_norm": 0.42578125, "learning_rate": 0.0013803502646513015, "loss": 0.2583, "step": 38926 }, { "epoch": 0.06902256159024237, "grad_norm": 0.58984375, "learning_rate": 0.0013802937066774825, "loss": 0.2339, "step": 38928 }, { "epoch": 0.0690261077555522, "grad_norm": 0.796875, "learning_rate": 0.0013802371474778213, "loss": 0.1866, "step": 38930 }, { "epoch": 0.06902965392086201, "grad_norm": 0.244140625, "learning_rate": 0.0013801805870525656, "loss": 0.2355, "step": 38932 }, { "epoch": 0.06903320008617182, "grad_norm": 0.359375, "learning_rate": 0.0013801240254019626, "loss": 0.1801, "step": 38934 }, { "epoch": 0.06903674625148164, "grad_norm": 0.54296875, "learning_rate": 0.0013800674625262604, "loss": 0.2283, "step": 38936 }, { "epoch": 0.06904029241679145, "grad_norm": 0.1533203125, "learning_rate": 0.0013800108984257054, "loss": 0.1347, "step": 38938 }, { "epoch": 0.06904383858210127, "grad_norm": 0.65625, "learning_rate": 0.0013799543331005455, "loss": 0.1374, "step": 38940 }, { "epoch": 0.06904738474741108, "grad_norm": 0.30078125, "learning_rate": 0.0013798977665510277, "loss": 0.131, "step": 38942 }, { "epoch": 0.0690509309127209, "grad_norm": 3.375, "learning_rate": 0.0013798411987774003, "loss": 0.2717, "step": 38944 }, { "epoch": 0.06905447707803071, "grad_norm": 0.30078125, "learning_rate": 0.0013797846297799092, "loss": 0.1871, "step": 38946 }, { "epoch": 0.06905802324334052, "grad_norm": 0.267578125, "learning_rate": 0.001379728059558803, "loss": 0.1699, "step": 38948 }, { "epoch": 0.06906156940865034, "grad_norm": 0.546875, "learning_rate": 0.0013796714881143288, "loss": 0.2035, "step": 38950 }, { "epoch": 0.06906511557396015, "grad_norm": 0.640625, "learning_rate": 0.001379614915446734, "loss": 0.2158, "step": 38952 }, { "epoch": 0.06906866173926997, "grad_norm": 1.046875, "learning_rate": 0.0013795583415562658, "loss": 0.2759, "step": 38954 }, { "epoch": 0.06907220790457978, "grad_norm": 0.419921875, "learning_rate": 0.0013795017664431722, "loss": 0.1518, "step": 38956 }, { "epoch": 0.0690757540698896, "grad_norm": 2.21875, "learning_rate": 0.0013794451901077, "loss": 0.2249, "step": 38958 }, { "epoch": 0.06907930023519941, "grad_norm": 0.373046875, "learning_rate": 0.0013793886125500966, "loss": 0.1615, "step": 38960 }, { "epoch": 0.06908284640050923, "grad_norm": 0.4765625, "learning_rate": 0.00137933203377061, "loss": 0.1975, "step": 38962 }, { "epoch": 0.06908639256581904, "grad_norm": 0.56640625, "learning_rate": 0.0013792754537694872, "loss": 0.2532, "step": 38964 }, { "epoch": 0.06908993873112886, "grad_norm": 1.75, "learning_rate": 0.0013792188725469758, "loss": 0.2386, "step": 38966 }, { "epoch": 0.06909348489643867, "grad_norm": 0.328125, "learning_rate": 0.0013791622901033235, "loss": 0.158, "step": 38968 }, { "epoch": 0.06909703106174848, "grad_norm": 1.65625, "learning_rate": 0.0013791057064387773, "loss": 0.2005, "step": 38970 }, { "epoch": 0.0691005772270583, "grad_norm": 2.328125, "learning_rate": 0.0013790491215535847, "loss": 0.2941, "step": 38972 }, { "epoch": 0.06910412339236811, "grad_norm": 0.400390625, "learning_rate": 0.0013789925354479936, "loss": 0.2034, "step": 38974 }, { "epoch": 0.06910766955767794, "grad_norm": 0.38671875, "learning_rate": 0.0013789359481222513, "loss": 0.1746, "step": 38976 }, { "epoch": 0.06911121572298776, "grad_norm": 0.33984375, "learning_rate": 0.001378879359576605, "loss": 0.2583, "step": 38978 }, { "epoch": 0.06911476188829757, "grad_norm": 0.66015625, "learning_rate": 0.0013788227698113022, "loss": 0.1874, "step": 38980 }, { "epoch": 0.06911830805360739, "grad_norm": 0.51171875, "learning_rate": 0.001378766178826591, "loss": 0.2246, "step": 38982 }, { "epoch": 0.0691218542189172, "grad_norm": 0.44921875, "learning_rate": 0.0013787095866227177, "loss": 0.1803, "step": 38984 }, { "epoch": 0.06912540038422701, "grad_norm": 0.7109375, "learning_rate": 0.0013786529931999311, "loss": 0.1907, "step": 38986 }, { "epoch": 0.06912894654953683, "grad_norm": 1.1171875, "learning_rate": 0.0013785963985584783, "loss": 0.2449, "step": 38988 }, { "epoch": 0.06913249271484664, "grad_norm": 0.345703125, "learning_rate": 0.0013785398026986064, "loss": 0.173, "step": 38990 }, { "epoch": 0.06913603888015646, "grad_norm": 0.453125, "learning_rate": 0.0013784832056205631, "loss": 0.192, "step": 38992 }, { "epoch": 0.06913958504546627, "grad_norm": 1.0234375, "learning_rate": 0.0013784266073245965, "loss": 0.2923, "step": 38994 }, { "epoch": 0.06914313121077609, "grad_norm": 0.2470703125, "learning_rate": 0.0013783700078109533, "loss": 0.1412, "step": 38996 }, { "epoch": 0.0691466773760859, "grad_norm": 0.357421875, "learning_rate": 0.0013783134070798815, "loss": 0.2173, "step": 38998 }, { "epoch": 0.06915022354139572, "grad_norm": 0.84375, "learning_rate": 0.0013782568051316287, "loss": 0.2496, "step": 39000 }, { "epoch": 0.06915376970670553, "grad_norm": 0.388671875, "learning_rate": 0.0013782002019664416, "loss": 0.1667, "step": 39002 }, { "epoch": 0.06915731587201535, "grad_norm": 1.875, "learning_rate": 0.0013781435975845691, "loss": 0.392, "step": 39004 }, { "epoch": 0.06916086203732516, "grad_norm": 0.93359375, "learning_rate": 0.0013780869919862578, "loss": 0.205, "step": 39006 }, { "epoch": 0.06916440820263497, "grad_norm": 0.38671875, "learning_rate": 0.0013780303851717555, "loss": 0.2089, "step": 39008 }, { "epoch": 0.06916795436794479, "grad_norm": 2.625, "learning_rate": 0.0013779737771413099, "loss": 0.2644, "step": 39010 }, { "epoch": 0.0691715005332546, "grad_norm": 4.71875, "learning_rate": 0.0013779171678951682, "loss": 0.2357, "step": 39012 }, { "epoch": 0.06917504669856442, "grad_norm": 0.2392578125, "learning_rate": 0.0013778605574335787, "loss": 0.1713, "step": 39014 }, { "epoch": 0.06917859286387423, "grad_norm": 0.421875, "learning_rate": 0.0013778039457567883, "loss": 0.1755, "step": 39016 }, { "epoch": 0.06918213902918405, "grad_norm": 0.765625, "learning_rate": 0.0013777473328650447, "loss": 0.2225, "step": 39018 }, { "epoch": 0.06918568519449386, "grad_norm": 0.55078125, "learning_rate": 0.0013776907187585956, "loss": 0.229, "step": 39020 }, { "epoch": 0.06918923135980369, "grad_norm": 0.353515625, "learning_rate": 0.0013776341034376887, "loss": 0.2176, "step": 39022 }, { "epoch": 0.0691927775251135, "grad_norm": 0.486328125, "learning_rate": 0.0013775774869025715, "loss": 0.2182, "step": 39024 }, { "epoch": 0.06919632369042332, "grad_norm": 0.255859375, "learning_rate": 0.0013775208691534915, "loss": 0.1483, "step": 39026 }, { "epoch": 0.06919986985573313, "grad_norm": 0.30078125, "learning_rate": 0.0013774642501906966, "loss": 0.2045, "step": 39028 }, { "epoch": 0.06920341602104295, "grad_norm": 0.298828125, "learning_rate": 0.0013774076300144343, "loss": 0.2157, "step": 39030 }, { "epoch": 0.06920696218635276, "grad_norm": 0.2734375, "learning_rate": 0.001377351008624952, "loss": 0.2509, "step": 39032 }, { "epoch": 0.06921050835166258, "grad_norm": 0.95703125, "learning_rate": 0.0013772943860224974, "loss": 0.218, "step": 39034 }, { "epoch": 0.06921405451697239, "grad_norm": 0.302734375, "learning_rate": 0.0013772377622073184, "loss": 0.1836, "step": 39036 }, { "epoch": 0.06921760068228221, "grad_norm": 0.734375, "learning_rate": 0.001377181137179662, "loss": 0.2329, "step": 39038 }, { "epoch": 0.06922114684759202, "grad_norm": 0.95703125, "learning_rate": 0.001377124510939777, "loss": 0.2342, "step": 39040 }, { "epoch": 0.06922469301290184, "grad_norm": 6.125, "learning_rate": 0.00137706788348791, "loss": 0.3604, "step": 39042 }, { "epoch": 0.06922823917821165, "grad_norm": 0.458984375, "learning_rate": 0.001377011254824309, "loss": 0.2617, "step": 39044 }, { "epoch": 0.06923178534352147, "grad_norm": 0.96875, "learning_rate": 0.0013769546249492218, "loss": 0.1644, "step": 39046 }, { "epoch": 0.06923533150883128, "grad_norm": 0.7265625, "learning_rate": 0.001376897993862896, "loss": 0.2211, "step": 39048 }, { "epoch": 0.0692388776741411, "grad_norm": 0.1953125, "learning_rate": 0.0013768413615655793, "loss": 0.1609, "step": 39050 }, { "epoch": 0.06924242383945091, "grad_norm": 0.34765625, "learning_rate": 0.0013767847280575193, "loss": 0.3056, "step": 39052 }, { "epoch": 0.06924597000476072, "grad_norm": 2.234375, "learning_rate": 0.0013767280933389635, "loss": 0.2036, "step": 39054 }, { "epoch": 0.06924951617007054, "grad_norm": 0.345703125, "learning_rate": 0.0013766714574101595, "loss": 0.1643, "step": 39056 }, { "epoch": 0.06925306233538035, "grad_norm": 0.40625, "learning_rate": 0.0013766148202713556, "loss": 0.2271, "step": 39058 }, { "epoch": 0.06925660850069017, "grad_norm": 0.263671875, "learning_rate": 0.001376558181922799, "loss": 0.2597, "step": 39060 }, { "epoch": 0.06926015466599998, "grad_norm": 0.6328125, "learning_rate": 0.0013765015423647376, "loss": 0.2582, "step": 39062 }, { "epoch": 0.0692637008313098, "grad_norm": 0.498046875, "learning_rate": 0.0013764449015974192, "loss": 0.154, "step": 39064 }, { "epoch": 0.06926724699661962, "grad_norm": 0.64453125, "learning_rate": 0.0013763882596210912, "loss": 0.1708, "step": 39066 }, { "epoch": 0.06927079316192944, "grad_norm": 0.322265625, "learning_rate": 0.0013763316164360016, "loss": 0.206, "step": 39068 }, { "epoch": 0.06927433932723925, "grad_norm": 1.171875, "learning_rate": 0.001376274972042398, "loss": 0.1629, "step": 39070 }, { "epoch": 0.06927788549254907, "grad_norm": 0.1826171875, "learning_rate": 0.001376218326440528, "loss": 0.128, "step": 39072 }, { "epoch": 0.06928143165785888, "grad_norm": 3.4375, "learning_rate": 0.0013761616796306396, "loss": 0.2082, "step": 39074 }, { "epoch": 0.0692849778231687, "grad_norm": 0.275390625, "learning_rate": 0.0013761050316129805, "loss": 0.2223, "step": 39076 }, { "epoch": 0.06928852398847851, "grad_norm": 0.326171875, "learning_rate": 0.0013760483823877982, "loss": 0.1573, "step": 39078 }, { "epoch": 0.06929207015378833, "grad_norm": 0.6328125, "learning_rate": 0.001375991731955341, "loss": 0.2425, "step": 39080 }, { "epoch": 0.06929561631909814, "grad_norm": 0.5390625, "learning_rate": 0.001375935080315856, "loss": 0.2031, "step": 39082 }, { "epoch": 0.06929916248440796, "grad_norm": 1.1171875, "learning_rate": 0.0013758784274695913, "loss": 0.345, "step": 39084 }, { "epoch": 0.06930270864971777, "grad_norm": 0.279296875, "learning_rate": 0.0013758217734167943, "loss": 0.2171, "step": 39086 }, { "epoch": 0.06930625481502758, "grad_norm": 0.298828125, "learning_rate": 0.0013757651181577138, "loss": 0.1624, "step": 39088 }, { "epoch": 0.0693098009803374, "grad_norm": 0.5546875, "learning_rate": 0.0013757084616925963, "loss": 0.1815, "step": 39090 }, { "epoch": 0.06931334714564721, "grad_norm": 0.361328125, "learning_rate": 0.0013756518040216902, "loss": 0.2404, "step": 39092 }, { "epoch": 0.06931689331095703, "grad_norm": 0.7734375, "learning_rate": 0.0013755951451452433, "loss": 0.2177, "step": 39094 }, { "epoch": 0.06932043947626684, "grad_norm": 0.6953125, "learning_rate": 0.0013755384850635034, "loss": 0.1606, "step": 39096 }, { "epoch": 0.06932398564157666, "grad_norm": 0.380859375, "learning_rate": 0.0013754818237767179, "loss": 0.2213, "step": 39098 }, { "epoch": 0.06932753180688647, "grad_norm": 0.388671875, "learning_rate": 0.0013754251612851352, "loss": 0.2233, "step": 39100 }, { "epoch": 0.06933107797219629, "grad_norm": 0.275390625, "learning_rate": 0.0013753684975890026, "loss": 0.1577, "step": 39102 }, { "epoch": 0.0693346241375061, "grad_norm": 0.1982421875, "learning_rate": 0.0013753118326885684, "loss": 0.2061, "step": 39104 }, { "epoch": 0.06933817030281592, "grad_norm": 0.7265625, "learning_rate": 0.0013752551665840804, "loss": 0.156, "step": 39106 }, { "epoch": 0.06934171646812573, "grad_norm": 0.5390625, "learning_rate": 0.0013751984992757859, "loss": 0.2068, "step": 39108 }, { "epoch": 0.06934526263343554, "grad_norm": 0.419921875, "learning_rate": 0.0013751418307639328, "loss": 0.1798, "step": 39110 }, { "epoch": 0.06934880879874537, "grad_norm": 0.41015625, "learning_rate": 0.0013750851610487697, "loss": 0.1749, "step": 39112 }, { "epoch": 0.06935235496405519, "grad_norm": 0.390625, "learning_rate": 0.0013750284901305433, "loss": 0.1805, "step": 39114 }, { "epoch": 0.069355901129365, "grad_norm": 0.2373046875, "learning_rate": 0.001374971818009502, "loss": 0.1463, "step": 39116 }, { "epoch": 0.06935944729467482, "grad_norm": 1.515625, "learning_rate": 0.0013749151446858941, "loss": 0.2651, "step": 39118 }, { "epoch": 0.06936299345998463, "grad_norm": 1.578125, "learning_rate": 0.0013748584701599668, "loss": 0.2021, "step": 39120 }, { "epoch": 0.06936653962529445, "grad_norm": 0.61328125, "learning_rate": 0.0013748017944319685, "loss": 0.2095, "step": 39122 }, { "epoch": 0.06937008579060426, "grad_norm": 0.392578125, "learning_rate": 0.0013747451175021466, "loss": 0.1892, "step": 39124 }, { "epoch": 0.06937363195591408, "grad_norm": 0.55859375, "learning_rate": 0.0013746884393707492, "loss": 0.2083, "step": 39126 }, { "epoch": 0.06937717812122389, "grad_norm": 0.59375, "learning_rate": 0.0013746317600380241, "loss": 0.2751, "step": 39128 }, { "epoch": 0.0693807242865337, "grad_norm": 0.328125, "learning_rate": 0.001374575079504219, "loss": 0.161, "step": 39130 }, { "epoch": 0.06938427045184352, "grad_norm": 0.427734375, "learning_rate": 0.0013745183977695822, "loss": 0.1875, "step": 39132 }, { "epoch": 0.06938781661715333, "grad_norm": 2.875, "learning_rate": 0.0013744617148343614, "loss": 0.3237, "step": 39134 }, { "epoch": 0.06939136278246315, "grad_norm": 1.8671875, "learning_rate": 0.0013744050306988044, "loss": 0.2617, "step": 39136 }, { "epoch": 0.06939490894777296, "grad_norm": 0.271484375, "learning_rate": 0.0013743483453631594, "loss": 0.1934, "step": 39138 }, { "epoch": 0.06939845511308278, "grad_norm": 1.3515625, "learning_rate": 0.0013742916588276738, "loss": 0.1688, "step": 39140 }, { "epoch": 0.06940200127839259, "grad_norm": 0.58203125, "learning_rate": 0.0013742349710925959, "loss": 0.1909, "step": 39142 }, { "epoch": 0.0694055474437024, "grad_norm": 0.2734375, "learning_rate": 0.0013741782821581735, "loss": 0.2134, "step": 39144 }, { "epoch": 0.06940909360901222, "grad_norm": 0.2431640625, "learning_rate": 0.0013741215920246548, "loss": 0.2087, "step": 39146 }, { "epoch": 0.06941263977432204, "grad_norm": 0.1748046875, "learning_rate": 0.0013740649006922872, "loss": 0.1546, "step": 39148 }, { "epoch": 0.06941618593963185, "grad_norm": 0.67578125, "learning_rate": 0.0013740082081613193, "loss": 0.1477, "step": 39150 }, { "epoch": 0.06941973210494166, "grad_norm": 0.5625, "learning_rate": 0.001373951514431998, "loss": 0.314, "step": 39152 }, { "epoch": 0.06942327827025148, "grad_norm": 0.5, "learning_rate": 0.0013738948195045723, "loss": 0.3834, "step": 39154 }, { "epoch": 0.0694268244355613, "grad_norm": 1.8671875, "learning_rate": 0.00137383812337929, "loss": 0.2823, "step": 39156 }, { "epoch": 0.06943037060087112, "grad_norm": 0.51171875, "learning_rate": 0.0013737814260563984, "loss": 0.1531, "step": 39158 }, { "epoch": 0.06943391676618094, "grad_norm": 0.87890625, "learning_rate": 0.001373724727536146, "loss": 0.2378, "step": 39160 }, { "epoch": 0.06943746293149075, "grad_norm": 2.015625, "learning_rate": 0.0013736680278187807, "loss": 0.2065, "step": 39162 }, { "epoch": 0.06944100909680057, "grad_norm": 0.609375, "learning_rate": 0.0013736113269045506, "loss": 0.1978, "step": 39164 }, { "epoch": 0.06944455526211038, "grad_norm": 2.828125, "learning_rate": 0.0013735546247937035, "loss": 0.2105, "step": 39166 }, { "epoch": 0.0694481014274202, "grad_norm": 0.34375, "learning_rate": 0.001373497921486487, "loss": 0.1961, "step": 39168 }, { "epoch": 0.06945164759273001, "grad_norm": 0.94921875, "learning_rate": 0.0013734412169831493, "loss": 0.1592, "step": 39170 }, { "epoch": 0.06945519375803982, "grad_norm": 0.48828125, "learning_rate": 0.0013733845112839389, "loss": 0.1611, "step": 39172 }, { "epoch": 0.06945873992334964, "grad_norm": 0.9609375, "learning_rate": 0.0013733278043891036, "loss": 0.2299, "step": 39174 }, { "epoch": 0.06946228608865945, "grad_norm": 0.447265625, "learning_rate": 0.001373271096298891, "loss": 0.188, "step": 39176 }, { "epoch": 0.06946583225396927, "grad_norm": 0.4375, "learning_rate": 0.0013732143870135495, "loss": 0.2266, "step": 39178 }, { "epoch": 0.06946937841927908, "grad_norm": 0.41796875, "learning_rate": 0.0013731576765333266, "loss": 0.1917, "step": 39180 }, { "epoch": 0.0694729245845889, "grad_norm": 0.65625, "learning_rate": 0.0013731009648584713, "loss": 0.1405, "step": 39182 }, { "epoch": 0.06947647074989871, "grad_norm": 0.40625, "learning_rate": 0.0013730442519892308, "loss": 0.2033, "step": 39184 }, { "epoch": 0.06948001691520853, "grad_norm": 0.4765625, "learning_rate": 0.0013729875379258534, "loss": 0.1868, "step": 39186 }, { "epoch": 0.06948356308051834, "grad_norm": 0.4609375, "learning_rate": 0.0013729308226685868, "loss": 0.1652, "step": 39188 }, { "epoch": 0.06948710924582815, "grad_norm": 0.34375, "learning_rate": 0.0013728741062176792, "loss": 0.171, "step": 39190 }, { "epoch": 0.06949065541113797, "grad_norm": 0.55859375, "learning_rate": 0.001372817388573379, "loss": 0.1863, "step": 39192 }, { "epoch": 0.06949420157644778, "grad_norm": 0.640625, "learning_rate": 0.001372760669735934, "loss": 0.1805, "step": 39194 }, { "epoch": 0.0694977477417576, "grad_norm": 0.3984375, "learning_rate": 0.0013727039497055925, "loss": 0.1568, "step": 39196 }, { "epoch": 0.06950129390706741, "grad_norm": 0.421875, "learning_rate": 0.0013726472284826023, "loss": 0.1686, "step": 39198 }, { "epoch": 0.06950484007237723, "grad_norm": 0.39453125, "learning_rate": 0.001372590506067211, "loss": 0.1999, "step": 39200 }, { "epoch": 0.06950838623768706, "grad_norm": 0.357421875, "learning_rate": 0.0013725337824596676, "loss": 0.287, "step": 39202 }, { "epoch": 0.06951193240299687, "grad_norm": 0.61328125, "learning_rate": 0.0013724770576602195, "loss": 0.1853, "step": 39204 }, { "epoch": 0.06951547856830669, "grad_norm": 3.265625, "learning_rate": 0.001372420331669115, "loss": 0.3077, "step": 39206 }, { "epoch": 0.0695190247336165, "grad_norm": 0.3828125, "learning_rate": 0.0013723636044866026, "loss": 0.1991, "step": 39208 }, { "epoch": 0.06952257089892631, "grad_norm": 0.54296875, "learning_rate": 0.0013723068761129296, "loss": 0.2168, "step": 39210 }, { "epoch": 0.06952611706423613, "grad_norm": 0.50390625, "learning_rate": 0.0013722501465483446, "loss": 0.1849, "step": 39212 }, { "epoch": 0.06952966322954594, "grad_norm": 4.21875, "learning_rate": 0.0013721934157930956, "loss": 0.319, "step": 39214 }, { "epoch": 0.06953320939485576, "grad_norm": 0.37109375, "learning_rate": 0.0013721366838474309, "loss": 0.1526, "step": 39216 }, { "epoch": 0.06953675556016557, "grad_norm": 0.31640625, "learning_rate": 0.0013720799507115984, "loss": 0.1966, "step": 39218 }, { "epoch": 0.06954030172547539, "grad_norm": 0.7734375, "learning_rate": 0.001372023216385846, "loss": 0.1945, "step": 39220 }, { "epoch": 0.0695438478907852, "grad_norm": 0.2412109375, "learning_rate": 0.0013719664808704221, "loss": 0.1684, "step": 39222 }, { "epoch": 0.06954739405609502, "grad_norm": 0.169921875, "learning_rate": 0.0013719097441655752, "loss": 0.2218, "step": 39224 }, { "epoch": 0.06955094022140483, "grad_norm": 0.7734375, "learning_rate": 0.0013718530062715524, "loss": 0.2157, "step": 39226 }, { "epoch": 0.06955448638671465, "grad_norm": 0.470703125, "learning_rate": 0.0013717962671886028, "loss": 0.1772, "step": 39228 }, { "epoch": 0.06955803255202446, "grad_norm": 0.828125, "learning_rate": 0.001371739526916974, "loss": 0.312, "step": 39230 }, { "epoch": 0.06956157871733427, "grad_norm": 0.57421875, "learning_rate": 0.0013716827854569147, "loss": 0.256, "step": 39232 }, { "epoch": 0.06956512488264409, "grad_norm": 0.5625, "learning_rate": 0.0013716260428086723, "loss": 0.2076, "step": 39234 }, { "epoch": 0.0695686710479539, "grad_norm": 0.828125, "learning_rate": 0.0013715692989724956, "loss": 0.2129, "step": 39236 }, { "epoch": 0.06957221721326372, "grad_norm": 1.2421875, "learning_rate": 0.001371512553948633, "loss": 0.2132, "step": 39238 }, { "epoch": 0.06957576337857353, "grad_norm": 0.3671875, "learning_rate": 0.0013714558077373316, "loss": 0.177, "step": 39240 }, { "epoch": 0.06957930954388335, "grad_norm": 0.9296875, "learning_rate": 0.0013713990603388402, "loss": 0.3473, "step": 39242 }, { "epoch": 0.06958285570919316, "grad_norm": 2.328125, "learning_rate": 0.0013713423117534072, "loss": 0.2037, "step": 39244 }, { "epoch": 0.06958640187450298, "grad_norm": 0.51953125, "learning_rate": 0.0013712855619812804, "loss": 0.1548, "step": 39246 }, { "epoch": 0.0695899480398128, "grad_norm": 0.455078125, "learning_rate": 0.001371228811022708, "loss": 0.1804, "step": 39248 }, { "epoch": 0.06959349420512262, "grad_norm": 0.1533203125, "learning_rate": 0.0013711720588779385, "loss": 0.125, "step": 39250 }, { "epoch": 0.06959704037043243, "grad_norm": 0.35546875, "learning_rate": 0.00137111530554722, "loss": 0.2229, "step": 39252 }, { "epoch": 0.06960058653574225, "grad_norm": 0.81640625, "learning_rate": 0.0013710585510308005, "loss": 0.191, "step": 39254 }, { "epoch": 0.06960413270105206, "grad_norm": 0.2890625, "learning_rate": 0.0013710017953289285, "loss": 0.2911, "step": 39256 }, { "epoch": 0.06960767886636188, "grad_norm": 0.1865234375, "learning_rate": 0.0013709450384418518, "loss": 0.3502, "step": 39258 }, { "epoch": 0.06961122503167169, "grad_norm": 0.283203125, "learning_rate": 0.001370888280369819, "loss": 0.2258, "step": 39260 }, { "epoch": 0.0696147711969815, "grad_norm": 1.390625, "learning_rate": 0.0013708315211130781, "loss": 0.2306, "step": 39262 }, { "epoch": 0.06961831736229132, "grad_norm": 0.5078125, "learning_rate": 0.0013707747606718779, "loss": 0.2524, "step": 39264 }, { "epoch": 0.06962186352760114, "grad_norm": 5.40625, "learning_rate": 0.0013707179990464658, "loss": 0.2523, "step": 39266 }, { "epoch": 0.06962540969291095, "grad_norm": 0.7421875, "learning_rate": 0.0013706612362370905, "loss": 0.2019, "step": 39268 }, { "epoch": 0.06962895585822076, "grad_norm": 0.70703125, "learning_rate": 0.001370604472244, "loss": 0.2609, "step": 39270 }, { "epoch": 0.06963250202353058, "grad_norm": 0.439453125, "learning_rate": 0.001370547707067443, "loss": 0.1699, "step": 39272 }, { "epoch": 0.0696360481888404, "grad_norm": 0.232421875, "learning_rate": 0.0013704909407076674, "loss": 0.216, "step": 39274 }, { "epoch": 0.06963959435415021, "grad_norm": 0.59765625, "learning_rate": 0.0013704341731649214, "loss": 0.1723, "step": 39276 }, { "epoch": 0.06964314051946002, "grad_norm": 0.458984375, "learning_rate": 0.0013703774044394536, "loss": 0.2022, "step": 39278 }, { "epoch": 0.06964668668476984, "grad_norm": 0.322265625, "learning_rate": 0.001370320634531512, "loss": 0.1496, "step": 39280 }, { "epoch": 0.06965023285007965, "grad_norm": 0.6875, "learning_rate": 0.001370263863441345, "loss": 0.1989, "step": 39282 }, { "epoch": 0.06965377901538947, "grad_norm": 0.318359375, "learning_rate": 0.0013702070911692007, "loss": 0.2432, "step": 39284 }, { "epoch": 0.06965732518069928, "grad_norm": 0.6640625, "learning_rate": 0.0013701503177153277, "loss": 0.1603, "step": 39286 }, { "epoch": 0.0696608713460091, "grad_norm": 1.1171875, "learning_rate": 0.0013700935430799742, "loss": 0.2809, "step": 39288 }, { "epoch": 0.06966441751131891, "grad_norm": 1.5234375, "learning_rate": 0.0013700367672633883, "loss": 0.2048, "step": 39290 }, { "epoch": 0.06966796367662872, "grad_norm": 0.53515625, "learning_rate": 0.0013699799902658185, "loss": 0.1746, "step": 39292 }, { "epoch": 0.06967150984193855, "grad_norm": 0.35546875, "learning_rate": 0.0013699232120875132, "loss": 0.2209, "step": 39294 }, { "epoch": 0.06967505600724837, "grad_norm": 13.375, "learning_rate": 0.0013698664327287202, "loss": 0.32, "step": 39296 }, { "epoch": 0.06967860217255818, "grad_norm": 0.9921875, "learning_rate": 0.0013698096521896886, "loss": 0.2308, "step": 39298 }, { "epoch": 0.069682148337868, "grad_norm": 0.515625, "learning_rate": 0.001369752870470666, "loss": 0.2316, "step": 39300 }, { "epoch": 0.06968569450317781, "grad_norm": 0.67578125, "learning_rate": 0.0013696960875719008, "loss": 0.1946, "step": 39302 }, { "epoch": 0.06968924066848763, "grad_norm": 0.71484375, "learning_rate": 0.0013696393034936419, "loss": 0.6397, "step": 39304 }, { "epoch": 0.06969278683379744, "grad_norm": 0.90234375, "learning_rate": 0.0013695825182361372, "loss": 0.1963, "step": 39306 }, { "epoch": 0.06969633299910725, "grad_norm": 0.51171875, "learning_rate": 0.0013695257317996353, "loss": 0.1907, "step": 39308 }, { "epoch": 0.06969987916441707, "grad_norm": 0.2734375, "learning_rate": 0.0013694689441843843, "loss": 0.2383, "step": 39310 }, { "epoch": 0.06970342532972688, "grad_norm": 0.65625, "learning_rate": 0.0013694121553906326, "loss": 0.1667, "step": 39312 }, { "epoch": 0.0697069714950367, "grad_norm": 0.2431640625, "learning_rate": 0.001369355365418629, "loss": 0.1556, "step": 39314 }, { "epoch": 0.06971051766034651, "grad_norm": 0.8984375, "learning_rate": 0.001369298574268621, "loss": 0.2431, "step": 39316 }, { "epoch": 0.06971406382565633, "grad_norm": 0.6015625, "learning_rate": 0.001369241781940858, "loss": 0.1601, "step": 39318 }, { "epoch": 0.06971760999096614, "grad_norm": 0.306640625, "learning_rate": 0.0013691849884355876, "loss": 0.196, "step": 39320 }, { "epoch": 0.06972115615627596, "grad_norm": 0.671875, "learning_rate": 0.0013691281937530581, "loss": 0.1584, "step": 39322 }, { "epoch": 0.06972470232158577, "grad_norm": 1.4921875, "learning_rate": 0.0013690713978935186, "loss": 0.2903, "step": 39324 }, { "epoch": 0.06972824848689559, "grad_norm": 0.53515625, "learning_rate": 0.001369014600857217, "loss": 0.2248, "step": 39326 }, { "epoch": 0.0697317946522054, "grad_norm": 0.48828125, "learning_rate": 0.0013689578026444015, "loss": 0.2133, "step": 39328 }, { "epoch": 0.06973534081751522, "grad_norm": 0.609375, "learning_rate": 0.0013689010032553212, "loss": 0.2924, "step": 39330 }, { "epoch": 0.06973888698282503, "grad_norm": 0.310546875, "learning_rate": 0.0013688442026902241, "loss": 0.1257, "step": 39332 }, { "epoch": 0.06974243314813484, "grad_norm": 0.640625, "learning_rate": 0.0013687874009493583, "loss": 0.209, "step": 39334 }, { "epoch": 0.06974597931344466, "grad_norm": 0.24609375, "learning_rate": 0.001368730598032973, "loss": 0.1893, "step": 39336 }, { "epoch": 0.06974952547875449, "grad_norm": 0.3984375, "learning_rate": 0.0013686737939413158, "loss": 0.1759, "step": 39338 }, { "epoch": 0.0697530716440643, "grad_norm": 0.60546875, "learning_rate": 0.0013686169886746354, "loss": 0.2258, "step": 39340 }, { "epoch": 0.06975661780937412, "grad_norm": 0.97265625, "learning_rate": 0.0013685601822331806, "loss": 0.1558, "step": 39342 }, { "epoch": 0.06976016397468393, "grad_norm": 0.6796875, "learning_rate": 0.0013685033746171993, "loss": 0.155, "step": 39344 }, { "epoch": 0.06976371013999375, "grad_norm": 0.41796875, "learning_rate": 0.0013684465658269406, "loss": 0.4398, "step": 39346 }, { "epoch": 0.06976725630530356, "grad_norm": 0.30078125, "learning_rate": 0.0013683897558626522, "loss": 0.1279, "step": 39348 }, { "epoch": 0.06977080247061337, "grad_norm": 0.80859375, "learning_rate": 0.0013683329447245832, "loss": 0.219, "step": 39350 }, { "epoch": 0.06977434863592319, "grad_norm": 0.4375, "learning_rate": 0.0013682761324129815, "loss": 0.2159, "step": 39352 }, { "epoch": 0.069777894801233, "grad_norm": 0.453125, "learning_rate": 0.0013682193189280961, "loss": 0.1797, "step": 39354 }, { "epoch": 0.06978144096654282, "grad_norm": 1.203125, "learning_rate": 0.001368162504270175, "loss": 0.2075, "step": 39356 }, { "epoch": 0.06978498713185263, "grad_norm": 0.953125, "learning_rate": 0.001368105688439467, "loss": 0.4457, "step": 39358 }, { "epoch": 0.06978853329716245, "grad_norm": 0.34375, "learning_rate": 0.00136804887143622, "loss": 0.3058, "step": 39360 }, { "epoch": 0.06979207946247226, "grad_norm": 0.5625, "learning_rate": 0.0013679920532606834, "loss": 0.2225, "step": 39362 }, { "epoch": 0.06979562562778208, "grad_norm": 0.30078125, "learning_rate": 0.001367935233913105, "loss": 0.2052, "step": 39364 }, { "epoch": 0.06979917179309189, "grad_norm": 0.2236328125, "learning_rate": 0.0013678784133937338, "loss": 0.2366, "step": 39366 }, { "epoch": 0.0698027179584017, "grad_norm": 0.240234375, "learning_rate": 0.0013678215917028178, "loss": 0.2147, "step": 39368 }, { "epoch": 0.06980626412371152, "grad_norm": 0.296875, "learning_rate": 0.0013677647688406058, "loss": 0.2027, "step": 39370 }, { "epoch": 0.06980981028902133, "grad_norm": 0.57421875, "learning_rate": 0.0013677079448073464, "loss": 0.1955, "step": 39372 }, { "epoch": 0.06981335645433115, "grad_norm": 0.90625, "learning_rate": 0.0013676511196032875, "loss": 0.2022, "step": 39374 }, { "epoch": 0.06981690261964096, "grad_norm": 0.380859375, "learning_rate": 0.0013675942932286785, "loss": 0.174, "step": 39376 }, { "epoch": 0.06982044878495078, "grad_norm": 0.93359375, "learning_rate": 0.0013675374656837674, "loss": 0.33, "step": 39378 }, { "epoch": 0.06982399495026059, "grad_norm": 0.73046875, "learning_rate": 0.0013674806369688027, "loss": 0.1911, "step": 39380 }, { "epoch": 0.06982754111557041, "grad_norm": 0.76171875, "learning_rate": 0.001367423807084033, "loss": 0.2315, "step": 39382 }, { "epoch": 0.06983108728088024, "grad_norm": 0.36328125, "learning_rate": 0.001367366976029707, "loss": 0.1508, "step": 39384 }, { "epoch": 0.06983463344619005, "grad_norm": 0.42578125, "learning_rate": 0.0013673101438060732, "loss": 0.2059, "step": 39386 }, { "epoch": 0.06983817961149986, "grad_norm": 1.0, "learning_rate": 0.0013672533104133805, "loss": 0.1699, "step": 39388 }, { "epoch": 0.06984172577680968, "grad_norm": 0.484375, "learning_rate": 0.0013671964758518766, "loss": 0.2582, "step": 39390 }, { "epoch": 0.0698452719421195, "grad_norm": 0.9375, "learning_rate": 0.0013671396401218104, "loss": 0.2007, "step": 39392 }, { "epoch": 0.06984881810742931, "grad_norm": 0.47265625, "learning_rate": 0.0013670828032234306, "loss": 0.3647, "step": 39394 }, { "epoch": 0.06985236427273912, "grad_norm": 0.427734375, "learning_rate": 0.001367025965156986, "loss": 0.1801, "step": 39396 }, { "epoch": 0.06985591043804894, "grad_norm": 0.52734375, "learning_rate": 0.0013669691259227247, "loss": 0.2043, "step": 39398 }, { "epoch": 0.06985945660335875, "grad_norm": 1.40625, "learning_rate": 0.001366912285520896, "loss": 0.2448, "step": 39400 }, { "epoch": 0.06986300276866857, "grad_norm": 0.2353515625, "learning_rate": 0.0013668554439517475, "loss": 0.2215, "step": 39402 }, { "epoch": 0.06986654893397838, "grad_norm": 0.859375, "learning_rate": 0.0013667986012155288, "loss": 0.2539, "step": 39404 }, { "epoch": 0.0698700950992882, "grad_norm": 0.9453125, "learning_rate": 0.0013667417573124876, "loss": 0.2454, "step": 39406 }, { "epoch": 0.06987364126459801, "grad_norm": 0.2294921875, "learning_rate": 0.001366684912242873, "loss": 0.1922, "step": 39408 }, { "epoch": 0.06987718742990782, "grad_norm": 0.302734375, "learning_rate": 0.0013666280660069331, "loss": 0.1912, "step": 39410 }, { "epoch": 0.06988073359521764, "grad_norm": 0.345703125, "learning_rate": 0.0013665712186049176, "loss": 0.1741, "step": 39412 }, { "epoch": 0.06988427976052745, "grad_norm": 1.1015625, "learning_rate": 0.001366514370037074, "loss": 0.2303, "step": 39414 }, { "epoch": 0.06988782592583727, "grad_norm": 0.5703125, "learning_rate": 0.0013664575203036515, "loss": 0.1642, "step": 39416 }, { "epoch": 0.06989137209114708, "grad_norm": 0.369140625, "learning_rate": 0.0013664006694048985, "loss": 0.1562, "step": 39418 }, { "epoch": 0.0698949182564569, "grad_norm": 0.578125, "learning_rate": 0.0013663438173410639, "loss": 0.1852, "step": 39420 }, { "epoch": 0.06989846442176671, "grad_norm": 0.458984375, "learning_rate": 0.0013662869641123958, "loss": 0.1733, "step": 39422 }, { "epoch": 0.06990201058707653, "grad_norm": 0.51171875, "learning_rate": 0.0013662301097191437, "loss": 0.1842, "step": 39424 }, { "epoch": 0.06990555675238634, "grad_norm": 4.625, "learning_rate": 0.0013661732541615556, "loss": 0.3214, "step": 39426 }, { "epoch": 0.06990910291769616, "grad_norm": 0.484375, "learning_rate": 0.0013661163974398802, "loss": 0.2017, "step": 39428 }, { "epoch": 0.06991264908300598, "grad_norm": 0.66015625, "learning_rate": 0.001366059539554366, "loss": 0.3421, "step": 39430 }, { "epoch": 0.0699161952483158, "grad_norm": 0.47265625, "learning_rate": 0.001366002680505262, "loss": 0.2036, "step": 39432 }, { "epoch": 0.06991974141362561, "grad_norm": 0.205078125, "learning_rate": 0.0013659458202928173, "loss": 0.1866, "step": 39434 }, { "epoch": 0.06992328757893543, "grad_norm": 0.51953125, "learning_rate": 0.0013658889589172796, "loss": 0.2605, "step": 39436 }, { "epoch": 0.06992683374424524, "grad_norm": 0.462890625, "learning_rate": 0.001365832096378898, "loss": 0.1619, "step": 39438 }, { "epoch": 0.06993037990955506, "grad_norm": 0.298828125, "learning_rate": 0.0013657752326779215, "loss": 0.1864, "step": 39440 }, { "epoch": 0.06993392607486487, "grad_norm": 0.3515625, "learning_rate": 0.0013657183678145981, "loss": 0.2278, "step": 39442 }, { "epoch": 0.06993747224017469, "grad_norm": 0.3046875, "learning_rate": 0.0013656615017891773, "loss": 0.1632, "step": 39444 }, { "epoch": 0.0699410184054845, "grad_norm": 0.8125, "learning_rate": 0.0013656046346019076, "loss": 0.1935, "step": 39446 }, { "epoch": 0.06994456457079432, "grad_norm": 0.58984375, "learning_rate": 0.001365547766253037, "loss": 0.1681, "step": 39448 }, { "epoch": 0.06994811073610413, "grad_norm": 0.31640625, "learning_rate": 0.0013654908967428148, "loss": 0.1887, "step": 39450 }, { "epoch": 0.06995165690141394, "grad_norm": 1.1015625, "learning_rate": 0.0013654340260714896, "loss": 0.2333, "step": 39452 }, { "epoch": 0.06995520306672376, "grad_norm": 0.42578125, "learning_rate": 0.0013653771542393104, "loss": 0.1781, "step": 39454 }, { "epoch": 0.06995874923203357, "grad_norm": 0.4921875, "learning_rate": 0.0013653202812465258, "loss": 0.2361, "step": 39456 }, { "epoch": 0.06996229539734339, "grad_norm": 0.2734375, "learning_rate": 0.0013652634070933842, "loss": 0.1574, "step": 39458 }, { "epoch": 0.0699658415626532, "grad_norm": 0.41796875, "learning_rate": 0.0013652065317801346, "loss": 0.1966, "step": 39460 }, { "epoch": 0.06996938772796302, "grad_norm": 1.2265625, "learning_rate": 0.0013651496553070254, "loss": 0.238, "step": 39462 }, { "epoch": 0.06997293389327283, "grad_norm": 0.458984375, "learning_rate": 0.0013650927776743059, "loss": 0.2204, "step": 39464 }, { "epoch": 0.06997648005858265, "grad_norm": 0.36328125, "learning_rate": 0.0013650358988822244, "loss": 0.2259, "step": 39466 }, { "epoch": 0.06998002622389246, "grad_norm": 0.68359375, "learning_rate": 0.00136497901893103, "loss": 0.167, "step": 39468 }, { "epoch": 0.06998357238920228, "grad_norm": 0.291015625, "learning_rate": 0.0013649221378209714, "loss": 0.1739, "step": 39470 }, { "epoch": 0.06998711855451209, "grad_norm": 9.0625, "learning_rate": 0.0013648652555522968, "loss": 0.197, "step": 39472 }, { "epoch": 0.06999066471982192, "grad_norm": 3.421875, "learning_rate": 0.0013648083721252559, "loss": 0.1339, "step": 39474 }, { "epoch": 0.06999421088513173, "grad_norm": 0.2294921875, "learning_rate": 0.0013647514875400966, "loss": 0.2285, "step": 39476 }, { "epoch": 0.06999775705044155, "grad_norm": 0.341796875, "learning_rate": 0.0013646946017970684, "loss": 0.2904, "step": 39478 }, { "epoch": 0.07000130321575136, "grad_norm": 0.298828125, "learning_rate": 0.0013646377148964198, "loss": 0.2153, "step": 39480 }, { "epoch": 0.07000484938106118, "grad_norm": 0.33984375, "learning_rate": 0.0013645808268383994, "loss": 0.2854, "step": 39482 }, { "epoch": 0.07000839554637099, "grad_norm": 0.2060546875, "learning_rate": 0.0013645239376232558, "loss": 0.1225, "step": 39484 }, { "epoch": 0.0700119417116808, "grad_norm": 0.462890625, "learning_rate": 0.0013644670472512388, "loss": 0.1992, "step": 39486 }, { "epoch": 0.07001548787699062, "grad_norm": 0.88671875, "learning_rate": 0.0013644101557225962, "loss": 0.2525, "step": 39488 }, { "epoch": 0.07001903404230043, "grad_norm": 0.375, "learning_rate": 0.0013643532630375774, "loss": 0.3063, "step": 39490 }, { "epoch": 0.07002258020761025, "grad_norm": 0.6015625, "learning_rate": 0.0013642963691964307, "loss": 0.1981, "step": 39492 }, { "epoch": 0.07002612637292006, "grad_norm": 0.255859375, "learning_rate": 0.0013642394741994054, "loss": 0.1511, "step": 39494 }, { "epoch": 0.07002967253822988, "grad_norm": 0.48828125, "learning_rate": 0.0013641825780467498, "loss": 0.1825, "step": 39496 }, { "epoch": 0.07003321870353969, "grad_norm": 0.3203125, "learning_rate": 0.0013641256807387133, "loss": 0.2005, "step": 39498 }, { "epoch": 0.07003676486884951, "grad_norm": 0.55078125, "learning_rate": 0.0013640687822755447, "loss": 0.3158, "step": 39500 }, { "epoch": 0.07004031103415932, "grad_norm": 0.73828125, "learning_rate": 0.0013640118826574925, "loss": 0.1923, "step": 39502 }, { "epoch": 0.07004385719946914, "grad_norm": 0.54296875, "learning_rate": 0.0013639549818848054, "loss": 0.1865, "step": 39504 }, { "epoch": 0.07004740336477895, "grad_norm": 1.28125, "learning_rate": 0.001363898079957733, "loss": 0.1819, "step": 39506 }, { "epoch": 0.07005094953008877, "grad_norm": 0.365234375, "learning_rate": 0.0013638411768765234, "loss": 0.2091, "step": 39508 }, { "epoch": 0.07005449569539858, "grad_norm": 0.828125, "learning_rate": 0.0013637842726414258, "loss": 0.2809, "step": 39510 }, { "epoch": 0.0700580418607084, "grad_norm": 0.416015625, "learning_rate": 0.0013637273672526889, "loss": 0.1813, "step": 39512 }, { "epoch": 0.07006158802601821, "grad_norm": 0.59375, "learning_rate": 0.0013636704607105617, "loss": 0.2378, "step": 39514 }, { "epoch": 0.07006513419132802, "grad_norm": 0.365234375, "learning_rate": 0.0013636135530152931, "loss": 0.193, "step": 39516 }, { "epoch": 0.07006868035663784, "grad_norm": 0.54296875, "learning_rate": 0.001363556644167132, "loss": 0.1873, "step": 39518 }, { "epoch": 0.07007222652194767, "grad_norm": 1.21875, "learning_rate": 0.0013634997341663273, "loss": 0.2261, "step": 39520 }, { "epoch": 0.07007577268725748, "grad_norm": 0.25390625, "learning_rate": 0.0013634428230131278, "loss": 0.1574, "step": 39522 }, { "epoch": 0.0700793188525673, "grad_norm": 0.48046875, "learning_rate": 0.0013633859107077821, "loss": 0.174, "step": 39524 }, { "epoch": 0.07008286501787711, "grad_norm": 0.322265625, "learning_rate": 0.0013633289972505398, "loss": 0.2614, "step": 39526 }, { "epoch": 0.07008641118318693, "grad_norm": 0.52734375, "learning_rate": 0.0013632720826416494, "loss": 0.1636, "step": 39528 }, { "epoch": 0.07008995734849674, "grad_norm": 0.27734375, "learning_rate": 0.0013632151668813597, "loss": 0.2678, "step": 39530 }, { "epoch": 0.07009350351380655, "grad_norm": 0.353515625, "learning_rate": 0.0013631582499699197, "loss": 0.2319, "step": 39532 }, { "epoch": 0.07009704967911637, "grad_norm": 0.87109375, "learning_rate": 0.0013631013319075785, "loss": 0.1722, "step": 39534 }, { "epoch": 0.07010059584442618, "grad_norm": 1.109375, "learning_rate": 0.0013630444126945848, "loss": 0.1678, "step": 39536 }, { "epoch": 0.070104142009736, "grad_norm": 1.90625, "learning_rate": 0.0013629874923311877, "loss": 0.1674, "step": 39538 }, { "epoch": 0.07010768817504581, "grad_norm": 0.4375, "learning_rate": 0.0013629305708176363, "loss": 0.1808, "step": 39540 }, { "epoch": 0.07011123434035563, "grad_norm": 1.6328125, "learning_rate": 0.0013628736481541789, "loss": 0.1827, "step": 39542 }, { "epoch": 0.07011478050566544, "grad_norm": 0.470703125, "learning_rate": 0.0013628167243410652, "loss": 0.1685, "step": 39544 }, { "epoch": 0.07011832667097526, "grad_norm": 6.59375, "learning_rate": 0.0013627597993785435, "loss": 0.2463, "step": 39546 }, { "epoch": 0.07012187283628507, "grad_norm": 0.306640625, "learning_rate": 0.0013627028732668633, "loss": 0.1795, "step": 39548 }, { "epoch": 0.07012541900159489, "grad_norm": 0.2294921875, "learning_rate": 0.0013626459460062733, "loss": 0.3245, "step": 39550 }, { "epoch": 0.0701289651669047, "grad_norm": 0.734375, "learning_rate": 0.0013625890175970224, "loss": 0.1695, "step": 39552 }, { "epoch": 0.07013251133221451, "grad_norm": 0.453125, "learning_rate": 0.0013625320880393597, "loss": 0.1603, "step": 39554 }, { "epoch": 0.07013605749752433, "grad_norm": 0.3203125, "learning_rate": 0.0013624751573335344, "loss": 0.1927, "step": 39556 }, { "epoch": 0.07013960366283414, "grad_norm": 0.6796875, "learning_rate": 0.0013624182254797948, "loss": 0.1898, "step": 39558 }, { "epoch": 0.07014314982814396, "grad_norm": 1.296875, "learning_rate": 0.0013623612924783905, "loss": 0.1913, "step": 39560 }, { "epoch": 0.07014669599345377, "grad_norm": 0.212890625, "learning_rate": 0.0013623043583295702, "loss": 0.1882, "step": 39562 }, { "epoch": 0.07015024215876359, "grad_norm": 0.333984375, "learning_rate": 0.0013622474230335832, "loss": 0.2216, "step": 39564 }, { "epoch": 0.07015378832407342, "grad_norm": 0.796875, "learning_rate": 0.001362190486590678, "loss": 0.2112, "step": 39566 }, { "epoch": 0.07015733448938323, "grad_norm": 0.2314453125, "learning_rate": 0.0013621335490011043, "loss": 0.1427, "step": 39568 }, { "epoch": 0.07016088065469304, "grad_norm": 1.0078125, "learning_rate": 0.0013620766102651104, "loss": 0.5895, "step": 39570 }, { "epoch": 0.07016442682000286, "grad_norm": 0.36328125, "learning_rate": 0.0013620196703829459, "loss": 0.1945, "step": 39572 }, { "epoch": 0.07016797298531267, "grad_norm": 0.466796875, "learning_rate": 0.0013619627293548595, "loss": 0.2072, "step": 39574 }, { "epoch": 0.07017151915062249, "grad_norm": 0.5703125, "learning_rate": 0.0013619057871811003, "loss": 0.1916, "step": 39576 }, { "epoch": 0.0701750653159323, "grad_norm": 0.59375, "learning_rate": 0.0013618488438619171, "loss": 0.2227, "step": 39578 }, { "epoch": 0.07017861148124212, "grad_norm": 0.71484375, "learning_rate": 0.0013617918993975592, "loss": 0.2108, "step": 39580 }, { "epoch": 0.07018215764655193, "grad_norm": 0.80078125, "learning_rate": 0.0013617349537882757, "loss": 0.2064, "step": 39582 }, { "epoch": 0.07018570381186175, "grad_norm": 0.58203125, "learning_rate": 0.0013616780070343155, "loss": 0.1652, "step": 39584 }, { "epoch": 0.07018924997717156, "grad_norm": 0.98046875, "learning_rate": 0.0013616210591359279, "loss": 0.2287, "step": 39586 }, { "epoch": 0.07019279614248138, "grad_norm": 0.333984375, "learning_rate": 0.0013615641100933615, "loss": 0.2367, "step": 39588 }, { "epoch": 0.07019634230779119, "grad_norm": 0.46875, "learning_rate": 0.0013615071599068658, "loss": 0.1744, "step": 39590 }, { "epoch": 0.070199888473101, "grad_norm": 4.65625, "learning_rate": 0.0013614502085766895, "loss": 0.1652, "step": 39592 }, { "epoch": 0.07020343463841082, "grad_norm": 3.03125, "learning_rate": 0.0013613932561030821, "loss": 0.2077, "step": 39594 }, { "epoch": 0.07020698080372063, "grad_norm": 0.392578125, "learning_rate": 0.0013613363024862924, "loss": 0.1896, "step": 39596 }, { "epoch": 0.07021052696903045, "grad_norm": 1.1171875, "learning_rate": 0.0013612793477265693, "loss": 0.4711, "step": 39598 }, { "epoch": 0.07021407313434026, "grad_norm": 0.458984375, "learning_rate": 0.0013612223918241621, "loss": 0.1817, "step": 39600 }, { "epoch": 0.07021761929965008, "grad_norm": 0.37890625, "learning_rate": 0.00136116543477932, "loss": 0.1665, "step": 39602 }, { "epoch": 0.07022116546495989, "grad_norm": 0.6875, "learning_rate": 0.001361108476592292, "loss": 0.2127, "step": 39604 }, { "epoch": 0.0702247116302697, "grad_norm": 1.78125, "learning_rate": 0.001361051517263327, "loss": 0.3103, "step": 39606 }, { "epoch": 0.07022825779557952, "grad_norm": 0.2373046875, "learning_rate": 0.0013609945567926746, "loss": 0.2234, "step": 39608 }, { "epoch": 0.07023180396088935, "grad_norm": 0.197265625, "learning_rate": 0.0013609375951805834, "loss": 0.1792, "step": 39610 }, { "epoch": 0.07023535012619916, "grad_norm": 0.671875, "learning_rate": 0.001360880632427303, "loss": 0.2279, "step": 39612 }, { "epoch": 0.07023889629150898, "grad_norm": 0.51171875, "learning_rate": 0.0013608236685330821, "loss": 0.2064, "step": 39614 }, { "epoch": 0.0702424424568188, "grad_norm": 1.4453125, "learning_rate": 0.00136076670349817, "loss": 0.4225, "step": 39616 }, { "epoch": 0.07024598862212861, "grad_norm": 0.8046875, "learning_rate": 0.0013607097373228157, "loss": 0.2006, "step": 39618 }, { "epoch": 0.07024953478743842, "grad_norm": 0.71875, "learning_rate": 0.0013606527700072686, "loss": 0.2223, "step": 39620 }, { "epoch": 0.07025308095274824, "grad_norm": 0.302734375, "learning_rate": 0.0013605958015517777, "loss": 0.1673, "step": 39622 }, { "epoch": 0.07025662711805805, "grad_norm": 1.671875, "learning_rate": 0.0013605388319565916, "loss": 0.4129, "step": 39624 }, { "epoch": 0.07026017328336787, "grad_norm": 0.498046875, "learning_rate": 0.0013604818612219605, "loss": 0.18, "step": 39626 }, { "epoch": 0.07026371944867768, "grad_norm": 0.328125, "learning_rate": 0.001360424889348133, "loss": 0.1757, "step": 39628 }, { "epoch": 0.0702672656139875, "grad_norm": 0.33203125, "learning_rate": 0.0013603679163353584, "loss": 0.2172, "step": 39630 }, { "epoch": 0.07027081177929731, "grad_norm": 0.2578125, "learning_rate": 0.0013603109421838858, "loss": 0.1741, "step": 39632 }, { "epoch": 0.07027435794460712, "grad_norm": 0.345703125, "learning_rate": 0.001360253966893964, "loss": 0.1808, "step": 39634 }, { "epoch": 0.07027790410991694, "grad_norm": 0.494140625, "learning_rate": 0.0013601969904658427, "loss": 0.2741, "step": 39636 }, { "epoch": 0.07028145027522675, "grad_norm": 0.8828125, "learning_rate": 0.001360140012899771, "loss": 0.1832, "step": 39638 }, { "epoch": 0.07028499644053657, "grad_norm": 0.6015625, "learning_rate": 0.0013600830341959978, "loss": 0.1978, "step": 39640 }, { "epoch": 0.07028854260584638, "grad_norm": 0.41796875, "learning_rate": 0.0013600260543547727, "loss": 0.1647, "step": 39642 }, { "epoch": 0.0702920887711562, "grad_norm": 1.140625, "learning_rate": 0.0013599690733763446, "loss": 0.2417, "step": 39644 }, { "epoch": 0.07029563493646601, "grad_norm": 1.640625, "learning_rate": 0.0013599120912609626, "loss": 0.271, "step": 39646 }, { "epoch": 0.07029918110177583, "grad_norm": 0.3984375, "learning_rate": 0.0013598551080088765, "loss": 0.2414, "step": 39648 }, { "epoch": 0.07030272726708564, "grad_norm": 0.29296875, "learning_rate": 0.0013597981236203348, "loss": 0.159, "step": 39650 }, { "epoch": 0.07030627343239546, "grad_norm": 0.34765625, "learning_rate": 0.0013597411380955869, "loss": 0.1927, "step": 39652 }, { "epoch": 0.07030981959770527, "grad_norm": 0.48828125, "learning_rate": 0.0013596841514348825, "loss": 0.2081, "step": 39654 }, { "epoch": 0.0703133657630151, "grad_norm": 0.2119140625, "learning_rate": 0.0013596271636384701, "loss": 0.2242, "step": 39656 }, { "epoch": 0.07031691192832491, "grad_norm": 0.1796875, "learning_rate": 0.0013595701747065993, "loss": 0.1477, "step": 39658 }, { "epoch": 0.07032045809363473, "grad_norm": 0.22265625, "learning_rate": 0.0013595131846395197, "loss": 0.2141, "step": 39660 }, { "epoch": 0.07032400425894454, "grad_norm": 0.294921875, "learning_rate": 0.00135945619343748, "loss": 0.1962, "step": 39662 }, { "epoch": 0.07032755042425436, "grad_norm": 2.375, "learning_rate": 0.0013593992011007295, "loss": 0.2309, "step": 39664 }, { "epoch": 0.07033109658956417, "grad_norm": 1.59375, "learning_rate": 0.0013593422076295177, "loss": 0.2472, "step": 39666 }, { "epoch": 0.07033464275487399, "grad_norm": 0.7109375, "learning_rate": 0.0013592852130240936, "loss": 0.2368, "step": 39668 }, { "epoch": 0.0703381889201838, "grad_norm": 0.46875, "learning_rate": 0.0013592282172847068, "loss": 0.2088, "step": 39670 }, { "epoch": 0.07034173508549361, "grad_norm": 0.4296875, "learning_rate": 0.0013591712204116062, "loss": 0.1185, "step": 39672 }, { "epoch": 0.07034528125080343, "grad_norm": 0.314453125, "learning_rate": 0.0013591142224050412, "loss": 0.1967, "step": 39674 }, { "epoch": 0.07034882741611324, "grad_norm": 0.77734375, "learning_rate": 0.0013590572232652612, "loss": 0.1457, "step": 39676 }, { "epoch": 0.07035237358142306, "grad_norm": 0.2392578125, "learning_rate": 0.0013590002229925153, "loss": 0.2271, "step": 39678 }, { "epoch": 0.07035591974673287, "grad_norm": 0.53125, "learning_rate": 0.001358943221587053, "loss": 0.1882, "step": 39680 }, { "epoch": 0.07035946591204269, "grad_norm": 0.1328125, "learning_rate": 0.0013588862190491231, "loss": 0.1912, "step": 39682 }, { "epoch": 0.0703630120773525, "grad_norm": 0.32421875, "learning_rate": 0.0013588292153789757, "loss": 0.2855, "step": 39684 }, { "epoch": 0.07036655824266232, "grad_norm": 1.3046875, "learning_rate": 0.0013587722105768594, "loss": 0.3307, "step": 39686 }, { "epoch": 0.07037010440797213, "grad_norm": 0.3984375, "learning_rate": 0.001358715204643024, "loss": 0.2412, "step": 39688 }, { "epoch": 0.07037365057328195, "grad_norm": 0.35546875, "learning_rate": 0.0013586581975777185, "loss": 0.188, "step": 39690 }, { "epoch": 0.07037719673859176, "grad_norm": 0.314453125, "learning_rate": 0.001358601189381192, "loss": 0.1547, "step": 39692 }, { "epoch": 0.07038074290390157, "grad_norm": 0.40625, "learning_rate": 0.0013585441800536943, "loss": 0.1995, "step": 39694 }, { "epoch": 0.07038428906921139, "grad_norm": 0.25390625, "learning_rate": 0.0013584871695954746, "loss": 0.2243, "step": 39696 }, { "epoch": 0.0703878352345212, "grad_norm": 0.205078125, "learning_rate": 0.0013584301580067822, "loss": 0.165, "step": 39698 }, { "epoch": 0.07039138139983102, "grad_norm": 0.31640625, "learning_rate": 0.0013583731452878663, "loss": 0.1742, "step": 39700 }, { "epoch": 0.07039492756514085, "grad_norm": 1.1875, "learning_rate": 0.0013583161314389763, "loss": 0.2424, "step": 39702 }, { "epoch": 0.07039847373045066, "grad_norm": 0.640625, "learning_rate": 0.0013582591164603617, "loss": 0.179, "step": 39704 }, { "epoch": 0.07040201989576048, "grad_norm": 1.484375, "learning_rate": 0.0013582021003522718, "loss": 0.2594, "step": 39706 }, { "epoch": 0.07040556606107029, "grad_norm": 0.419921875, "learning_rate": 0.0013581450831149557, "loss": 0.1977, "step": 39708 }, { "epoch": 0.0704091122263801, "grad_norm": 0.412109375, "learning_rate": 0.0013580880647486632, "loss": 0.2201, "step": 39710 }, { "epoch": 0.07041265839168992, "grad_norm": 0.2080078125, "learning_rate": 0.0013580310452536433, "loss": 0.199, "step": 39712 }, { "epoch": 0.07041620455699973, "grad_norm": 0.53515625, "learning_rate": 0.0013579740246301452, "loss": 0.2607, "step": 39714 }, { "epoch": 0.07041975072230955, "grad_norm": 0.5078125, "learning_rate": 0.001357917002878419, "loss": 0.1325, "step": 39716 }, { "epoch": 0.07042329688761936, "grad_norm": 0.302734375, "learning_rate": 0.0013578599799987131, "loss": 0.1655, "step": 39718 }, { "epoch": 0.07042684305292918, "grad_norm": 0.359375, "learning_rate": 0.001357802955991278, "loss": 0.1606, "step": 39720 }, { "epoch": 0.07043038921823899, "grad_norm": 0.62109375, "learning_rate": 0.0013577459308563625, "loss": 0.2562, "step": 39722 }, { "epoch": 0.0704339353835488, "grad_norm": 0.30859375, "learning_rate": 0.0013576889045942158, "loss": 0.1527, "step": 39724 }, { "epoch": 0.07043748154885862, "grad_norm": 0.63671875, "learning_rate": 0.0013576318772050874, "loss": 0.2225, "step": 39726 }, { "epoch": 0.07044102771416844, "grad_norm": 0.423828125, "learning_rate": 0.0013575748486892272, "loss": 0.1977, "step": 39728 }, { "epoch": 0.07044457387947825, "grad_norm": 0.5078125, "learning_rate": 0.001357517819046884, "loss": 0.2292, "step": 39730 }, { "epoch": 0.07044812004478807, "grad_norm": 1.1328125, "learning_rate": 0.0013574607882783074, "loss": 0.1325, "step": 39732 }, { "epoch": 0.07045166621009788, "grad_norm": 0.4609375, "learning_rate": 0.001357403756383747, "loss": 0.1762, "step": 39734 }, { "epoch": 0.0704552123754077, "grad_norm": 0.76953125, "learning_rate": 0.0013573467233634518, "loss": 0.2099, "step": 39736 }, { "epoch": 0.07045875854071751, "grad_norm": 0.431640625, "learning_rate": 0.0013572896892176717, "loss": 0.4089, "step": 39738 }, { "epoch": 0.07046230470602732, "grad_norm": 0.73046875, "learning_rate": 0.001357232653946656, "loss": 0.2257, "step": 39740 }, { "epoch": 0.07046585087133714, "grad_norm": 1.2421875, "learning_rate": 0.0013571756175506542, "loss": 0.2263, "step": 39742 }, { "epoch": 0.07046939703664695, "grad_norm": 1.0703125, "learning_rate": 0.0013571185800299154, "loss": 0.2004, "step": 39744 }, { "epoch": 0.07047294320195678, "grad_norm": 0.1884765625, "learning_rate": 0.0013570615413846894, "loss": 0.1677, "step": 39746 }, { "epoch": 0.0704764893672666, "grad_norm": 0.29296875, "learning_rate": 0.0013570045016152257, "loss": 0.2597, "step": 39748 }, { "epoch": 0.07048003553257641, "grad_norm": 0.349609375, "learning_rate": 0.001356947460721773, "loss": 0.2302, "step": 39750 }, { "epoch": 0.07048358169788622, "grad_norm": 0.49609375, "learning_rate": 0.0013568904187045818, "loss": 0.2152, "step": 39752 }, { "epoch": 0.07048712786319604, "grad_norm": 0.5, "learning_rate": 0.0013568333755639012, "loss": 0.1797, "step": 39754 }, { "epoch": 0.07049067402850585, "grad_norm": 0.58984375, "learning_rate": 0.0013567763312999806, "loss": 0.2028, "step": 39756 }, { "epoch": 0.07049422019381567, "grad_norm": 0.283203125, "learning_rate": 0.0013567192859130692, "loss": 0.1669, "step": 39758 }, { "epoch": 0.07049776635912548, "grad_norm": 0.314453125, "learning_rate": 0.0013566622394034172, "loss": 0.2858, "step": 39760 }, { "epoch": 0.0705013125244353, "grad_norm": 0.302734375, "learning_rate": 0.0013566051917712734, "loss": 0.2104, "step": 39762 }, { "epoch": 0.07050485868974511, "grad_norm": 0.31640625, "learning_rate": 0.0013565481430168876, "loss": 0.1802, "step": 39764 }, { "epoch": 0.07050840485505493, "grad_norm": 0.8203125, "learning_rate": 0.001356491093140509, "loss": 0.2437, "step": 39766 }, { "epoch": 0.07051195102036474, "grad_norm": 1.21875, "learning_rate": 0.0013564340421423876, "loss": 0.2587, "step": 39768 }, { "epoch": 0.07051549718567456, "grad_norm": 0.400390625, "learning_rate": 0.0013563769900227724, "loss": 0.1474, "step": 39770 }, { "epoch": 0.07051904335098437, "grad_norm": 0.447265625, "learning_rate": 0.0013563199367819135, "loss": 0.218, "step": 39772 }, { "epoch": 0.07052258951629418, "grad_norm": 0.84765625, "learning_rate": 0.0013562628824200598, "loss": 0.2109, "step": 39774 }, { "epoch": 0.070526135681604, "grad_norm": 2.25, "learning_rate": 0.0013562058269374613, "loss": 0.4406, "step": 39776 }, { "epoch": 0.07052968184691381, "grad_norm": 0.322265625, "learning_rate": 0.0013561487703343672, "loss": 0.2028, "step": 39778 }, { "epoch": 0.07053322801222363, "grad_norm": 0.4765625, "learning_rate": 0.0013560917126110273, "loss": 0.1721, "step": 39780 }, { "epoch": 0.07053677417753344, "grad_norm": 1.0, "learning_rate": 0.0013560346537676911, "loss": 0.1861, "step": 39782 }, { "epoch": 0.07054032034284326, "grad_norm": 0.828125, "learning_rate": 0.0013559775938046077, "loss": 0.3128, "step": 39784 }, { "epoch": 0.07054386650815307, "grad_norm": 0.353515625, "learning_rate": 0.0013559205327220271, "loss": 0.1694, "step": 39786 }, { "epoch": 0.07054741267346289, "grad_norm": 0.3046875, "learning_rate": 0.0013558634705201989, "loss": 0.1614, "step": 39788 }, { "epoch": 0.0705509588387727, "grad_norm": 0.453125, "learning_rate": 0.0013558064071993723, "loss": 0.1783, "step": 39790 }, { "epoch": 0.07055450500408253, "grad_norm": 0.68359375, "learning_rate": 0.0013557493427597974, "loss": 0.1892, "step": 39792 }, { "epoch": 0.07055805116939234, "grad_norm": 0.431640625, "learning_rate": 0.0013556922772017233, "loss": 0.1349, "step": 39794 }, { "epoch": 0.07056159733470216, "grad_norm": 4.40625, "learning_rate": 0.0013556352105253992, "loss": 0.2353, "step": 39796 }, { "epoch": 0.07056514350001197, "grad_norm": 0.2158203125, "learning_rate": 0.0013555781427310757, "loss": 0.3057, "step": 39798 }, { "epoch": 0.07056868966532179, "grad_norm": 0.56640625, "learning_rate": 0.001355521073819002, "loss": 0.1646, "step": 39800 }, { "epoch": 0.0705722358306316, "grad_norm": 0.47265625, "learning_rate": 0.0013554640037894273, "loss": 0.2572, "step": 39802 }, { "epoch": 0.07057578199594142, "grad_norm": 1.9140625, "learning_rate": 0.0013554069326426016, "loss": 0.278, "step": 39804 }, { "epoch": 0.07057932816125123, "grad_norm": 0.20703125, "learning_rate": 0.0013553498603787741, "loss": 0.1884, "step": 39806 }, { "epoch": 0.07058287432656105, "grad_norm": 0.314453125, "learning_rate": 0.0013552927869981946, "loss": 0.1953, "step": 39808 }, { "epoch": 0.07058642049187086, "grad_norm": 0.318359375, "learning_rate": 0.0013552357125011132, "loss": 0.1982, "step": 39810 }, { "epoch": 0.07058996665718067, "grad_norm": 0.369140625, "learning_rate": 0.0013551786368877783, "loss": 0.1608, "step": 39812 }, { "epoch": 0.07059351282249049, "grad_norm": 0.54296875, "learning_rate": 0.001355121560158441, "loss": 0.1723, "step": 39814 }, { "epoch": 0.0705970589878003, "grad_norm": 0.2314453125, "learning_rate": 0.00135506448231335, "loss": 0.2112, "step": 39816 }, { "epoch": 0.07060060515311012, "grad_norm": 0.5546875, "learning_rate": 0.001355007403352755, "loss": 0.1876, "step": 39818 }, { "epoch": 0.07060415131841993, "grad_norm": 0.296875, "learning_rate": 0.0013549503232769056, "loss": 0.1794, "step": 39820 }, { "epoch": 0.07060769748372975, "grad_norm": 0.39453125, "learning_rate": 0.0013548932420860518, "loss": 0.2297, "step": 39822 }, { "epoch": 0.07061124364903956, "grad_norm": 0.271484375, "learning_rate": 0.0013548361597804429, "loss": 0.1717, "step": 39824 }, { "epoch": 0.07061478981434938, "grad_norm": 0.5234375, "learning_rate": 0.0013547790763603286, "loss": 0.1983, "step": 39826 }, { "epoch": 0.07061833597965919, "grad_norm": 0.4375, "learning_rate": 0.0013547219918259585, "loss": 0.2296, "step": 39828 }, { "epoch": 0.070621882144969, "grad_norm": 1.828125, "learning_rate": 0.0013546649061775828, "loss": 0.2149, "step": 39830 }, { "epoch": 0.07062542831027882, "grad_norm": 0.490234375, "learning_rate": 0.0013546078194154505, "loss": 0.1873, "step": 39832 }, { "epoch": 0.07062897447558864, "grad_norm": 0.7421875, "learning_rate": 0.0013545507315398115, "loss": 0.1988, "step": 39834 }, { "epoch": 0.07063252064089845, "grad_norm": 0.4453125, "learning_rate": 0.0013544936425509155, "loss": 0.1657, "step": 39836 }, { "epoch": 0.07063606680620828, "grad_norm": 0.29296875, "learning_rate": 0.0013544365524490124, "loss": 0.232, "step": 39838 }, { "epoch": 0.07063961297151809, "grad_norm": 0.333984375, "learning_rate": 0.001354379461234351, "loss": 0.1607, "step": 39840 }, { "epoch": 0.07064315913682791, "grad_norm": 0.61328125, "learning_rate": 0.001354322368907182, "loss": 0.1918, "step": 39842 }, { "epoch": 0.07064670530213772, "grad_norm": 0.474609375, "learning_rate": 0.0013542652754677544, "loss": 0.2861, "step": 39844 }, { "epoch": 0.07065025146744754, "grad_norm": 0.66015625, "learning_rate": 0.0013542081809163188, "loss": 0.1896, "step": 39846 }, { "epoch": 0.07065379763275735, "grad_norm": 1.3046875, "learning_rate": 0.0013541510852531236, "loss": 0.2449, "step": 39848 }, { "epoch": 0.07065734379806717, "grad_norm": 0.4375, "learning_rate": 0.0013540939884784196, "loss": 0.2238, "step": 39850 }, { "epoch": 0.07066088996337698, "grad_norm": 0.75, "learning_rate": 0.0013540368905924556, "loss": 0.2319, "step": 39852 }, { "epoch": 0.0706644361286868, "grad_norm": 0.62890625, "learning_rate": 0.001353979791595482, "loss": 0.1616, "step": 39854 }, { "epoch": 0.07066798229399661, "grad_norm": 0.83984375, "learning_rate": 0.0013539226914877488, "loss": 0.1497, "step": 39856 }, { "epoch": 0.07067152845930642, "grad_norm": 0.392578125, "learning_rate": 0.001353865590269505, "loss": 0.143, "step": 39858 }, { "epoch": 0.07067507462461624, "grad_norm": 0.392578125, "learning_rate": 0.0013538084879410003, "loss": 0.1695, "step": 39860 }, { "epoch": 0.07067862078992605, "grad_norm": 0.6640625, "learning_rate": 0.001353751384502485, "loss": 0.1952, "step": 39862 }, { "epoch": 0.07068216695523587, "grad_norm": 0.4765625, "learning_rate": 0.0013536942799542082, "loss": 0.1807, "step": 39864 }, { "epoch": 0.07068571312054568, "grad_norm": 0.412109375, "learning_rate": 0.0013536371742964202, "loss": 0.2531, "step": 39866 }, { "epoch": 0.0706892592858555, "grad_norm": 0.435546875, "learning_rate": 0.0013535800675293706, "loss": 0.1658, "step": 39868 }, { "epoch": 0.07069280545116531, "grad_norm": 0.443359375, "learning_rate": 0.0013535229596533087, "loss": 0.2321, "step": 39870 }, { "epoch": 0.07069635161647513, "grad_norm": 0.419921875, "learning_rate": 0.0013534658506684852, "loss": 0.2076, "step": 39872 }, { "epoch": 0.07069989778178494, "grad_norm": 0.53515625, "learning_rate": 0.0013534087405751491, "loss": 0.4276, "step": 39874 }, { "epoch": 0.07070344394709475, "grad_norm": 0.345703125, "learning_rate": 0.0013533516293735505, "loss": 0.2574, "step": 39876 }, { "epoch": 0.07070699011240457, "grad_norm": 0.404296875, "learning_rate": 0.0013532945170639384, "loss": 0.1819, "step": 39878 }, { "epoch": 0.07071053627771438, "grad_norm": 0.48046875, "learning_rate": 0.0013532374036465638, "loss": 0.1842, "step": 39880 }, { "epoch": 0.07071408244302421, "grad_norm": 0.5859375, "learning_rate": 0.0013531802891216759, "loss": 0.1948, "step": 39882 }, { "epoch": 0.07071762860833403, "grad_norm": 0.2890625, "learning_rate": 0.001353123173489524, "loss": 0.1846, "step": 39884 }, { "epoch": 0.07072117477364384, "grad_norm": 0.2470703125, "learning_rate": 0.0013530660567503587, "loss": 0.2093, "step": 39886 }, { "epoch": 0.07072472093895366, "grad_norm": 0.306640625, "learning_rate": 0.0013530089389044296, "loss": 0.1894, "step": 39888 }, { "epoch": 0.07072826710426347, "grad_norm": 1.8125, "learning_rate": 0.001352951819951986, "loss": 0.2638, "step": 39890 }, { "epoch": 0.07073181326957328, "grad_norm": 0.5625, "learning_rate": 0.0013528946998932786, "loss": 0.1793, "step": 39892 }, { "epoch": 0.0707353594348831, "grad_norm": 0.55078125, "learning_rate": 0.0013528375787285566, "loss": 0.1712, "step": 39894 }, { "epoch": 0.07073890560019291, "grad_norm": 0.408203125, "learning_rate": 0.0013527804564580695, "loss": 0.1586, "step": 39896 }, { "epoch": 0.07074245176550273, "grad_norm": 0.271484375, "learning_rate": 0.0013527233330820677, "loss": 0.1525, "step": 39898 }, { "epoch": 0.07074599793081254, "grad_norm": 0.7109375, "learning_rate": 0.001352666208600801, "loss": 0.2459, "step": 39900 }, { "epoch": 0.07074954409612236, "grad_norm": 0.8515625, "learning_rate": 0.0013526090830145187, "loss": 0.1932, "step": 39902 }, { "epoch": 0.07075309026143217, "grad_norm": 0.349609375, "learning_rate": 0.0013525519563234712, "loss": 0.203, "step": 39904 }, { "epoch": 0.07075663642674199, "grad_norm": 0.67578125, "learning_rate": 0.0013524948285279083, "loss": 0.2397, "step": 39906 }, { "epoch": 0.0707601825920518, "grad_norm": 1.6328125, "learning_rate": 0.0013524376996280794, "loss": 0.2204, "step": 39908 }, { "epoch": 0.07076372875736162, "grad_norm": 0.6171875, "learning_rate": 0.0013523805696242347, "loss": 0.2321, "step": 39910 }, { "epoch": 0.07076727492267143, "grad_norm": 0.265625, "learning_rate": 0.0013523234385166242, "loss": 0.2023, "step": 39912 }, { "epoch": 0.07077082108798124, "grad_norm": 0.2578125, "learning_rate": 0.0013522663063054974, "loss": 0.2345, "step": 39914 }, { "epoch": 0.07077436725329106, "grad_norm": 0.54296875, "learning_rate": 0.0013522091729911043, "loss": 0.1223, "step": 39916 }, { "epoch": 0.07077791341860087, "grad_norm": 0.439453125, "learning_rate": 0.0013521520385736947, "loss": 0.1566, "step": 39918 }, { "epoch": 0.07078145958391069, "grad_norm": 0.609375, "learning_rate": 0.0013520949030535188, "loss": 0.2546, "step": 39920 }, { "epoch": 0.0707850057492205, "grad_norm": 0.63671875, "learning_rate": 0.0013520377664308259, "loss": 0.1791, "step": 39922 }, { "epoch": 0.07078855191453032, "grad_norm": 0.2236328125, "learning_rate": 0.0013519806287058665, "loss": 0.1919, "step": 39924 }, { "epoch": 0.07079209807984013, "grad_norm": 0.2412109375, "learning_rate": 0.00135192348987889, "loss": 0.2205, "step": 39926 }, { "epoch": 0.07079564424514996, "grad_norm": 0.66796875, "learning_rate": 0.0013518663499501467, "loss": 0.1903, "step": 39928 }, { "epoch": 0.07079919041045978, "grad_norm": 2.140625, "learning_rate": 0.0013518092089198862, "loss": 0.1992, "step": 39930 }, { "epoch": 0.07080273657576959, "grad_norm": 0.609375, "learning_rate": 0.0013517520667883586, "loss": 0.1736, "step": 39932 }, { "epoch": 0.0708062827410794, "grad_norm": 0.65625, "learning_rate": 0.0013516949235558133, "loss": 0.2434, "step": 39934 }, { "epoch": 0.07080982890638922, "grad_norm": 0.365234375, "learning_rate": 0.001351637779222501, "loss": 0.2039, "step": 39936 }, { "epoch": 0.07081337507169903, "grad_norm": 0.38671875, "learning_rate": 0.0013515806337886709, "loss": 0.2317, "step": 39938 }, { "epoch": 0.07081692123700885, "grad_norm": 0.330078125, "learning_rate": 0.0013515234872545734, "loss": 0.1936, "step": 39940 }, { "epoch": 0.07082046740231866, "grad_norm": 0.271484375, "learning_rate": 0.0013514663396204582, "loss": 0.1989, "step": 39942 }, { "epoch": 0.07082401356762848, "grad_norm": 0.75390625, "learning_rate": 0.0013514091908865755, "loss": 0.2187, "step": 39944 }, { "epoch": 0.07082755973293829, "grad_norm": 0.412109375, "learning_rate": 0.0013513520410531749, "loss": 0.2441, "step": 39946 }, { "epoch": 0.0708311058982481, "grad_norm": 0.55859375, "learning_rate": 0.0013512948901205064, "loss": 0.2104, "step": 39948 }, { "epoch": 0.07083465206355792, "grad_norm": 0.412109375, "learning_rate": 0.0013512377380888202, "loss": 0.2011, "step": 39950 }, { "epoch": 0.07083819822886774, "grad_norm": 0.5703125, "learning_rate": 0.0013511805849583663, "loss": 0.1808, "step": 39952 }, { "epoch": 0.07084174439417755, "grad_norm": 0.94140625, "learning_rate": 0.0013511234307293939, "loss": 0.1694, "step": 39954 }, { "epoch": 0.07084529055948736, "grad_norm": 0.73046875, "learning_rate": 0.0013510662754021537, "loss": 0.2588, "step": 39956 }, { "epoch": 0.07084883672479718, "grad_norm": 2.921875, "learning_rate": 0.0013510091189768954, "loss": 0.4586, "step": 39958 }, { "epoch": 0.070852382890107, "grad_norm": 1.59375, "learning_rate": 0.0013509519614538688, "loss": 0.2665, "step": 39960 }, { "epoch": 0.07085592905541681, "grad_norm": 0.486328125, "learning_rate": 0.0013508948028333244, "loss": 0.2802, "step": 39962 }, { "epoch": 0.07085947522072662, "grad_norm": 0.78515625, "learning_rate": 0.001350837643115512, "loss": 0.1708, "step": 39964 }, { "epoch": 0.07086302138603644, "grad_norm": 2.109375, "learning_rate": 0.0013507804823006814, "loss": 0.3963, "step": 39966 }, { "epoch": 0.07086656755134625, "grad_norm": 0.45703125, "learning_rate": 0.0013507233203890823, "loss": 0.1948, "step": 39968 }, { "epoch": 0.07087011371665607, "grad_norm": 0.47265625, "learning_rate": 0.0013506661573809654, "loss": 0.2886, "step": 39970 }, { "epoch": 0.07087365988196588, "grad_norm": 0.388671875, "learning_rate": 0.0013506089932765802, "loss": 0.1551, "step": 39972 }, { "epoch": 0.07087720604727571, "grad_norm": 0.47265625, "learning_rate": 0.0013505518280761768, "loss": 0.2885, "step": 39974 }, { "epoch": 0.07088075221258552, "grad_norm": 0.41015625, "learning_rate": 0.0013504946617800052, "loss": 0.2086, "step": 39976 }, { "epoch": 0.07088429837789534, "grad_norm": 0.67578125, "learning_rate": 0.0013504374943883156, "loss": 0.1693, "step": 39978 }, { "epoch": 0.07088784454320515, "grad_norm": 0.26171875, "learning_rate": 0.0013503803259013575, "loss": 0.1964, "step": 39980 }, { "epoch": 0.07089139070851497, "grad_norm": 0.57421875, "learning_rate": 0.0013503231563193815, "loss": 0.2173, "step": 39982 }, { "epoch": 0.07089493687382478, "grad_norm": 1.5703125, "learning_rate": 0.0013502659856426378, "loss": 0.1788, "step": 39984 }, { "epoch": 0.0708984830391346, "grad_norm": 2.171875, "learning_rate": 0.0013502088138713756, "loss": 0.281, "step": 39986 }, { "epoch": 0.07090202920444441, "grad_norm": 1.28125, "learning_rate": 0.0013501516410058454, "loss": 0.1778, "step": 39988 }, { "epoch": 0.07090557536975423, "grad_norm": 0.54296875, "learning_rate": 0.0013500944670462975, "loss": 0.2102, "step": 39990 }, { "epoch": 0.07090912153506404, "grad_norm": 0.4765625, "learning_rate": 0.0013500372919929816, "loss": 0.1924, "step": 39992 }, { "epoch": 0.07091266770037385, "grad_norm": 0.66015625, "learning_rate": 0.0013499801158461478, "loss": 0.2229, "step": 39994 }, { "epoch": 0.07091621386568367, "grad_norm": 0.58984375, "learning_rate": 0.001349922938606046, "loss": 0.2235, "step": 39996 }, { "epoch": 0.07091976003099348, "grad_norm": 0.498046875, "learning_rate": 0.0013498657602729267, "loss": 0.2502, "step": 39998 }, { "epoch": 0.0709233061963033, "grad_norm": 0.7578125, "learning_rate": 0.0013498085808470394, "loss": 0.1928, "step": 40000 } ], "logging_steps": 2, "max_steps": 96010, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3133692313300173e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }