{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 13563,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0014746000147460002,
"grad_norm": 0.4404066503047943,
"learning_rate": 9.985991299859913e-06,
"loss": 2.4466,
"step": 20
},
{
"epoch": 0.0029492000294920003,
"grad_norm": 0.5653194785118103,
"learning_rate": 9.971245299712454e-06,
"loss": 2.4307,
"step": 40
},
{
"epoch": 0.004423800044238001,
"grad_norm": 0.4618721902370453,
"learning_rate": 9.956499299564993e-06,
"loss": 2.4367,
"step": 60
},
{
"epoch": 0.005898400058984001,
"grad_norm": 0.4391142427921295,
"learning_rate": 9.941753299417534e-06,
"loss": 2.4183,
"step": 80
},
{
"epoch": 0.007373000073730001,
"grad_norm": 0.474587619304657,
"learning_rate": 9.927007299270073e-06,
"loss": 2.42,
"step": 100
},
{
"epoch": 0.008847600088476001,
"grad_norm": 0.420256644487381,
"learning_rate": 9.912261299122614e-06,
"loss": 2.4164,
"step": 120
},
{
"epoch": 0.010322200103222,
"grad_norm": 0.40069517493247986,
"learning_rate": 9.897515298975153e-06,
"loss": 2.3962,
"step": 140
},
{
"epoch": 0.011796800117968001,
"grad_norm": 0.36043497920036316,
"learning_rate": 9.882769298827694e-06,
"loss": 2.4026,
"step": 160
},
{
"epoch": 0.013271400132714002,
"grad_norm": 0.354769229888916,
"learning_rate": 9.868023298680233e-06,
"loss": 2.4013,
"step": 180
},
{
"epoch": 0.014746000147460001,
"grad_norm": 0.40864863991737366,
"learning_rate": 9.853277298532774e-06,
"loss": 2.4077,
"step": 200
},
{
"epoch": 0.016220600162206,
"grad_norm": 0.37636885046958923,
"learning_rate": 9.838531298385315e-06,
"loss": 2.4058,
"step": 220
},
{
"epoch": 0.017695200176952003,
"grad_norm": 0.37770289182662964,
"learning_rate": 9.823785298237854e-06,
"loss": 2.3789,
"step": 240
},
{
"epoch": 0.019169800191698002,
"grad_norm": 0.4997946321964264,
"learning_rate": 9.809039298090393e-06,
"loss": 2.3921,
"step": 260
},
{
"epoch": 0.020644400206444,
"grad_norm": 0.42146745324134827,
"learning_rate": 9.794293297942934e-06,
"loss": 2.389,
"step": 280
},
{
"epoch": 0.022119000221190004,
"grad_norm": 0.31817182898521423,
"learning_rate": 9.779547297795475e-06,
"loss": 2.3969,
"step": 300
},
{
"epoch": 0.023593600235936003,
"grad_norm": 0.41462913155555725,
"learning_rate": 9.764801297648014e-06,
"loss": 2.3903,
"step": 320
},
{
"epoch": 0.025068200250682002,
"grad_norm": 0.35277777910232544,
"learning_rate": 9.750055297500553e-06,
"loss": 2.3886,
"step": 340
},
{
"epoch": 0.026542800265428004,
"grad_norm": 0.39531123638153076,
"learning_rate": 9.735309297353094e-06,
"loss": 2.3847,
"step": 360
},
{
"epoch": 0.028017400280174003,
"grad_norm": 0.3687122166156769,
"learning_rate": 9.720563297205633e-06,
"loss": 2.3824,
"step": 380
},
{
"epoch": 0.029492000294920002,
"grad_norm": 0.5362212061882019,
"learning_rate": 9.705817297058172e-06,
"loss": 2.3861,
"step": 400
},
{
"epoch": 0.030966600309666,
"grad_norm": 0.5793041586875916,
"learning_rate": 9.691071296910713e-06,
"loss": 2.4069,
"step": 420
},
{
"epoch": 0.032441200324412,
"grad_norm": 0.3618432581424713,
"learning_rate": 9.676325296763254e-06,
"loss": 2.379,
"step": 440
},
{
"epoch": 0.033915800339158,
"grad_norm": 0.39441928267478943,
"learning_rate": 9.661579296615793e-06,
"loss": 2.3878,
"step": 460
},
{
"epoch": 0.035390400353904006,
"grad_norm": 0.3998846113681793,
"learning_rate": 9.646833296468334e-06,
"loss": 2.4028,
"step": 480
},
{
"epoch": 0.03686500036865,
"grad_norm": 0.354809045791626,
"learning_rate": 9.632087296320873e-06,
"loss": 2.384,
"step": 500
},
{
"epoch": 0.038339600383396004,
"grad_norm": 0.6007148623466492,
"learning_rate": 9.617341296173414e-06,
"loss": 2.3974,
"step": 520
},
{
"epoch": 0.039814200398142006,
"grad_norm": 0.37758493423461914,
"learning_rate": 9.602595296025953e-06,
"loss": 2.3762,
"step": 540
},
{
"epoch": 0.041288800412888,
"grad_norm": 0.4141014516353607,
"learning_rate": 9.587849295878494e-06,
"loss": 2.3831,
"step": 560
},
{
"epoch": 0.042763400427634005,
"grad_norm": 0.48960772156715393,
"learning_rate": 9.573103295731033e-06,
"loss": 2.3961,
"step": 580
},
{
"epoch": 0.04423800044238001,
"grad_norm": 0.3811470568180084,
"learning_rate": 9.558357295583574e-06,
"loss": 2.3751,
"step": 600
},
{
"epoch": 0.045712600457126,
"grad_norm": 0.35424861311912537,
"learning_rate": 9.543611295436113e-06,
"loss": 2.3725,
"step": 620
},
{
"epoch": 0.047187200471872005,
"grad_norm": 0.36190420389175415,
"learning_rate": 9.528865295288654e-06,
"loss": 2.3944,
"step": 640
},
{
"epoch": 0.04866180048661801,
"grad_norm": 0.42110538482666016,
"learning_rate": 9.514119295141195e-06,
"loss": 2.3909,
"step": 660
},
{
"epoch": 0.050136400501364004,
"grad_norm": 0.4361230134963989,
"learning_rate": 9.499373294993734e-06,
"loss": 2.3752,
"step": 680
},
{
"epoch": 0.051611000516110006,
"grad_norm": 0.3302260935306549,
"learning_rate": 9.484627294846273e-06,
"loss": 2.3699,
"step": 700
},
{
"epoch": 0.05308560053085601,
"grad_norm": 0.3521312177181244,
"learning_rate": 9.469881294698814e-06,
"loss": 2.3772,
"step": 720
},
{
"epoch": 0.054560200545602004,
"grad_norm": 0.3700699806213379,
"learning_rate": 9.455135294551355e-06,
"loss": 2.3751,
"step": 740
},
{
"epoch": 0.05603480056034801,
"grad_norm": 0.3583256006240845,
"learning_rate": 9.440389294403894e-06,
"loss": 2.3675,
"step": 760
},
{
"epoch": 0.057509400575094,
"grad_norm": 0.33290043473243713,
"learning_rate": 9.425643294256433e-06,
"loss": 2.3784,
"step": 780
},
{
"epoch": 0.058984000589840005,
"grad_norm": 0.3490481674671173,
"learning_rate": 9.410897294108974e-06,
"loss": 2.3597,
"step": 800
},
{
"epoch": 0.06045860060458601,
"grad_norm": 0.479775071144104,
"learning_rate": 9.396151293961515e-06,
"loss": 2.3852,
"step": 820
},
{
"epoch": 0.061933200619332,
"grad_norm": 0.36794590950012207,
"learning_rate": 9.381405293814054e-06,
"loss": 2.3748,
"step": 840
},
{
"epoch": 0.06340780063407801,
"grad_norm": 0.38288605213165283,
"learning_rate": 9.366659293666593e-06,
"loss": 2.3673,
"step": 860
},
{
"epoch": 0.064882400648824,
"grad_norm": 0.40629106760025024,
"learning_rate": 9.351913293519134e-06,
"loss": 2.3906,
"step": 880
},
{
"epoch": 0.06635700066357,
"grad_norm": 0.3594074249267578,
"learning_rate": 9.337167293371675e-06,
"loss": 2.3773,
"step": 900
},
{
"epoch": 0.067831600678316,
"grad_norm": 0.463144451379776,
"learning_rate": 9.322421293224214e-06,
"loss": 2.3846,
"step": 920
},
{
"epoch": 0.06930620069306201,
"grad_norm": 0.35561177134513855,
"learning_rate": 9.307675293076753e-06,
"loss": 2.3723,
"step": 940
},
{
"epoch": 0.07078080070780801,
"grad_norm": 0.3446957767009735,
"learning_rate": 9.292929292929294e-06,
"loss": 2.4011,
"step": 960
},
{
"epoch": 0.07225540072255401,
"grad_norm": 0.5307037830352783,
"learning_rate": 9.278183292781835e-06,
"loss": 2.3747,
"step": 980
},
{
"epoch": 0.0737300007373,
"grad_norm": 0.3605501055717468,
"learning_rate": 9.263437292634374e-06,
"loss": 2.3523,
"step": 1000
},
{
"epoch": 0.075204600752046,
"grad_norm": 0.3705900013446808,
"learning_rate": 9.248691292486913e-06,
"loss": 2.3693,
"step": 1020
},
{
"epoch": 0.07667920076679201,
"grad_norm": 0.3397590219974518,
"learning_rate": 9.233945292339454e-06,
"loss": 2.362,
"step": 1040
},
{
"epoch": 0.07815380078153801,
"grad_norm": 0.32325974106788635,
"learning_rate": 9.219199292191993e-06,
"loss": 2.3863,
"step": 1060
},
{
"epoch": 0.07962840079628401,
"grad_norm": 0.3661843240261078,
"learning_rate": 9.204453292044534e-06,
"loss": 2.3769,
"step": 1080
},
{
"epoch": 0.08110300081103,
"grad_norm": 0.3777139186859131,
"learning_rate": 9.189707291897075e-06,
"loss": 2.3626,
"step": 1100
},
{
"epoch": 0.082577600825776,
"grad_norm": 0.37038654088974,
"learning_rate": 9.174961291749614e-06,
"loss": 2.3895,
"step": 1120
},
{
"epoch": 0.084052200840522,
"grad_norm": 0.3628358542919159,
"learning_rate": 9.160215291602153e-06,
"loss": 2.3649,
"step": 1140
},
{
"epoch": 0.08552680085526801,
"grad_norm": 0.3579985201358795,
"learning_rate": 9.145469291454694e-06,
"loss": 2.3696,
"step": 1160
},
{
"epoch": 0.08700140087001401,
"grad_norm": 0.42436483502388,
"learning_rate": 9.130723291307235e-06,
"loss": 2.3845,
"step": 1180
},
{
"epoch": 0.08847600088476001,
"grad_norm": 0.3851209580898285,
"learning_rate": 9.115977291159774e-06,
"loss": 2.36,
"step": 1200
},
{
"epoch": 0.089950600899506,
"grad_norm": 0.5885825157165527,
"learning_rate": 9.101231291012313e-06,
"loss": 2.3776,
"step": 1220
},
{
"epoch": 0.091425200914252,
"grad_norm": 0.42304036021232605,
"learning_rate": 9.086485290864854e-06,
"loss": 2.3632,
"step": 1240
},
{
"epoch": 0.09289980092899801,
"grad_norm": 0.4534706473350525,
"learning_rate": 9.071739290717394e-06,
"loss": 2.3531,
"step": 1260
},
{
"epoch": 0.09437440094374401,
"grad_norm": 0.38540026545524597,
"learning_rate": 9.056993290569934e-06,
"loss": 2.3755,
"step": 1280
},
{
"epoch": 0.09584900095849001,
"grad_norm": 0.38401320576667786,
"learning_rate": 9.042247290422473e-06,
"loss": 2.3614,
"step": 1300
},
{
"epoch": 0.09732360097323602,
"grad_norm": 0.35363873839378357,
"learning_rate": 9.027501290275014e-06,
"loss": 2.3655,
"step": 1320
},
{
"epoch": 0.098798200987982,
"grad_norm": 0.36643800139427185,
"learning_rate": 9.012755290127554e-06,
"loss": 2.369,
"step": 1340
},
{
"epoch": 0.10027280100272801,
"grad_norm": 0.3596145212650299,
"learning_rate": 8.998009289980094e-06,
"loss": 2.3703,
"step": 1360
},
{
"epoch": 0.10174740101747401,
"grad_norm": 0.554706871509552,
"learning_rate": 8.983263289832633e-06,
"loss": 2.3631,
"step": 1380
},
{
"epoch": 0.10322200103222001,
"grad_norm": 0.3995848298072815,
"learning_rate": 8.968517289685174e-06,
"loss": 2.3671,
"step": 1400
},
{
"epoch": 0.10469660104696601,
"grad_norm": 0.3653299808502197,
"learning_rate": 8.953771289537714e-06,
"loss": 2.3783,
"step": 1420
},
{
"epoch": 0.10617120106171202,
"grad_norm": 0.39819827675819397,
"learning_rate": 8.939025289390254e-06,
"loss": 2.3617,
"step": 1440
},
{
"epoch": 0.107645801076458,
"grad_norm": 0.3512992262840271,
"learning_rate": 8.924279289242793e-06,
"loss": 2.3649,
"step": 1460
},
{
"epoch": 0.10912040109120401,
"grad_norm": 0.43283718824386597,
"learning_rate": 8.909533289095333e-06,
"loss": 2.3624,
"step": 1480
},
{
"epoch": 0.11059500110595001,
"grad_norm": 0.3857913613319397,
"learning_rate": 8.894787288947874e-06,
"loss": 2.3686,
"step": 1500
},
{
"epoch": 0.11206960112069601,
"grad_norm": 0.3893970251083374,
"learning_rate": 8.880041288800413e-06,
"loss": 2.3722,
"step": 1520
},
{
"epoch": 0.11354420113544202,
"grad_norm": 0.6659526228904724,
"learning_rate": 8.865295288652953e-06,
"loss": 2.347,
"step": 1540
},
{
"epoch": 0.115018801150188,
"grad_norm": 0.3801191449165344,
"learning_rate": 8.850549288505493e-06,
"loss": 2.3595,
"step": 1560
},
{
"epoch": 0.11649340116493401,
"grad_norm": 0.3556024432182312,
"learning_rate": 8.835803288358034e-06,
"loss": 2.3597,
"step": 1580
},
{
"epoch": 0.11796800117968001,
"grad_norm": 0.45088592171669006,
"learning_rate": 8.821057288210573e-06,
"loss": 2.3623,
"step": 1600
},
{
"epoch": 0.11944260119442601,
"grad_norm": 0.3952132761478424,
"learning_rate": 8.806311288063114e-06,
"loss": 2.3616,
"step": 1620
},
{
"epoch": 0.12091720120917201,
"grad_norm": 0.4083469808101654,
"learning_rate": 8.791565287915653e-06,
"loss": 2.3597,
"step": 1640
},
{
"epoch": 0.12239180122391802,
"grad_norm": 0.374976247549057,
"learning_rate": 8.776819287768194e-06,
"loss": 2.3656,
"step": 1660
},
{
"epoch": 0.123866401238664,
"grad_norm": 0.37194111943244934,
"learning_rate": 8.762073287620733e-06,
"loss": 2.368,
"step": 1680
},
{
"epoch": 0.12534100125341002,
"grad_norm": 0.35863035917282104,
"learning_rate": 8.747327287473274e-06,
"loss": 2.3648,
"step": 1700
},
{
"epoch": 0.12681560126815603,
"grad_norm": 0.43522682785987854,
"learning_rate": 8.732581287325813e-06,
"loss": 2.3663,
"step": 1720
},
{
"epoch": 0.128290201282902,
"grad_norm": 0.3582712411880493,
"learning_rate": 8.717835287178354e-06,
"loss": 2.3684,
"step": 1740
},
{
"epoch": 0.129764801297648,
"grad_norm": 0.40035149455070496,
"learning_rate": 8.703089287030893e-06,
"loss": 2.3679,
"step": 1760
},
{
"epoch": 0.131239401312394,
"grad_norm": 0.36125797033309937,
"learning_rate": 8.688343286883434e-06,
"loss": 2.3501,
"step": 1780
},
{
"epoch": 0.13271400132714,
"grad_norm": 0.3568665087223053,
"learning_rate": 8.673597286735973e-06,
"loss": 2.3594,
"step": 1800
},
{
"epoch": 0.134188601341886,
"grad_norm": 0.4135202169418335,
"learning_rate": 8.658851286588512e-06,
"loss": 2.387,
"step": 1820
},
{
"epoch": 0.135663201356632,
"grad_norm": 0.3586069345474243,
"learning_rate": 8.644105286441053e-06,
"loss": 2.3702,
"step": 1840
},
{
"epoch": 0.13713780137137802,
"grad_norm": 0.47354263067245483,
"learning_rate": 8.629359286293594e-06,
"loss": 2.3709,
"step": 1860
},
{
"epoch": 0.13861240138612402,
"grad_norm": 0.3735561668872833,
"learning_rate": 8.614613286146133e-06,
"loss": 2.3639,
"step": 1880
},
{
"epoch": 0.14008700140087002,
"grad_norm": 0.351646363735199,
"learning_rate": 8.599867285998672e-06,
"loss": 2.3678,
"step": 1900
},
{
"epoch": 0.14156160141561602,
"grad_norm": 0.3624926507472992,
"learning_rate": 8.585121285851213e-06,
"loss": 2.3725,
"step": 1920
},
{
"epoch": 0.14303620143036203,
"grad_norm": 0.41470956802368164,
"learning_rate": 8.570375285703754e-06,
"loss": 2.364,
"step": 1940
},
{
"epoch": 0.14451080144510803,
"grad_norm": 0.3892461657524109,
"learning_rate": 8.555629285556293e-06,
"loss": 2.3623,
"step": 1960
},
{
"epoch": 0.145985401459854,
"grad_norm": 0.37670454382896423,
"learning_rate": 8.540883285408832e-06,
"loss": 2.3649,
"step": 1980
},
{
"epoch": 0.1474600014746,
"grad_norm": 0.36948758363723755,
"learning_rate": 8.526137285261373e-06,
"loss": 2.3689,
"step": 2000
},
{
"epoch": 0.148934601489346,
"grad_norm": 0.3593231737613678,
"learning_rate": 8.511391285113914e-06,
"loss": 2.3693,
"step": 2020
},
{
"epoch": 0.150409201504092,
"grad_norm": 0.3645029366016388,
"learning_rate": 8.496645284966453e-06,
"loss": 2.3607,
"step": 2040
},
{
"epoch": 0.151883801518838,
"grad_norm": 0.761593759059906,
"learning_rate": 8.481899284818994e-06,
"loss": 2.362,
"step": 2060
},
{
"epoch": 0.15335840153358402,
"grad_norm": 0.3621225953102112,
"learning_rate": 8.467153284671533e-06,
"loss": 2.3566,
"step": 2080
},
{
"epoch": 0.15483300154833002,
"grad_norm": 0.353289395570755,
"learning_rate": 8.452407284524074e-06,
"loss": 2.3565,
"step": 2100
},
{
"epoch": 0.15630760156307602,
"grad_norm": 0.4106830060482025,
"learning_rate": 8.437661284376613e-06,
"loss": 2.3536,
"step": 2120
},
{
"epoch": 0.15778220157782202,
"grad_norm": 0.41519981622695923,
"learning_rate": 8.422915284229154e-06,
"loss": 2.3532,
"step": 2140
},
{
"epoch": 0.15925680159256803,
"grad_norm": 0.3569793701171875,
"learning_rate": 8.408169284081693e-06,
"loss": 2.3648,
"step": 2160
},
{
"epoch": 0.16073140160731403,
"grad_norm": 0.3916598856449127,
"learning_rate": 8.393423283934234e-06,
"loss": 2.3675,
"step": 2180
},
{
"epoch": 0.16220600162206,
"grad_norm": 0.3435116708278656,
"learning_rate": 8.378677283786773e-06,
"loss": 2.3685,
"step": 2200
},
{
"epoch": 0.163680601636806,
"grad_norm": 0.47191643714904785,
"learning_rate": 8.363931283639314e-06,
"loss": 2.3596,
"step": 2220
},
{
"epoch": 0.165155201651552,
"grad_norm": 0.54694002866745,
"learning_rate": 8.349185283491853e-06,
"loss": 2.3551,
"step": 2240
},
{
"epoch": 0.166629801666298,
"grad_norm": 0.9548392295837402,
"learning_rate": 8.334439283344394e-06,
"loss": 2.3784,
"step": 2260
},
{
"epoch": 0.168104401681044,
"grad_norm": 0.3765574097633362,
"learning_rate": 8.319693283196933e-06,
"loss": 2.3668,
"step": 2280
},
{
"epoch": 0.16957900169579002,
"grad_norm": 0.748131275177002,
"learning_rate": 8.304947283049474e-06,
"loss": 2.3492,
"step": 2300
},
{
"epoch": 0.17105360171053602,
"grad_norm": 0.3873535096645355,
"learning_rate": 8.290201282902015e-06,
"loss": 2.3592,
"step": 2320
},
{
"epoch": 0.17252820172528202,
"grad_norm": 0.3429403305053711,
"learning_rate": 8.275455282754554e-06,
"loss": 2.3688,
"step": 2340
},
{
"epoch": 0.17400280174002802,
"grad_norm": 0.3973939120769501,
"learning_rate": 8.260709282607093e-06,
"loss": 2.3666,
"step": 2360
},
{
"epoch": 0.17547740175477403,
"grad_norm": 0.35062074661254883,
"learning_rate": 8.245963282459634e-06,
"loss": 2.3534,
"step": 2380
},
{
"epoch": 0.17695200176952003,
"grad_norm": 0.3548290431499481,
"learning_rate": 8.231217282312175e-06,
"loss": 2.3477,
"step": 2400
},
{
"epoch": 0.17842660178426603,
"grad_norm": 0.4035143256187439,
"learning_rate": 8.216471282164714e-06,
"loss": 2.3601,
"step": 2420
},
{
"epoch": 0.179901201799012,
"grad_norm": 0.37481680512428284,
"learning_rate": 8.201725282017253e-06,
"loss": 2.3579,
"step": 2440
},
{
"epoch": 0.181375801813758,
"grad_norm": 0.5035015344619751,
"learning_rate": 8.186979281869794e-06,
"loss": 2.366,
"step": 2460
},
{
"epoch": 0.182850401828504,
"grad_norm": 0.3541349768638611,
"learning_rate": 8.172233281722333e-06,
"loss": 2.368,
"step": 2480
},
{
"epoch": 0.18432500184325,
"grad_norm": 0.3855954110622406,
"learning_rate": 8.157487281574874e-06,
"loss": 2.3542,
"step": 2500
},
{
"epoch": 0.18579960185799602,
"grad_norm": 0.4045879542827606,
"learning_rate": 8.142741281427413e-06,
"loss": 2.3579,
"step": 2520
},
{
"epoch": 0.18727420187274202,
"grad_norm": 0.3711334764957428,
"learning_rate": 8.127995281279954e-06,
"loss": 2.3711,
"step": 2540
},
{
"epoch": 0.18874880188748802,
"grad_norm": 0.3625940978527069,
"learning_rate": 8.113249281132493e-06,
"loss": 2.3573,
"step": 2560
},
{
"epoch": 0.19022340190223402,
"grad_norm": 0.6012184619903564,
"learning_rate": 8.098503280985034e-06,
"loss": 2.3705,
"step": 2580
},
{
"epoch": 0.19169800191698003,
"grad_norm": 0.40935376286506653,
"learning_rate": 8.083757280837573e-06,
"loss": 2.3615,
"step": 2600
},
{
"epoch": 0.19317260193172603,
"grad_norm": 0.34611454606056213,
"learning_rate": 8.069011280690114e-06,
"loss": 2.358,
"step": 2620
},
{
"epoch": 0.19464720194647203,
"grad_norm": 0.36542612314224243,
"learning_rate": 8.054265280542653e-06,
"loss": 2.3656,
"step": 2640
},
{
"epoch": 0.196121801961218,
"grad_norm": 0.38626089692115784,
"learning_rate": 8.039519280395194e-06,
"loss": 2.3643,
"step": 2660
},
{
"epoch": 0.197596401975964,
"grad_norm": 0.39764684438705444,
"learning_rate": 8.024773280247733e-06,
"loss": 2.3556,
"step": 2680
},
{
"epoch": 0.19907100199071,
"grad_norm": 0.3800354301929474,
"learning_rate": 8.010027280100274e-06,
"loss": 2.3696,
"step": 2700
},
{
"epoch": 0.20054560200545601,
"grad_norm": 0.37549829483032227,
"learning_rate": 7.995281279952813e-06,
"loss": 2.3623,
"step": 2720
},
{
"epoch": 0.20202020202020202,
"grad_norm": 0.3357870280742645,
"learning_rate": 7.980535279805354e-06,
"loss": 2.3609,
"step": 2740
},
{
"epoch": 0.20349480203494802,
"grad_norm": 0.38587677478790283,
"learning_rate": 7.965789279657895e-06,
"loss": 2.3648,
"step": 2760
},
{
"epoch": 0.20496940204969402,
"grad_norm": 0.3734722137451172,
"learning_rate": 7.951043279510434e-06,
"loss": 2.3637,
"step": 2780
},
{
"epoch": 0.20644400206444002,
"grad_norm": 0.37882205843925476,
"learning_rate": 7.936297279362973e-06,
"loss": 2.3638,
"step": 2800
},
{
"epoch": 0.20791860207918603,
"grad_norm": 0.3540538549423218,
"learning_rate": 7.921551279215514e-06,
"loss": 2.3483,
"step": 2820
},
{
"epoch": 0.20939320209393203,
"grad_norm": 0.3712068796157837,
"learning_rate": 7.906805279068054e-06,
"loss": 2.3553,
"step": 2840
},
{
"epoch": 0.21086780210867803,
"grad_norm": 0.3830094039440155,
"learning_rate": 7.892059278920594e-06,
"loss": 2.3526,
"step": 2860
},
{
"epoch": 0.21234240212342403,
"grad_norm": 0.37301984429359436,
"learning_rate": 7.877313278773133e-06,
"loss": 2.3597,
"step": 2880
},
{
"epoch": 0.21381700213817,
"grad_norm": 0.3589264452457428,
"learning_rate": 7.862567278625674e-06,
"loss": 2.3526,
"step": 2900
},
{
"epoch": 0.215291602152916,
"grad_norm": 0.33692067861557007,
"learning_rate": 7.847821278478214e-06,
"loss": 2.3529,
"step": 2920
},
{
"epoch": 0.21676620216766201,
"grad_norm": 0.3636477589607239,
"learning_rate": 7.833075278330754e-06,
"loss": 2.3583,
"step": 2940
},
{
"epoch": 0.21824080218240802,
"grad_norm": 0.39444780349731445,
"learning_rate": 7.818329278183293e-06,
"loss": 2.3448,
"step": 2960
},
{
"epoch": 0.21971540219715402,
"grad_norm": 0.3866218626499176,
"learning_rate": 7.803583278035834e-06,
"loss": 2.3626,
"step": 2980
},
{
"epoch": 0.22119000221190002,
"grad_norm": 0.37035319209098816,
"learning_rate": 7.788837277888374e-06,
"loss": 2.3672,
"step": 3000
},
{
"epoch": 0.22266460222664602,
"grad_norm": 0.3661469519138336,
"learning_rate": 7.774091277740914e-06,
"loss": 2.3658,
"step": 3020
},
{
"epoch": 0.22413920224139203,
"grad_norm": 0.391837477684021,
"learning_rate": 7.759345277593453e-06,
"loss": 2.3672,
"step": 3040
},
{
"epoch": 0.22561380225613803,
"grad_norm": 0.36069273948669434,
"learning_rate": 7.744599277445994e-06,
"loss": 2.3685,
"step": 3060
},
{
"epoch": 0.22708840227088403,
"grad_norm": 0.3551023602485657,
"learning_rate": 7.729853277298534e-06,
"loss": 2.3697,
"step": 3080
},
{
"epoch": 0.22856300228563003,
"grad_norm": 0.3549511730670929,
"learning_rate": 7.715107277151073e-06,
"loss": 2.3708,
"step": 3100
},
{
"epoch": 0.230037602300376,
"grad_norm": 0.3584568202495575,
"learning_rate": 7.700361277003613e-06,
"loss": 2.3491,
"step": 3120
},
{
"epoch": 0.231512202315122,
"grad_norm": 0.3881671726703644,
"learning_rate": 7.685615276856153e-06,
"loss": 2.3603,
"step": 3140
},
{
"epoch": 0.23298680232986801,
"grad_norm": 0.37565991282463074,
"learning_rate": 7.670869276708693e-06,
"loss": 2.349,
"step": 3160
},
{
"epoch": 0.23446140234461402,
"grad_norm": 0.3800000548362732,
"learning_rate": 7.656123276561233e-06,
"loss": 2.3634,
"step": 3180
},
{
"epoch": 0.23593600235936002,
"grad_norm": 0.3795914053916931,
"learning_rate": 7.641377276413774e-06,
"loss": 2.3541,
"step": 3200
},
{
"epoch": 0.23741060237410602,
"grad_norm": 0.34739038348197937,
"learning_rate": 7.626631276266313e-06,
"loss": 2.364,
"step": 3220
},
{
"epoch": 0.23888520238885202,
"grad_norm": 0.3631921112537384,
"learning_rate": 7.611885276118853e-06,
"loss": 2.3472,
"step": 3240
},
{
"epoch": 0.24035980240359803,
"grad_norm": 0.34708309173583984,
"learning_rate": 7.597139275971393e-06,
"loss": 2.3745,
"step": 3260
},
{
"epoch": 0.24183440241834403,
"grad_norm": 0.39002010226249695,
"learning_rate": 7.582393275823933e-06,
"loss": 2.3496,
"step": 3280
},
{
"epoch": 0.24330900243309003,
"grad_norm": 0.3856920003890991,
"learning_rate": 7.5676472756764725e-06,
"loss": 2.3556,
"step": 3300
},
{
"epoch": 0.24478360244783604,
"grad_norm": 0.3604213297367096,
"learning_rate": 7.552901275529013e-06,
"loss": 2.3588,
"step": 3320
},
{
"epoch": 0.24625820246258204,
"grad_norm": 0.4610792100429535,
"learning_rate": 7.538155275381553e-06,
"loss": 2.3495,
"step": 3340
},
{
"epoch": 0.247732802477328,
"grad_norm": 0.37130406498908997,
"learning_rate": 7.523409275234093e-06,
"loss": 2.3654,
"step": 3360
},
{
"epoch": 0.24920740249207401,
"grad_norm": 0.4634559154510498,
"learning_rate": 7.5086632750866325e-06,
"loss": 2.3472,
"step": 3380
},
{
"epoch": 0.25068200250682005,
"grad_norm": 0.3728873133659363,
"learning_rate": 7.493917274939173e-06,
"loss": 2.3601,
"step": 3400
},
{
"epoch": 0.252156602521566,
"grad_norm": 0.35525408387184143,
"learning_rate": 7.479171274791713e-06,
"loss": 2.3483,
"step": 3420
},
{
"epoch": 0.25363120253631205,
"grad_norm": 0.35072821378707886,
"learning_rate": 7.464425274644253e-06,
"loss": 2.3575,
"step": 3440
},
{
"epoch": 0.255105802551058,
"grad_norm": 0.33984318375587463,
"learning_rate": 7.449679274496794e-06,
"loss": 2.3562,
"step": 3460
},
{
"epoch": 0.256580402565804,
"grad_norm": 0.3893386423587799,
"learning_rate": 7.434933274349333e-06,
"loss": 2.3417,
"step": 3480
},
{
"epoch": 0.25805500258055003,
"grad_norm": 0.3702589273452759,
"learning_rate": 7.420187274201873e-06,
"loss": 2.3575,
"step": 3500
},
{
"epoch": 0.259529602595296,
"grad_norm": 0.37328028678894043,
"learning_rate": 7.405441274054413e-06,
"loss": 2.3691,
"step": 3520
},
{
"epoch": 0.26100420261004204,
"grad_norm": 0.362596720457077,
"learning_rate": 7.390695273906954e-06,
"loss": 2.3516,
"step": 3540
},
{
"epoch": 0.262478802624788,
"grad_norm": 0.35985130071640015,
"learning_rate": 7.375949273759493e-06,
"loss": 2.3579,
"step": 3560
},
{
"epoch": 0.26395340263953404,
"grad_norm": 0.39259010553359985,
"learning_rate": 7.361203273612033e-06,
"loss": 2.3674,
"step": 3580
},
{
"epoch": 0.26542800265428,
"grad_norm": 0.3379274904727936,
"learning_rate": 7.346457273464573e-06,
"loss": 2.3409,
"step": 3600
},
{
"epoch": 0.26690260266902605,
"grad_norm": 0.38404515385627747,
"learning_rate": 7.331711273317113e-06,
"loss": 2.3493,
"step": 3620
},
{
"epoch": 0.268377202683772,
"grad_norm": 0.3580029606819153,
"learning_rate": 7.316965273169654e-06,
"loss": 2.3426,
"step": 3640
},
{
"epoch": 0.26985180269851805,
"grad_norm": 0.36124977469444275,
"learning_rate": 7.302219273022193e-06,
"loss": 2.3563,
"step": 3660
},
{
"epoch": 0.271326402713264,
"grad_norm": 0.37032002210617065,
"learning_rate": 7.287473272874733e-06,
"loss": 2.3558,
"step": 3680
},
{
"epoch": 0.27280100272801,
"grad_norm": 0.37544354796409607,
"learning_rate": 7.272727272727273e-06,
"loss": 2.353,
"step": 3700
},
{
"epoch": 0.27427560274275603,
"grad_norm": 0.38063186407089233,
"learning_rate": 7.257981272579814e-06,
"loss": 2.355,
"step": 3720
},
{
"epoch": 0.275750202757502,
"grad_norm": 0.34436681866645813,
"learning_rate": 7.243235272432353e-06,
"loss": 2.3671,
"step": 3740
},
{
"epoch": 0.27722480277224804,
"grad_norm": 0.3745051622390747,
"learning_rate": 7.228489272284893e-06,
"loss": 2.343,
"step": 3760
},
{
"epoch": 0.278699402786994,
"grad_norm": 0.36901938915252686,
"learning_rate": 7.213743272137433e-06,
"loss": 2.3367,
"step": 3780
},
{
"epoch": 0.28017400280174004,
"grad_norm": 0.38288217782974243,
"learning_rate": 7.198997271989974e-06,
"loss": 2.3598,
"step": 3800
},
{
"epoch": 0.281648602816486,
"grad_norm": 0.37386777997016907,
"learning_rate": 7.184251271842513e-06,
"loss": 2.3618,
"step": 3820
},
{
"epoch": 0.28312320283123205,
"grad_norm": 0.35673725605010986,
"learning_rate": 7.169505271695053e-06,
"loss": 2.3514,
"step": 3840
},
{
"epoch": 0.284597802845978,
"grad_norm": 0.38071954250335693,
"learning_rate": 7.154759271547593e-06,
"loss": 2.3564,
"step": 3860
},
{
"epoch": 0.28607240286072405,
"grad_norm": 0.3805045187473297,
"learning_rate": 7.140013271400134e-06,
"loss": 2.3599,
"step": 3880
},
{
"epoch": 0.28754700287547,
"grad_norm": 0.3432954251766205,
"learning_rate": 7.125267271252674e-06,
"loss": 2.3568,
"step": 3900
},
{
"epoch": 0.28902160289021606,
"grad_norm": 0.35117459297180176,
"learning_rate": 7.110521271105213e-06,
"loss": 2.3566,
"step": 3920
},
{
"epoch": 0.29049620290496203,
"grad_norm": 0.39861348271369934,
"learning_rate": 7.095775270957753e-06,
"loss": 2.3553,
"step": 3940
},
{
"epoch": 0.291970802919708,
"grad_norm": 0.3653263747692108,
"learning_rate": 7.081029270810294e-06,
"loss": 2.3781,
"step": 3960
},
{
"epoch": 0.29344540293445404,
"grad_norm": 0.3614581823348999,
"learning_rate": 7.066283270662834e-06,
"loss": 2.3407,
"step": 3980
},
{
"epoch": 0.2949200029492,
"grad_norm": 0.39455854892730713,
"learning_rate": 7.051537270515373e-06,
"loss": 2.3541,
"step": 4000
},
{
"epoch": 0.29639460296394604,
"grad_norm": 0.33767521381378174,
"learning_rate": 7.036791270367913e-06,
"loss": 2.3332,
"step": 4020
},
{
"epoch": 0.297869202978692,
"grad_norm": 0.36079707741737366,
"learning_rate": 7.022045270220453e-06,
"loss": 2.3546,
"step": 4040
},
{
"epoch": 0.29934380299343805,
"grad_norm": 0.395107626914978,
"learning_rate": 7.007299270072994e-06,
"loss": 2.3483,
"step": 4060
},
{
"epoch": 0.300818403008184,
"grad_norm": 0.3456408381462097,
"learning_rate": 6.992553269925533e-06,
"loss": 2.3657,
"step": 4080
},
{
"epoch": 0.30229300302293005,
"grad_norm": 0.4138198792934418,
"learning_rate": 6.977807269778073e-06,
"loss": 2.3616,
"step": 4100
},
{
"epoch": 0.303767603037676,
"grad_norm": 0.382722944021225,
"learning_rate": 6.963061269630613e-06,
"loss": 2.3607,
"step": 4120
},
{
"epoch": 0.30524220305242206,
"grad_norm": 0.4129588007926941,
"learning_rate": 6.948315269483154e-06,
"loss": 2.3582,
"step": 4140
},
{
"epoch": 0.30671680306716803,
"grad_norm": 0.35810399055480957,
"learning_rate": 6.933569269335694e-06,
"loss": 2.345,
"step": 4160
},
{
"epoch": 0.308191403081914,
"grad_norm": 0.3707892596721649,
"learning_rate": 6.918823269188233e-06,
"loss": 2.3389,
"step": 4180
},
{
"epoch": 0.30966600309666004,
"grad_norm": 0.43102672696113586,
"learning_rate": 6.904077269040773e-06,
"loss": 2.3636,
"step": 4200
},
{
"epoch": 0.311140603111406,
"grad_norm": 0.37081730365753174,
"learning_rate": 6.889331268893314e-06,
"loss": 2.3589,
"step": 4220
},
{
"epoch": 0.31261520312615204,
"grad_norm": 0.3554447591304779,
"learning_rate": 6.874585268745854e-06,
"loss": 2.3639,
"step": 4240
},
{
"epoch": 0.314089803140898,
"grad_norm": 0.3365320563316345,
"learning_rate": 6.859839268598393e-06,
"loss": 2.3604,
"step": 4260
},
{
"epoch": 0.31556440315564405,
"grad_norm": 0.40808168053627014,
"learning_rate": 6.845093268450933e-06,
"loss": 2.3496,
"step": 4280
},
{
"epoch": 0.31703900317039,
"grad_norm": 0.380753755569458,
"learning_rate": 6.830347268303474e-06,
"loss": 2.3478,
"step": 4300
},
{
"epoch": 0.31851360318513605,
"grad_norm": 0.3863361179828644,
"learning_rate": 6.815601268156014e-06,
"loss": 2.3381,
"step": 4320
},
{
"epoch": 0.319988203199882,
"grad_norm": 0.40554317831993103,
"learning_rate": 6.800855268008554e-06,
"loss": 2.3518,
"step": 4340
},
{
"epoch": 0.32146280321462806,
"grad_norm": 0.39253705739974976,
"learning_rate": 6.786109267861093e-06,
"loss": 2.3567,
"step": 4360
},
{
"epoch": 0.32293740322937403,
"grad_norm": 0.37108898162841797,
"learning_rate": 6.771363267713633e-06,
"loss": 2.3509,
"step": 4380
},
{
"epoch": 0.32441200324412,
"grad_norm": 0.36466971039772034,
"learning_rate": 6.756617267566174e-06,
"loss": 2.3627,
"step": 4400
},
{
"epoch": 0.32588660325886604,
"grad_norm": 0.37712493538856506,
"learning_rate": 6.741871267418714e-06,
"loss": 2.3754,
"step": 4420
},
{
"epoch": 0.327361203273612,
"grad_norm": 0.3712570071220398,
"learning_rate": 6.727125267271253e-06,
"loss": 2.3619,
"step": 4440
},
{
"epoch": 0.32883580328835804,
"grad_norm": 0.34880587458610535,
"learning_rate": 6.712379267123793e-06,
"loss": 2.3574,
"step": 4460
},
{
"epoch": 0.330310403303104,
"grad_norm": 0.4109377861022949,
"learning_rate": 6.697633266976334e-06,
"loss": 2.3574,
"step": 4480
},
{
"epoch": 0.33178500331785005,
"grad_norm": 0.3626454770565033,
"learning_rate": 6.682887266828874e-06,
"loss": 2.3527,
"step": 4500
},
{
"epoch": 0.333259603332596,
"grad_norm": 0.3580588400363922,
"learning_rate": 6.668141266681413e-06,
"loss": 2.348,
"step": 4520
},
{
"epoch": 0.33473420334734205,
"grad_norm": 0.3460945785045624,
"learning_rate": 6.653395266533953e-06,
"loss": 2.3461,
"step": 4540
},
{
"epoch": 0.336208803362088,
"grad_norm": 0.39259958267211914,
"learning_rate": 6.6386492663864936e-06,
"loss": 2.3818,
"step": 4560
},
{
"epoch": 0.33768340337683406,
"grad_norm": 0.44788721203804016,
"learning_rate": 6.6239032662390335e-06,
"loss": 2.3636,
"step": 4580
},
{
"epoch": 0.33915800339158003,
"grad_norm": 0.40668484568595886,
"learning_rate": 6.6091572660915735e-06,
"loss": 2.3542,
"step": 4600
},
{
"epoch": 0.340632603406326,
"grad_norm": 0.3672851026058197,
"learning_rate": 6.594411265944113e-06,
"loss": 2.3497,
"step": 4620
},
{
"epoch": 0.34210720342107204,
"grad_norm": 0.3685692846775055,
"learning_rate": 6.5796652657966535e-06,
"loss": 2.361,
"step": 4640
},
{
"epoch": 0.343581803435818,
"grad_norm": 0.4522005617618561,
"learning_rate": 6.5649192656491935e-06,
"loss": 2.3498,
"step": 4660
},
{
"epoch": 0.34505640345056404,
"grad_norm": 0.4089388847351074,
"learning_rate": 6.5501732655017335e-06,
"loss": 2.3479,
"step": 4680
},
{
"epoch": 0.34653100346531,
"grad_norm": 0.3874075710773468,
"learning_rate": 6.535427265354273e-06,
"loss": 2.3437,
"step": 4700
},
{
"epoch": 0.34800560348005605,
"grad_norm": 0.3733789622783661,
"learning_rate": 6.520681265206813e-06,
"loss": 2.3524,
"step": 4720
},
{
"epoch": 0.349480203494802,
"grad_norm": 0.3892102539539337,
"learning_rate": 6.5059352650593535e-06,
"loss": 2.346,
"step": 4740
},
{
"epoch": 0.35095480350954805,
"grad_norm": 0.36850956082344055,
"learning_rate": 6.4911892649118935e-06,
"loss": 2.3532,
"step": 4760
},
{
"epoch": 0.352429403524294,
"grad_norm": 0.3519984185695648,
"learning_rate": 6.476443264764433e-06,
"loss": 2.3429,
"step": 4780
},
{
"epoch": 0.35390400353904006,
"grad_norm": 0.36120033264160156,
"learning_rate": 6.461697264616973e-06,
"loss": 2.3397,
"step": 4800
},
{
"epoch": 0.35537860355378603,
"grad_norm": 0.40259799361228943,
"learning_rate": 6.446951264469513e-06,
"loss": 2.3443,
"step": 4820
},
{
"epoch": 0.35685320356853206,
"grad_norm": 0.370540976524353,
"learning_rate": 6.432205264322053e-06,
"loss": 2.3494,
"step": 4840
},
{
"epoch": 0.35832780358327804,
"grad_norm": 0.7460556626319885,
"learning_rate": 6.417459264174593e-06,
"loss": 2.3452,
"step": 4860
},
{
"epoch": 0.359802403598024,
"grad_norm": 0.3665451109409332,
"learning_rate": 6.4027132640271325e-06,
"loss": 2.3693,
"step": 4880
},
{
"epoch": 0.36127700361277004,
"grad_norm": 0.38104239106178284,
"learning_rate": 6.387967263879673e-06,
"loss": 2.3568,
"step": 4900
},
{
"epoch": 0.362751603627516,
"grad_norm": 0.36739110946655273,
"learning_rate": 6.373221263732213e-06,
"loss": 2.361,
"step": 4920
},
{
"epoch": 0.36422620364226205,
"grad_norm": 0.405453085899353,
"learning_rate": 6.358475263584753e-06,
"loss": 2.3448,
"step": 4940
},
{
"epoch": 0.365700803657008,
"grad_norm": 0.349317342042923,
"learning_rate": 6.3437292634372925e-06,
"loss": 2.3569,
"step": 4960
},
{
"epoch": 0.36717540367175405,
"grad_norm": 0.3882797360420227,
"learning_rate": 6.328983263289833e-06,
"loss": 2.3457,
"step": 4980
},
{
"epoch": 0.3686500036865,
"grad_norm": 0.42195364832878113,
"learning_rate": 6.314237263142373e-06,
"loss": 2.364,
"step": 5000
},
{
"epoch": 0.37012460370124606,
"grad_norm": 0.40603727102279663,
"learning_rate": 6.299491262994913e-06,
"loss": 2.3365,
"step": 5020
},
{
"epoch": 0.37159920371599203,
"grad_norm": 0.3686840534210205,
"learning_rate": 6.284745262847453e-06,
"loss": 2.3624,
"step": 5040
},
{
"epoch": 0.37307380373073806,
"grad_norm": 0.37392449378967285,
"learning_rate": 6.2699992626999924e-06,
"loss": 2.3597,
"step": 5060
},
{
"epoch": 0.37454840374548404,
"grad_norm": 0.3410843014717102,
"learning_rate": 6.255253262552533e-06,
"loss": 2.3552,
"step": 5080
},
{
"epoch": 0.37602300376023,
"grad_norm": 0.3532737195491791,
"learning_rate": 6.240507262405073e-06,
"loss": 2.3532,
"step": 5100
},
{
"epoch": 0.37749760377497604,
"grad_norm": 0.3908025026321411,
"learning_rate": 6.225761262257613e-06,
"loss": 2.3624,
"step": 5120
},
{
"epoch": 0.378972203789722,
"grad_norm": 0.3747566044330597,
"learning_rate": 6.211015262110152e-06,
"loss": 2.3542,
"step": 5140
},
{
"epoch": 0.38044680380446805,
"grad_norm": 0.37590813636779785,
"learning_rate": 6.196269261962693e-06,
"loss": 2.3523,
"step": 5160
},
{
"epoch": 0.381921403819214,
"grad_norm": 0.3960760533809662,
"learning_rate": 6.181523261815233e-06,
"loss": 2.367,
"step": 5180
},
{
"epoch": 0.38339600383396005,
"grad_norm": 0.34570005536079407,
"learning_rate": 6.166777261667773e-06,
"loss": 2.354,
"step": 5200
},
{
"epoch": 0.384870603848706,
"grad_norm": 0.3910704553127289,
"learning_rate": 6.152031261520312e-06,
"loss": 2.3657,
"step": 5220
},
{
"epoch": 0.38634520386345206,
"grad_norm": 0.35407891869544983,
"learning_rate": 6.137285261372853e-06,
"loss": 2.3494,
"step": 5240
},
{
"epoch": 0.38781980387819803,
"grad_norm": 0.37890905141830444,
"learning_rate": 6.122539261225393e-06,
"loss": 2.3476,
"step": 5260
},
{
"epoch": 0.38929440389294406,
"grad_norm": 0.388045072555542,
"learning_rate": 6.107793261077933e-06,
"loss": 2.3593,
"step": 5280
},
{
"epoch": 0.39076900390769004,
"grad_norm": 0.3695070743560791,
"learning_rate": 6.093047260930474e-06,
"loss": 2.3645,
"step": 5300
},
{
"epoch": 0.392243603922436,
"grad_norm": 0.3945058584213257,
"learning_rate": 6.078301260783013e-06,
"loss": 2.3439,
"step": 5320
},
{
"epoch": 0.39371820393718204,
"grad_norm": 0.3858231008052826,
"learning_rate": 6.063555260635553e-06,
"loss": 2.3648,
"step": 5340
},
{
"epoch": 0.395192803951928,
"grad_norm": 0.3549276292324066,
"learning_rate": 6.048809260488093e-06,
"loss": 2.3514,
"step": 5360
},
{
"epoch": 0.39666740396667405,
"grad_norm": 0.3770926296710968,
"learning_rate": 6.034063260340633e-06,
"loss": 2.3598,
"step": 5380
},
{
"epoch": 0.39814200398142,
"grad_norm": 0.37677156925201416,
"learning_rate": 6.019317260193172e-06,
"loss": 2.3539,
"step": 5400
},
{
"epoch": 0.39961660399616605,
"grad_norm": 0.3545861542224884,
"learning_rate": 6.004571260045713e-06,
"loss": 2.3535,
"step": 5420
},
{
"epoch": 0.40109120401091203,
"grad_norm": 0.3972468078136444,
"learning_rate": 5.989825259898253e-06,
"loss": 2.3533,
"step": 5440
},
{
"epoch": 0.40256580402565806,
"grad_norm": 0.368756502866745,
"learning_rate": 5.975079259750793e-06,
"loss": 2.3509,
"step": 5460
},
{
"epoch": 0.40404040404040403,
"grad_norm": 0.40190038084983826,
"learning_rate": 5.960333259603332e-06,
"loss": 2.3375,
"step": 5480
},
{
"epoch": 0.40551500405515006,
"grad_norm": 0.34512144327163696,
"learning_rate": 5.945587259455873e-06,
"loss": 2.3557,
"step": 5500
},
{
"epoch": 0.40698960406989604,
"grad_norm": 0.3768044114112854,
"learning_rate": 5.930841259308413e-06,
"loss": 2.3466,
"step": 5520
},
{
"epoch": 0.40846420408464207,
"grad_norm": 0.3744104504585266,
"learning_rate": 5.916095259160953e-06,
"loss": 2.3565,
"step": 5540
},
{
"epoch": 0.40993880409938804,
"grad_norm": 0.35657835006713867,
"learning_rate": 5.901349259013494e-06,
"loss": 2.3474,
"step": 5560
},
{
"epoch": 0.411413404114134,
"grad_norm": 0.40596744418144226,
"learning_rate": 5.886603258866033e-06,
"loss": 2.344,
"step": 5580
},
{
"epoch": 0.41288800412888005,
"grad_norm": 0.3615570366382599,
"learning_rate": 5.871857258718573e-06,
"loss": 2.3375,
"step": 5600
},
{
"epoch": 0.414362604143626,
"grad_norm": 0.3853433132171631,
"learning_rate": 5.857111258571113e-06,
"loss": 2.3536,
"step": 5620
},
{
"epoch": 0.41583720415837205,
"grad_norm": 0.4154307544231415,
"learning_rate": 5.842365258423654e-06,
"loss": 2.3399,
"step": 5640
},
{
"epoch": 0.41731180417311803,
"grad_norm": 0.4312744438648224,
"learning_rate": 5.827619258276193e-06,
"loss": 2.3442,
"step": 5660
},
{
"epoch": 0.41878640418786406,
"grad_norm": 0.402468740940094,
"learning_rate": 5.812873258128733e-06,
"loss": 2.3522,
"step": 5680
},
{
"epoch": 0.42026100420261003,
"grad_norm": 0.4803409278392792,
"learning_rate": 5.798127257981273e-06,
"loss": 2.3547,
"step": 5700
},
{
"epoch": 0.42173560421735606,
"grad_norm": 0.3646136522293091,
"learning_rate": 5.783381257833813e-06,
"loss": 2.3431,
"step": 5720
},
{
"epoch": 0.42321020423210204,
"grad_norm": 0.380462646484375,
"learning_rate": 5.768635257686354e-06,
"loss": 2.3564,
"step": 5740
},
{
"epoch": 0.42468480424684807,
"grad_norm": 0.3754338324069977,
"learning_rate": 5.753889257538893e-06,
"loss": 2.3464,
"step": 5760
},
{
"epoch": 0.42615940426159404,
"grad_norm": 0.3629322350025177,
"learning_rate": 5.739143257391433e-06,
"loss": 2.3563,
"step": 5780
},
{
"epoch": 0.42763400427634,
"grad_norm": 0.4097397029399872,
"learning_rate": 5.724397257243973e-06,
"loss": 2.3488,
"step": 5800
},
{
"epoch": 0.42910860429108605,
"grad_norm": 0.3598233163356781,
"learning_rate": 5.709651257096514e-06,
"loss": 2.3641,
"step": 5820
},
{
"epoch": 0.430583204305832,
"grad_norm": 0.39495351910591125,
"learning_rate": 5.694905256949053e-06,
"loss": 2.3584,
"step": 5840
},
{
"epoch": 0.43205780432057805,
"grad_norm": 0.3598216772079468,
"learning_rate": 5.680159256801593e-06,
"loss": 2.3519,
"step": 5860
},
{
"epoch": 0.43353240433532403,
"grad_norm": 0.4005551338195801,
"learning_rate": 5.665413256654133e-06,
"loss": 2.3455,
"step": 5880
},
{
"epoch": 0.43500700435007006,
"grad_norm": 0.39282020926475525,
"learning_rate": 5.650667256506674e-06,
"loss": 2.3562,
"step": 5900
},
{
"epoch": 0.43648160436481603,
"grad_norm": 0.3693186044692993,
"learning_rate": 5.635921256359213e-06,
"loss": 2.3404,
"step": 5920
},
{
"epoch": 0.43795620437956206,
"grad_norm": 0.3796662986278534,
"learning_rate": 5.621175256211753e-06,
"loss": 2.3545,
"step": 5940
},
{
"epoch": 0.43943080439430804,
"grad_norm": 0.3727371394634247,
"learning_rate": 5.606429256064293e-06,
"loss": 2.3613,
"step": 5960
},
{
"epoch": 0.44090540440905407,
"grad_norm": 0.38252395391464233,
"learning_rate": 5.591683255916834e-06,
"loss": 2.3477,
"step": 5980
},
{
"epoch": 0.44238000442380004,
"grad_norm": 0.36342570185661316,
"learning_rate": 5.576937255769374e-06,
"loss": 2.361,
"step": 6000
},
{
"epoch": 0.443854604438546,
"grad_norm": 0.37686145305633545,
"learning_rate": 5.562191255621913e-06,
"loss": 2.358,
"step": 6020
},
{
"epoch": 0.44532920445329205,
"grad_norm": 0.373463898897171,
"learning_rate": 5.547445255474453e-06,
"loss": 2.3528,
"step": 6040
},
{
"epoch": 0.446803804468038,
"grad_norm": 0.3747381567955017,
"learning_rate": 5.532699255326993e-06,
"loss": 2.3428,
"step": 6060
},
{
"epoch": 0.44827840448278405,
"grad_norm": 0.3892759382724762,
"learning_rate": 5.517953255179534e-06,
"loss": 2.3551,
"step": 6080
},
{
"epoch": 0.44975300449753003,
"grad_norm": 0.37567898631095886,
"learning_rate": 5.503207255032073e-06,
"loss": 2.3487,
"step": 6100
},
{
"epoch": 0.45122760451227606,
"grad_norm": 0.3452893793582916,
"learning_rate": 5.488461254884613e-06,
"loss": 2.3427,
"step": 6120
},
{
"epoch": 0.45270220452702203,
"grad_norm": 0.387379914522171,
"learning_rate": 5.473715254737153e-06,
"loss": 2.3445,
"step": 6140
},
{
"epoch": 0.45417680454176806,
"grad_norm": 0.36834776401519775,
"learning_rate": 5.4589692545896936e-06,
"loss": 2.3483,
"step": 6160
},
{
"epoch": 0.45565140455651404,
"grad_norm": 0.3641732633113861,
"learning_rate": 5.4442232544422335e-06,
"loss": 2.3514,
"step": 6180
},
{
"epoch": 0.45712600457126007,
"grad_norm": 0.41028374433517456,
"learning_rate": 5.429477254294773e-06,
"loss": 2.3425,
"step": 6200
},
{
"epoch": 0.45860060458600604,
"grad_norm": 0.3992222845554352,
"learning_rate": 5.414731254147313e-06,
"loss": 2.3565,
"step": 6220
},
{
"epoch": 0.460075204600752,
"grad_norm": 0.377287358045578,
"learning_rate": 5.3999852539998535e-06,
"loss": 2.3562,
"step": 6240
},
{
"epoch": 0.46154980461549805,
"grad_norm": 0.4046432077884674,
"learning_rate": 5.3852392538523935e-06,
"loss": 2.3483,
"step": 6260
},
{
"epoch": 0.463024404630244,
"grad_norm": 0.35515862703323364,
"learning_rate": 5.370493253704933e-06,
"loss": 2.353,
"step": 6280
},
{
"epoch": 0.46449900464499005,
"grad_norm": 0.36241665482521057,
"learning_rate": 5.355747253557473e-06,
"loss": 2.3389,
"step": 6300
},
{
"epoch": 0.46597360465973603,
"grad_norm": 0.3970908522605896,
"learning_rate": 5.3410012534100135e-06,
"loss": 2.3639,
"step": 6320
},
{
"epoch": 0.46744820467448206,
"grad_norm": 0.36760082840919495,
"learning_rate": 5.3262552532625535e-06,
"loss": 2.3546,
"step": 6340
},
{
"epoch": 0.46892280468922803,
"grad_norm": 0.34653639793395996,
"learning_rate": 5.311509253115093e-06,
"loss": 2.3448,
"step": 6360
},
{
"epoch": 0.47039740470397406,
"grad_norm": 0.35380202531814575,
"learning_rate": 5.296763252967633e-06,
"loss": 2.3461,
"step": 6380
},
{
"epoch": 0.47187200471872004,
"grad_norm": 0.3386562764644623,
"learning_rate": 5.282017252820173e-06,
"loss": 2.3489,
"step": 6400
},
{
"epoch": 0.47334660473346607,
"grad_norm": 0.3794664144515991,
"learning_rate": 5.267271252672713e-06,
"loss": 2.3565,
"step": 6420
},
{
"epoch": 0.47482120474821204,
"grad_norm": 0.3754018545150757,
"learning_rate": 5.252525252525253e-06,
"loss": 2.3443,
"step": 6440
},
{
"epoch": 0.4762958047629581,
"grad_norm": 0.41498491168022156,
"learning_rate": 5.2377792523777926e-06,
"loss": 2.3501,
"step": 6460
},
{
"epoch": 0.47777040477770405,
"grad_norm": 0.4399779140949249,
"learning_rate": 5.2230332522303325e-06,
"loss": 2.3434,
"step": 6480
},
{
"epoch": 0.47924500479245,
"grad_norm": 0.3858584463596344,
"learning_rate": 5.208287252082873e-06,
"loss": 2.3612,
"step": 6500
},
{
"epoch": 0.48071960480719605,
"grad_norm": 0.379436731338501,
"learning_rate": 5.193541251935413e-06,
"loss": 2.3534,
"step": 6520
},
{
"epoch": 0.48219420482194203,
"grad_norm": 0.36703166365623474,
"learning_rate": 5.1787952517879525e-06,
"loss": 2.3385,
"step": 6540
},
{
"epoch": 0.48366880483668806,
"grad_norm": 0.365614652633667,
"learning_rate": 5.1640492516404925e-06,
"loss": 2.3417,
"step": 6560
},
{
"epoch": 0.48514340485143403,
"grad_norm": 0.3665941655635834,
"learning_rate": 5.149303251493033e-06,
"loss": 2.3478,
"step": 6580
},
{
"epoch": 0.48661800486618007,
"grad_norm": 0.37497478723526,
"learning_rate": 5.134557251345573e-06,
"loss": 2.3713,
"step": 6600
},
{
"epoch": 0.48809260488092604,
"grad_norm": 0.37431296706199646,
"learning_rate": 5.1198112511981125e-06,
"loss": 2.3544,
"step": 6620
},
{
"epoch": 0.48956720489567207,
"grad_norm": 0.3486333191394806,
"learning_rate": 5.1050652510506525e-06,
"loss": 2.3578,
"step": 6640
},
{
"epoch": 0.49104180491041804,
"grad_norm": 0.36677151918411255,
"learning_rate": 5.090319250903193e-06,
"loss": 2.3437,
"step": 6660
},
{
"epoch": 0.4925164049251641,
"grad_norm": 0.36574506759643555,
"learning_rate": 5.075573250755733e-06,
"loss": 2.3574,
"step": 6680
},
{
"epoch": 0.49399100493991005,
"grad_norm": 0.47523924708366394,
"learning_rate": 5.060827250608273e-06,
"loss": 2.3535,
"step": 6700
},
{
"epoch": 0.495465604954656,
"grad_norm": 0.39526769518852234,
"learning_rate": 5.046081250460812e-06,
"loss": 2.3536,
"step": 6720
},
{
"epoch": 0.49694020496940206,
"grad_norm": 0.38120681047439575,
"learning_rate": 5.031335250313352e-06,
"loss": 2.342,
"step": 6740
},
{
"epoch": 0.49841480498414803,
"grad_norm": 0.3704290986061096,
"learning_rate": 5.016589250165893e-06,
"loss": 2.3455,
"step": 6760
},
{
"epoch": 0.49988940499889406,
"grad_norm": 0.3897051215171814,
"learning_rate": 5.001843250018433e-06,
"loss": 2.3538,
"step": 6780
},
{
"epoch": 0.5013640050136401,
"grad_norm": 0.3682934045791626,
"learning_rate": 4.987097249870973e-06,
"loss": 2.3647,
"step": 6800
},
{
"epoch": 0.5028386050283861,
"grad_norm": 0.370624840259552,
"learning_rate": 4.972351249723512e-06,
"loss": 2.3561,
"step": 6820
},
{
"epoch": 0.504313205043132,
"grad_norm": 0.3740212321281433,
"learning_rate": 4.957605249576053e-06,
"loss": 2.3297,
"step": 6840
},
{
"epoch": 0.505787805057878,
"grad_norm": 0.3610004484653473,
"learning_rate": 4.942859249428592e-06,
"loss": 2.3645,
"step": 6860
},
{
"epoch": 0.5072624050726241,
"grad_norm": 0.47170379757881165,
"learning_rate": 4.928113249281133e-06,
"loss": 2.3497,
"step": 6880
},
{
"epoch": 0.5087370050873701,
"grad_norm": 0.36539992690086365,
"learning_rate": 4.913367249133673e-06,
"loss": 2.357,
"step": 6900
},
{
"epoch": 0.510211605102116,
"grad_norm": 0.4011884927749634,
"learning_rate": 4.898621248986213e-06,
"loss": 2.3517,
"step": 6920
},
{
"epoch": 0.511686205116862,
"grad_norm": 0.37110114097595215,
"learning_rate": 4.883875248838753e-06,
"loss": 2.359,
"step": 6940
},
{
"epoch": 0.513160805131608,
"grad_norm": 0.36303210258483887,
"learning_rate": 4.869129248691293e-06,
"loss": 2.3638,
"step": 6960
},
{
"epoch": 0.5146354051463541,
"grad_norm": 0.39052852988243103,
"learning_rate": 4.854383248543833e-06,
"loss": 2.3573,
"step": 6980
},
{
"epoch": 0.5161100051611001,
"grad_norm": 0.3620651066303253,
"learning_rate": 4.839637248396373e-06,
"loss": 2.3413,
"step": 7000
},
{
"epoch": 0.517584605175846,
"grad_norm": 0.35670602321624756,
"learning_rate": 4.824891248248913e-06,
"loss": 2.358,
"step": 7020
},
{
"epoch": 0.519059205190592,
"grad_norm": 0.3757779002189636,
"learning_rate": 4.810145248101453e-06,
"loss": 2.3408,
"step": 7040
},
{
"epoch": 0.5205338052053381,
"grad_norm": 0.3540636897087097,
"learning_rate": 4.795399247953993e-06,
"loss": 2.3386,
"step": 7060
},
{
"epoch": 0.5220084052200841,
"grad_norm": 0.3777630031108856,
"learning_rate": 4.780653247806532e-06,
"loss": 2.3498,
"step": 7080
},
{
"epoch": 0.52348300523483,
"grad_norm": 0.38715338706970215,
"learning_rate": 4.765907247659073e-06,
"loss": 2.3357,
"step": 7100
},
{
"epoch": 0.524957605249576,
"grad_norm": 0.37200599908828735,
"learning_rate": 4.751161247511613e-06,
"loss": 2.3409,
"step": 7120
},
{
"epoch": 0.5264322052643221,
"grad_norm": 0.34266674518585205,
"learning_rate": 4.736415247364153e-06,
"loss": 2.3489,
"step": 7140
},
{
"epoch": 0.5279068052790681,
"grad_norm": 0.39111700654029846,
"learning_rate": 4.721669247216693e-06,
"loss": 2.3399,
"step": 7160
},
{
"epoch": 0.5293814052938141,
"grad_norm": 0.34332647919654846,
"learning_rate": 4.706923247069233e-06,
"loss": 2.3407,
"step": 7180
},
{
"epoch": 0.53085600530856,
"grad_norm": 0.3694305717945099,
"learning_rate": 4.692177246921773e-06,
"loss": 2.3507,
"step": 7200
},
{
"epoch": 0.532330605323306,
"grad_norm": 0.3671371042728424,
"learning_rate": 4.677431246774313e-06,
"loss": 2.3511,
"step": 7220
},
{
"epoch": 0.5338052053380521,
"grad_norm": 0.35831600427627563,
"learning_rate": 4.662685246626853e-06,
"loss": 2.3409,
"step": 7240
},
{
"epoch": 0.5352798053527981,
"grad_norm": 0.388938307762146,
"learning_rate": 4.647939246479393e-06,
"loss": 2.3491,
"step": 7260
},
{
"epoch": 0.536754405367544,
"grad_norm": 0.3723289668560028,
"learning_rate": 4.633193246331933e-06,
"loss": 2.3538,
"step": 7280
},
{
"epoch": 0.53822900538229,
"grad_norm": 0.3653968870639801,
"learning_rate": 4.618447246184473e-06,
"loss": 2.3477,
"step": 7300
},
{
"epoch": 0.5397036053970361,
"grad_norm": 0.3933035731315613,
"learning_rate": 4.603701246037013e-06,
"loss": 2.3452,
"step": 7320
},
{
"epoch": 0.5411782054117821,
"grad_norm": 0.36271172761917114,
"learning_rate": 4.588955245889553e-06,
"loss": 2.3538,
"step": 7340
},
{
"epoch": 0.542652805426528,
"grad_norm": 0.3978424072265625,
"learning_rate": 4.574209245742093e-06,
"loss": 2.3462,
"step": 7360
},
{
"epoch": 0.544127405441274,
"grad_norm": 0.35292622447013855,
"learning_rate": 4.559463245594633e-06,
"loss": 2.3517,
"step": 7380
},
{
"epoch": 0.54560200545602,
"grad_norm": 0.3812173008918762,
"learning_rate": 4.544717245447173e-06,
"loss": 2.3494,
"step": 7400
},
{
"epoch": 0.5470766054707661,
"grad_norm": 0.3730863034725189,
"learning_rate": 4.529971245299713e-06,
"loss": 2.3468,
"step": 7420
},
{
"epoch": 0.5485512054855121,
"grad_norm": 0.3639664053916931,
"learning_rate": 4.515225245152253e-06,
"loss": 2.3509,
"step": 7440
},
{
"epoch": 0.550025805500258,
"grad_norm": 0.3697253167629242,
"learning_rate": 4.500479245004793e-06,
"loss": 2.3563,
"step": 7460
},
{
"epoch": 0.551500405515004,
"grad_norm": 0.44058653712272644,
"learning_rate": 4.485733244857333e-06,
"loss": 2.3463,
"step": 7480
},
{
"epoch": 0.5529750055297501,
"grad_norm": 0.3621077835559845,
"learning_rate": 4.470987244709873e-06,
"loss": 2.3484,
"step": 7500
},
{
"epoch": 0.5544496055444961,
"grad_norm": 0.3550095558166504,
"learning_rate": 4.456241244562413e-06,
"loss": 2.3503,
"step": 7520
},
{
"epoch": 0.555924205559242,
"grad_norm": 0.40449610352516174,
"learning_rate": 4.441495244414953e-06,
"loss": 2.3392,
"step": 7540
},
{
"epoch": 0.557398805573988,
"grad_norm": 0.41954267024993896,
"learning_rate": 4.426749244267493e-06,
"loss": 2.3544,
"step": 7560
},
{
"epoch": 0.5588734055887341,
"grad_norm": 0.3714821934700012,
"learning_rate": 4.412003244120033e-06,
"loss": 2.3517,
"step": 7580
},
{
"epoch": 0.5603480056034801,
"grad_norm": 0.35409021377563477,
"learning_rate": 4.397257243972573e-06,
"loss": 2.3368,
"step": 7600
},
{
"epoch": 0.5618226056182261,
"grad_norm": 0.38718315958976746,
"learning_rate": 4.382511243825113e-06,
"loss": 2.349,
"step": 7620
},
{
"epoch": 0.563297205632972,
"grad_norm": 0.3989495038986206,
"learning_rate": 4.367765243677653e-06,
"loss": 2.3622,
"step": 7640
},
{
"epoch": 0.564771805647718,
"grad_norm": 0.38206225633621216,
"learning_rate": 4.353019243530193e-06,
"loss": 2.348,
"step": 7660
},
{
"epoch": 0.5662464056624641,
"grad_norm": 0.4094175100326538,
"learning_rate": 4.338273243382733e-06,
"loss": 2.337,
"step": 7680
},
{
"epoch": 0.5677210056772101,
"grad_norm": 0.3633534908294678,
"learning_rate": 4.323527243235273e-06,
"loss": 2.3413,
"step": 7700
},
{
"epoch": 0.569195605691956,
"grad_norm": 0.3575294613838196,
"learning_rate": 4.308781243087813e-06,
"loss": 2.3388,
"step": 7720
},
{
"epoch": 0.570670205706702,
"grad_norm": 0.3401530683040619,
"learning_rate": 4.294035242940353e-06,
"loss": 2.337,
"step": 7740
},
{
"epoch": 0.5721448057214481,
"grad_norm": 0.37117597460746765,
"learning_rate": 4.279289242792893e-06,
"loss": 2.3489,
"step": 7760
},
{
"epoch": 0.5736194057361941,
"grad_norm": 0.4254913330078125,
"learning_rate": 4.264543242645433e-06,
"loss": 2.3592,
"step": 7780
},
{
"epoch": 0.57509400575094,
"grad_norm": 0.3747338354587555,
"learning_rate": 4.249797242497973e-06,
"loss": 2.3525,
"step": 7800
},
{
"epoch": 0.576568605765686,
"grad_norm": 0.4081355035305023,
"learning_rate": 4.235051242350513e-06,
"loss": 2.3482,
"step": 7820
},
{
"epoch": 0.5780432057804321,
"grad_norm": 0.3858603835105896,
"learning_rate": 4.220305242203053e-06,
"loss": 2.3443,
"step": 7840
},
{
"epoch": 0.5795178057951781,
"grad_norm": 0.3556371033191681,
"learning_rate": 4.205559242055593e-06,
"loss": 2.3328,
"step": 7860
},
{
"epoch": 0.5809924058099241,
"grad_norm": 0.3800757825374603,
"learning_rate": 4.190813241908133e-06,
"loss": 2.3494,
"step": 7880
},
{
"epoch": 0.58246700582467,
"grad_norm": 0.3651157021522522,
"learning_rate": 4.176067241760673e-06,
"loss": 2.3476,
"step": 7900
},
{
"epoch": 0.583941605839416,
"grad_norm": 0.35027289390563965,
"learning_rate": 4.161321241613213e-06,
"loss": 2.362,
"step": 7920
},
{
"epoch": 0.5854162058541621,
"grad_norm": 0.37928810715675354,
"learning_rate": 4.146575241465753e-06,
"loss": 2.3471,
"step": 7940
},
{
"epoch": 0.5868908058689081,
"grad_norm": 0.3614574372768402,
"learning_rate": 4.131829241318293e-06,
"loss": 2.3469,
"step": 7960
},
{
"epoch": 0.588365405883654,
"grad_norm": 0.3692425489425659,
"learning_rate": 4.117083241170833e-06,
"loss": 2.3517,
"step": 7980
},
{
"epoch": 0.5898400058984,
"grad_norm": 0.3615201413631439,
"learning_rate": 4.102337241023373e-06,
"loss": 2.3385,
"step": 8000
},
{
"epoch": 0.5913146059131461,
"grad_norm": 0.37288710474967957,
"learning_rate": 4.0875912408759126e-06,
"loss": 2.3592,
"step": 8020
},
{
"epoch": 0.5927892059278921,
"grad_norm": 0.3470204472541809,
"learning_rate": 4.072845240728453e-06,
"loss": 2.3546,
"step": 8040
},
{
"epoch": 0.5942638059426381,
"grad_norm": 0.38719677925109863,
"learning_rate": 4.0580992405809925e-06,
"loss": 2.3439,
"step": 8060
},
{
"epoch": 0.595738405957384,
"grad_norm": 0.3788948953151703,
"learning_rate": 4.0433532404335325e-06,
"loss": 2.3555,
"step": 8080
},
{
"epoch": 0.59721300597213,
"grad_norm": 0.42110002040863037,
"learning_rate": 4.0286072402860725e-06,
"loss": 2.3502,
"step": 8100
},
{
"epoch": 0.5986876059868761,
"grad_norm": 0.4007836580276489,
"learning_rate": 4.0138612401386125e-06,
"loss": 2.3571,
"step": 8120
},
{
"epoch": 0.6001622060016221,
"grad_norm": 0.36043021082878113,
"learning_rate": 3.9991152399911525e-06,
"loss": 2.3589,
"step": 8140
},
{
"epoch": 0.601636806016368,
"grad_norm": 0.4028465151786804,
"learning_rate": 3.9843692398436925e-06,
"loss": 2.3399,
"step": 8160
},
{
"epoch": 0.603111406031114,
"grad_norm": 0.39534154534339905,
"learning_rate": 3.9696232396962325e-06,
"loss": 2.3571,
"step": 8180
},
{
"epoch": 0.6045860060458601,
"grad_norm": 0.361286997795105,
"learning_rate": 3.9548772395487725e-06,
"loss": 2.3358,
"step": 8200
},
{
"epoch": 0.6060606060606061,
"grad_norm": 0.3674760162830353,
"learning_rate": 3.9401312394013125e-06,
"loss": 2.3449,
"step": 8220
},
{
"epoch": 0.607535206075352,
"grad_norm": 0.39205583930015564,
"learning_rate": 3.9253852392538525e-06,
"loss": 2.3558,
"step": 8240
},
{
"epoch": 0.609009806090098,
"grad_norm": 0.3901711404323578,
"learning_rate": 3.910639239106393e-06,
"loss": 2.3565,
"step": 8260
},
{
"epoch": 0.6104844061048441,
"grad_norm": 0.3791930377483368,
"learning_rate": 3.8958932389589324e-06,
"loss": 2.3425,
"step": 8280
},
{
"epoch": 0.6119590061195901,
"grad_norm": 0.36277633905410767,
"learning_rate": 3.881147238811473e-06,
"loss": 2.3485,
"step": 8300
},
{
"epoch": 0.6134336061343361,
"grad_norm": 0.35051462054252625,
"learning_rate": 3.866401238664012e-06,
"loss": 2.3486,
"step": 8320
},
{
"epoch": 0.614908206149082,
"grad_norm": 0.3554931581020355,
"learning_rate": 3.851655238516553e-06,
"loss": 2.3626,
"step": 8340
},
{
"epoch": 0.616382806163828,
"grad_norm": 0.36661261320114136,
"learning_rate": 3.836909238369092e-06,
"loss": 2.3405,
"step": 8360
},
{
"epoch": 0.6178574061785741,
"grad_norm": 0.3876403570175171,
"learning_rate": 3.822163238221633e-06,
"loss": 2.3422,
"step": 8380
},
{
"epoch": 0.6193320061933201,
"grad_norm": 0.3810766637325287,
"learning_rate": 3.8074172380741724e-06,
"loss": 2.3466,
"step": 8400
},
{
"epoch": 0.620806606208066,
"grad_norm": 0.3949829638004303,
"learning_rate": 3.7926712379267128e-06,
"loss": 2.3501,
"step": 8420
},
{
"epoch": 0.622281206222812,
"grad_norm": 0.39543384313583374,
"learning_rate": 3.7779252377792523e-06,
"loss": 2.3365,
"step": 8440
},
{
"epoch": 0.6237558062375581,
"grad_norm": 0.3472473919391632,
"learning_rate": 3.7631792376317928e-06,
"loss": 2.3544,
"step": 8460
},
{
"epoch": 0.6252304062523041,
"grad_norm": 0.3589697778224945,
"learning_rate": 3.7484332374843323e-06,
"loss": 2.3402,
"step": 8480
},
{
"epoch": 0.6267050062670501,
"grad_norm": 0.37059295177459717,
"learning_rate": 3.7336872373368727e-06,
"loss": 2.3462,
"step": 8500
},
{
"epoch": 0.628179606281796,
"grad_norm": 0.40565216541290283,
"learning_rate": 3.7189412371894127e-06,
"loss": 2.3501,
"step": 8520
},
{
"epoch": 0.6296542062965421,
"grad_norm": 0.36283183097839355,
"learning_rate": 3.7041952370419527e-06,
"loss": 2.3423,
"step": 8540
},
{
"epoch": 0.6311288063112881,
"grad_norm": 0.3886597156524658,
"learning_rate": 3.6894492368944927e-06,
"loss": 2.3386,
"step": 8560
},
{
"epoch": 0.6326034063260341,
"grad_norm": 0.3656463027000427,
"learning_rate": 3.6747032367470327e-06,
"loss": 2.3526,
"step": 8580
},
{
"epoch": 0.63407800634078,
"grad_norm": 0.4167777895927429,
"learning_rate": 3.6599572365995727e-06,
"loss": 2.3581,
"step": 8600
},
{
"epoch": 0.635552606355526,
"grad_norm": 0.3539351522922516,
"learning_rate": 3.6452112364521127e-06,
"loss": 2.3396,
"step": 8620
},
{
"epoch": 0.6370272063702721,
"grad_norm": 0.3550587296485901,
"learning_rate": 3.6304652363046527e-06,
"loss": 2.344,
"step": 8640
},
{
"epoch": 0.6385018063850181,
"grad_norm": 0.3939066529273987,
"learning_rate": 3.6157192361571927e-06,
"loss": 2.357,
"step": 8660
},
{
"epoch": 0.639976406399764,
"grad_norm": 0.3614286482334137,
"learning_rate": 3.6009732360097326e-06,
"loss": 2.3433,
"step": 8680
},
{
"epoch": 0.64145100641451,
"grad_norm": 0.402174711227417,
"learning_rate": 3.586227235862272e-06,
"loss": 2.3329,
"step": 8700
},
{
"epoch": 0.6429256064292561,
"grad_norm": 0.4092716574668884,
"learning_rate": 3.5714812357148126e-06,
"loss": 2.3489,
"step": 8720
},
{
"epoch": 0.6444002064440021,
"grad_norm": 0.3816792666912079,
"learning_rate": 3.556735235567353e-06,
"loss": 2.3493,
"step": 8740
},
{
"epoch": 0.6458748064587481,
"grad_norm": 0.34877556562423706,
"learning_rate": 3.5419892354198926e-06,
"loss": 2.3694,
"step": 8760
},
{
"epoch": 0.647349406473494,
"grad_norm": 0.39949148893356323,
"learning_rate": 3.527243235272433e-06,
"loss": 2.3366,
"step": 8780
},
{
"epoch": 0.64882400648824,
"grad_norm": 0.3934047222137451,
"learning_rate": 3.5124972351249726e-06,
"loss": 2.3412,
"step": 8800
},
{
"epoch": 0.6502986065029861,
"grad_norm": 0.3608020842075348,
"learning_rate": 3.497751234977513e-06,
"loss": 2.3325,
"step": 8820
},
{
"epoch": 0.6517732065177321,
"grad_norm": 0.3528784215450287,
"learning_rate": 3.4830052348300526e-06,
"loss": 2.3539,
"step": 8840
},
{
"epoch": 0.653247806532478,
"grad_norm": 0.34913375973701477,
"learning_rate": 3.4682592346825925e-06,
"loss": 2.3501,
"step": 8860
},
{
"epoch": 0.654722406547224,
"grad_norm": 0.39281442761421204,
"learning_rate": 3.4535132345351325e-06,
"loss": 2.3413,
"step": 8880
},
{
"epoch": 0.6561970065619701,
"grad_norm": 0.352173775434494,
"learning_rate": 3.4387672343876725e-06,
"loss": 2.3636,
"step": 8900
},
{
"epoch": 0.6576716065767161,
"grad_norm": 0.44138431549072266,
"learning_rate": 3.4240212342402125e-06,
"loss": 2.3365,
"step": 8920
},
{
"epoch": 0.6591462065914621,
"grad_norm": 0.38679641485214233,
"learning_rate": 3.4092752340927525e-06,
"loss": 2.3473,
"step": 8940
},
{
"epoch": 0.660620806606208,
"grad_norm": 0.37204068899154663,
"learning_rate": 3.394529233945293e-06,
"loss": 2.3422,
"step": 8960
},
{
"epoch": 0.6620954066209541,
"grad_norm": 0.36612871289253235,
"learning_rate": 3.3797832337978325e-06,
"loss": 2.3423,
"step": 8980
},
{
"epoch": 0.6635700066357001,
"grad_norm": 0.36784979701042175,
"learning_rate": 3.365037233650373e-06,
"loss": 2.3458,
"step": 9000
},
{
"epoch": 0.6650446066504461,
"grad_norm": 0.3783581852912903,
"learning_rate": 3.3502912335029125e-06,
"loss": 2.351,
"step": 9020
},
{
"epoch": 0.666519206665192,
"grad_norm": 0.4059109389781952,
"learning_rate": 3.335545233355453e-06,
"loss": 2.3504,
"step": 9040
},
{
"epoch": 0.667993806679938,
"grad_norm": 0.39486610889434814,
"learning_rate": 3.3207992332079924e-06,
"loss": 2.3425,
"step": 9060
},
{
"epoch": 0.6694684066946841,
"grad_norm": 0.35696399211883545,
"learning_rate": 3.306053233060533e-06,
"loss": 2.3668,
"step": 9080
},
{
"epoch": 0.6709430067094301,
"grad_norm": 0.3573276102542877,
"learning_rate": 3.2913072329130724e-06,
"loss": 2.3503,
"step": 9100
},
{
"epoch": 0.672417606724176,
"grad_norm": 0.4014319181442261,
"learning_rate": 3.276561232765613e-06,
"loss": 2.3364,
"step": 9120
},
{
"epoch": 0.673892206738922,
"grad_norm": 0.3681239187717438,
"learning_rate": 3.2618152326181524e-06,
"loss": 2.3451,
"step": 9140
},
{
"epoch": 0.6753668067536681,
"grad_norm": 0.4503883421421051,
"learning_rate": 3.247069232470693e-06,
"loss": 2.3408,
"step": 9160
},
{
"epoch": 0.6768414067684141,
"grad_norm": 0.37346574664115906,
"learning_rate": 3.232323232323233e-06,
"loss": 2.3598,
"step": 9180
},
{
"epoch": 0.6783160067831601,
"grad_norm": 0.3615313470363617,
"learning_rate": 3.2175772321757724e-06,
"loss": 2.3434,
"step": 9200
},
{
"epoch": 0.679790606797906,
"grad_norm": 0.3929205536842346,
"learning_rate": 3.2028312320283128e-06,
"loss": 2.3439,
"step": 9220
},
{
"epoch": 0.681265206812652,
"grad_norm": 0.37737640738487244,
"learning_rate": 3.1880852318808523e-06,
"loss": 2.349,
"step": 9240
},
{
"epoch": 0.6827398068273981,
"grad_norm": 0.41763532161712646,
"learning_rate": 3.1733392317333928e-06,
"loss": 2.3422,
"step": 9260
},
{
"epoch": 0.6842144068421441,
"grad_norm": 0.4058336615562439,
"learning_rate": 3.1585932315859323e-06,
"loss": 2.3459,
"step": 9280
},
{
"epoch": 0.68568900685689,
"grad_norm": 0.3860774636268616,
"learning_rate": 3.1438472314384727e-06,
"loss": 2.3524,
"step": 9300
},
{
"epoch": 0.687163606871636,
"grad_norm": 0.401663213968277,
"learning_rate": 3.1291012312910123e-06,
"loss": 2.3326,
"step": 9320
},
{
"epoch": 0.6886382068863821,
"grad_norm": 0.3691912889480591,
"learning_rate": 3.1143552311435527e-06,
"loss": 2.337,
"step": 9340
},
{
"epoch": 0.6901128069011281,
"grad_norm": 0.4243042767047882,
"learning_rate": 3.0996092309960923e-06,
"loss": 2.3432,
"step": 9360
},
{
"epoch": 0.6915874069158741,
"grad_norm": 0.3880211412906647,
"learning_rate": 3.0848632308486327e-06,
"loss": 2.3664,
"step": 9380
},
{
"epoch": 0.69306200693062,
"grad_norm": 0.3545699119567871,
"learning_rate": 3.0701172307011723e-06,
"loss": 2.3448,
"step": 9400
},
{
"epoch": 0.6945366069453661,
"grad_norm": 0.38192018866539,
"learning_rate": 3.0553712305537127e-06,
"loss": 2.3564,
"step": 9420
},
{
"epoch": 0.6960112069601121,
"grad_norm": 0.39317330718040466,
"learning_rate": 3.0406252304062527e-06,
"loss": 2.3676,
"step": 9440
},
{
"epoch": 0.6974858069748581,
"grad_norm": 0.3686807453632355,
"learning_rate": 3.0258792302587927e-06,
"loss": 2.3513,
"step": 9460
},
{
"epoch": 0.698960406989604,
"grad_norm": 0.43928787112236023,
"learning_rate": 3.0111332301113326e-06,
"loss": 2.3368,
"step": 9480
},
{
"epoch": 0.70043500700435,
"grad_norm": 0.3794805705547333,
"learning_rate": 2.9963872299638726e-06,
"loss": 2.3504,
"step": 9500
},
{
"epoch": 0.7019096070190961,
"grad_norm": 0.391825407743454,
"learning_rate": 2.9816412298164126e-06,
"loss": 2.355,
"step": 9520
},
{
"epoch": 0.7033842070338421,
"grad_norm": 0.37879860401153564,
"learning_rate": 2.966895229668952e-06,
"loss": 2.3454,
"step": 9540
},
{
"epoch": 0.704858807048588,
"grad_norm": 0.4129418134689331,
"learning_rate": 2.9521492295214926e-06,
"loss": 2.3496,
"step": 9560
},
{
"epoch": 0.706333407063334,
"grad_norm": 0.4124239683151245,
"learning_rate": 2.937403229374032e-06,
"loss": 2.353,
"step": 9580
},
{
"epoch": 0.7078080070780801,
"grad_norm": 0.391527384519577,
"learning_rate": 2.9226572292265726e-06,
"loss": 2.3397,
"step": 9600
},
{
"epoch": 0.7092826070928261,
"grad_norm": 0.4149307906627655,
"learning_rate": 2.907911229079112e-06,
"loss": 2.3408,
"step": 9620
},
{
"epoch": 0.7107572071075721,
"grad_norm": 0.3845043480396271,
"learning_rate": 2.8931652289316526e-06,
"loss": 2.3498,
"step": 9640
},
{
"epoch": 0.712231807122318,
"grad_norm": 0.355175256729126,
"learning_rate": 2.878419228784193e-06,
"loss": 2.3558,
"step": 9660
},
{
"epoch": 0.7137064071370641,
"grad_norm": 0.37544727325439453,
"learning_rate": 2.8636732286367325e-06,
"loss": 2.3629,
"step": 9680
},
{
"epoch": 0.7151810071518101,
"grad_norm": 0.394980251789093,
"learning_rate": 2.8489272284892725e-06,
"loss": 2.3456,
"step": 9700
},
{
"epoch": 0.7166556071665561,
"grad_norm": 0.39948976039886475,
"learning_rate": 2.8341812283418125e-06,
"loss": 2.3542,
"step": 9720
},
{
"epoch": 0.718130207181302,
"grad_norm": 0.3865370750427246,
"learning_rate": 2.8194352281943525e-06,
"loss": 2.3262,
"step": 9740
},
{
"epoch": 0.719604807196048,
"grad_norm": 0.36873430013656616,
"learning_rate": 2.8046892280468925e-06,
"loss": 2.343,
"step": 9760
},
{
"epoch": 0.7210794072107941,
"grad_norm": 0.37330904603004456,
"learning_rate": 2.7899432278994325e-06,
"loss": 2.3629,
"step": 9780
},
{
"epoch": 0.7225540072255401,
"grad_norm": 0.392511785030365,
"learning_rate": 2.7751972277519725e-06,
"loss": 2.3364,
"step": 9800
},
{
"epoch": 0.7240286072402861,
"grad_norm": 0.4280540943145752,
"learning_rate": 2.7604512276045125e-06,
"loss": 2.344,
"step": 9820
},
{
"epoch": 0.725503207255032,
"grad_norm": 0.36759504675865173,
"learning_rate": 2.7457052274570525e-06,
"loss": 2.3639,
"step": 9840
},
{
"epoch": 0.7269778072697781,
"grad_norm": 0.3967907428741455,
"learning_rate": 2.7309592273095924e-06,
"loss": 2.3459,
"step": 9860
},
{
"epoch": 0.7284524072845241,
"grad_norm": 0.3818022906780243,
"learning_rate": 2.716213227162133e-06,
"loss": 2.3471,
"step": 9880
},
{
"epoch": 0.7299270072992701,
"grad_norm": 0.3735957443714142,
"learning_rate": 2.7014672270146724e-06,
"loss": 2.3324,
"step": 9900
},
{
"epoch": 0.731401607314016,
"grad_norm": 0.3850245773792267,
"learning_rate": 2.686721226867213e-06,
"loss": 2.3563,
"step": 9920
},
{
"epoch": 0.732876207328762,
"grad_norm": 0.3557223081588745,
"learning_rate": 2.6719752267197524e-06,
"loss": 2.3416,
"step": 9940
},
{
"epoch": 0.7343508073435081,
"grad_norm": 0.3680027723312378,
"learning_rate": 2.657229226572293e-06,
"loss": 2.3486,
"step": 9960
},
{
"epoch": 0.7358254073582541,
"grad_norm": 0.39317336678504944,
"learning_rate": 2.6424832264248324e-06,
"loss": 2.3489,
"step": 9980
},
{
"epoch": 0.737300007373,
"grad_norm": 0.44913551211357117,
"learning_rate": 2.627737226277373e-06,
"loss": 2.3476,
"step": 10000
},
{
"epoch": 0.738774607387746,
"grad_norm": 0.4089840054512024,
"learning_rate": 2.6129912261299124e-06,
"loss": 2.344,
"step": 10020
},
{
"epoch": 0.7402492074024921,
"grad_norm": 0.37188851833343506,
"learning_rate": 2.5982452259824523e-06,
"loss": 2.3488,
"step": 10040
},
{
"epoch": 0.7417238074172381,
"grad_norm": 0.39748087525367737,
"learning_rate": 2.5834992258349923e-06,
"loss": 2.3651,
"step": 10060
},
{
"epoch": 0.7431984074319841,
"grad_norm": 0.413461297750473,
"learning_rate": 2.5687532256875323e-06,
"loss": 2.3484,
"step": 10080
},
{
"epoch": 0.74467300744673,
"grad_norm": 0.3835908770561218,
"learning_rate": 2.5540072255400723e-06,
"loss": 2.3542,
"step": 10100
},
{
"epoch": 0.7461476074614761,
"grad_norm": 0.3700994551181793,
"learning_rate": 2.5392612253926123e-06,
"loss": 2.3559,
"step": 10120
},
{
"epoch": 0.7476222074762221,
"grad_norm": 0.3828827738761902,
"learning_rate": 2.5245152252451527e-06,
"loss": 2.3453,
"step": 10140
},
{
"epoch": 0.7490968074909681,
"grad_norm": 0.36642786860466003,
"learning_rate": 2.5097692250976923e-06,
"loss": 2.3426,
"step": 10160
},
{
"epoch": 0.750571407505714,
"grad_norm": 0.44964683055877686,
"learning_rate": 2.4950232249502323e-06,
"loss": 2.3403,
"step": 10180
},
{
"epoch": 0.75204600752046,
"grad_norm": 0.36480045318603516,
"learning_rate": 2.4802772248027723e-06,
"loss": 2.3374,
"step": 10200
},
{
"epoch": 0.7535206075352061,
"grad_norm": 0.378704309463501,
"learning_rate": 2.4655312246553127e-06,
"loss": 2.3421,
"step": 10220
},
{
"epoch": 0.7549952075499521,
"grad_norm": 0.3538469970226288,
"learning_rate": 2.4507852245078527e-06,
"loss": 2.3389,
"step": 10240
},
{
"epoch": 0.7564698075646981,
"grad_norm": 0.3797251284122467,
"learning_rate": 2.4360392243603927e-06,
"loss": 2.3336,
"step": 10260
},
{
"epoch": 0.757944407579444,
"grad_norm": 0.4074282944202423,
"learning_rate": 2.4212932242129326e-06,
"loss": 2.3509,
"step": 10280
},
{
"epoch": 0.7594190075941901,
"grad_norm": 0.38486722111701965,
"learning_rate": 2.4065472240654726e-06,
"loss": 2.3461,
"step": 10300
},
{
"epoch": 0.7608936076089361,
"grad_norm": 0.3682025372982025,
"learning_rate": 2.3918012239180126e-06,
"loss": 2.3539,
"step": 10320
},
{
"epoch": 0.7623682076236821,
"grad_norm": 0.36342653632164,
"learning_rate": 2.3770552237705526e-06,
"loss": 2.3449,
"step": 10340
},
{
"epoch": 0.763842807638428,
"grad_norm": 0.3666427433490753,
"learning_rate": 2.3623092236230926e-06,
"loss": 2.3422,
"step": 10360
},
{
"epoch": 0.7653174076531741,
"grad_norm": 0.4068521559238434,
"learning_rate": 2.347563223475632e-06,
"loss": 2.3419,
"step": 10380
},
{
"epoch": 0.7667920076679201,
"grad_norm": 0.3707336485385895,
"learning_rate": 2.332817223328172e-06,
"loss": 2.3598,
"step": 10400
},
{
"epoch": 0.7682666076826661,
"grad_norm": 0.3835085332393646,
"learning_rate": 2.318071223180712e-06,
"loss": 2.3303,
"step": 10420
},
{
"epoch": 0.769741207697412,
"grad_norm": 0.35998964309692383,
"learning_rate": 2.303325223033252e-06,
"loss": 2.347,
"step": 10440
},
{
"epoch": 0.771215807712158,
"grad_norm": 0.6574367880821228,
"learning_rate": 2.2885792228857925e-06,
"loss": 2.3422,
"step": 10460
},
{
"epoch": 0.7726904077269041,
"grad_norm": 0.391596257686615,
"learning_rate": 2.2738332227383325e-06,
"loss": 2.3322,
"step": 10480
},
{
"epoch": 0.7741650077416501,
"grad_norm": 0.37120455503463745,
"learning_rate": 2.2590872225908725e-06,
"loss": 2.344,
"step": 10500
},
{
"epoch": 0.7756396077563961,
"grad_norm": 0.43302685022354126,
"learning_rate": 2.2443412224434125e-06,
"loss": 2.3523,
"step": 10520
},
{
"epoch": 0.777114207771142,
"grad_norm": 0.3854134678840637,
"learning_rate": 2.2295952222959525e-06,
"loss": 2.3581,
"step": 10540
},
{
"epoch": 0.7785888077858881,
"grad_norm": 0.37082743644714355,
"learning_rate": 2.2148492221484925e-06,
"loss": 2.3417,
"step": 10560
},
{
"epoch": 0.7800634078006341,
"grad_norm": 0.3822407126426697,
"learning_rate": 2.2001032220010325e-06,
"loss": 2.3432,
"step": 10580
},
{
"epoch": 0.7815380078153801,
"grad_norm": 0.36346620321273804,
"learning_rate": 2.1853572218535725e-06,
"loss": 2.3525,
"step": 10600
},
{
"epoch": 0.783012607830126,
"grad_norm": 0.3793860375881195,
"learning_rate": 2.1706112217061125e-06,
"loss": 2.3372,
"step": 10620
},
{
"epoch": 0.784487207844872,
"grad_norm": 0.36953747272491455,
"learning_rate": 2.1558652215586525e-06,
"loss": 2.3496,
"step": 10640
},
{
"epoch": 0.7859618078596181,
"grad_norm": 0.38170909881591797,
"learning_rate": 2.1411192214111924e-06,
"loss": 2.3468,
"step": 10660
},
{
"epoch": 0.7874364078743641,
"grad_norm": 0.3705514073371887,
"learning_rate": 2.1263732212637324e-06,
"loss": 2.3288,
"step": 10680
},
{
"epoch": 0.7889110078891101,
"grad_norm": 0.3680097460746765,
"learning_rate": 2.1116272211162724e-06,
"loss": 2.3361,
"step": 10700
},
{
"epoch": 0.790385607903856,
"grad_norm": 0.35381895303726196,
"learning_rate": 2.0968812209688124e-06,
"loss": 2.3523,
"step": 10720
},
{
"epoch": 0.7918602079186021,
"grad_norm": 0.3935386836528778,
"learning_rate": 2.0821352208213524e-06,
"loss": 2.3489,
"step": 10740
},
{
"epoch": 0.7933348079333481,
"grad_norm": 0.4096679985523224,
"learning_rate": 2.0673892206738924e-06,
"loss": 2.3416,
"step": 10760
},
{
"epoch": 0.7948094079480941,
"grad_norm": 0.37470847368240356,
"learning_rate": 2.0526432205264324e-06,
"loss": 2.3371,
"step": 10780
},
{
"epoch": 0.79628400796284,
"grad_norm": 0.34806546568870544,
"learning_rate": 2.0378972203789724e-06,
"loss": 2.342,
"step": 10800
},
{
"epoch": 0.7977586079775861,
"grad_norm": 0.39850929379463196,
"learning_rate": 2.0231512202315124e-06,
"loss": 2.3342,
"step": 10820
},
{
"epoch": 0.7992332079923321,
"grad_norm": 0.3578292429447174,
"learning_rate": 2.0084052200840523e-06,
"loss": 2.3459,
"step": 10840
},
{
"epoch": 0.8007078080070781,
"grad_norm": 0.38795024156570435,
"learning_rate": 1.9936592199365923e-06,
"loss": 2.345,
"step": 10860
},
{
"epoch": 0.8021824080218241,
"grad_norm": 0.43707677721977234,
"learning_rate": 1.9789132197891323e-06,
"loss": 2.3461,
"step": 10880
},
{
"epoch": 0.80365700803657,
"grad_norm": 0.38418900966644287,
"learning_rate": 1.9641672196416723e-06,
"loss": 2.342,
"step": 10900
},
{
"epoch": 0.8051316080513161,
"grad_norm": 0.3943639099597931,
"learning_rate": 1.9494212194942123e-06,
"loss": 2.35,
"step": 10920
},
{
"epoch": 0.8066062080660621,
"grad_norm": 0.36821678280830383,
"learning_rate": 1.9346752193467523e-06,
"loss": 2.3396,
"step": 10940
},
{
"epoch": 0.8080808080808081,
"grad_norm": 0.39869850873947144,
"learning_rate": 1.9199292191992923e-06,
"loss": 2.3631,
"step": 10960
},
{
"epoch": 0.809555408095554,
"grad_norm": 0.3753523528575897,
"learning_rate": 1.9051832190518325e-06,
"loss": 2.3482,
"step": 10980
},
{
"epoch": 0.8110300081103001,
"grad_norm": 0.4102868139743805,
"learning_rate": 1.8904372189043725e-06,
"loss": 2.3503,
"step": 11000
},
{
"epoch": 0.8125046081250461,
"grad_norm": 0.37794923782348633,
"learning_rate": 1.8756912187569122e-06,
"loss": 2.334,
"step": 11020
},
{
"epoch": 0.8139792081397921,
"grad_norm": 0.3869600296020508,
"learning_rate": 1.8609452186094522e-06,
"loss": 2.3432,
"step": 11040
},
{
"epoch": 0.815453808154538,
"grad_norm": 0.3850298821926117,
"learning_rate": 1.8461992184619922e-06,
"loss": 2.3408,
"step": 11060
},
{
"epoch": 0.8169284081692841,
"grad_norm": 0.36297619342803955,
"learning_rate": 1.8314532183145322e-06,
"loss": 2.3476,
"step": 11080
},
{
"epoch": 0.8184030081840301,
"grad_norm": 0.3866339325904846,
"learning_rate": 1.8167072181670722e-06,
"loss": 2.3553,
"step": 11100
},
{
"epoch": 0.8198776081987761,
"grad_norm": 0.41206780076026917,
"learning_rate": 1.8019612180196122e-06,
"loss": 2.3523,
"step": 11120
},
{
"epoch": 0.8213522082135221,
"grad_norm": 0.35867977142333984,
"learning_rate": 1.7872152178721524e-06,
"loss": 2.3478,
"step": 11140
},
{
"epoch": 0.822826808228268,
"grad_norm": 0.3772488236427307,
"learning_rate": 1.7724692177246924e-06,
"loss": 2.3411,
"step": 11160
},
{
"epoch": 0.8243014082430141,
"grad_norm": 0.4143722355365753,
"learning_rate": 1.7577232175772324e-06,
"loss": 2.3466,
"step": 11180
},
{
"epoch": 0.8257760082577601,
"grad_norm": 0.3593420684337616,
"learning_rate": 1.7429772174297724e-06,
"loss": 2.3542,
"step": 11200
},
{
"epoch": 0.8272506082725061,
"grad_norm": 0.3915468454360962,
"learning_rate": 1.7282312172823124e-06,
"loss": 2.3357,
"step": 11220
},
{
"epoch": 0.828725208287252,
"grad_norm": 0.3736458718776703,
"learning_rate": 1.7134852171348523e-06,
"loss": 2.3371,
"step": 11240
},
{
"epoch": 0.8301998083019981,
"grad_norm": 0.3699648976325989,
"learning_rate": 1.6987392169873923e-06,
"loss": 2.3514,
"step": 11260
},
{
"epoch": 0.8316744083167441,
"grad_norm": 0.37435752153396606,
"learning_rate": 1.6839932168399323e-06,
"loss": 2.3535,
"step": 11280
},
{
"epoch": 0.8331490083314901,
"grad_norm": 0.4178158640861511,
"learning_rate": 1.6692472166924723e-06,
"loss": 2.3212,
"step": 11300
},
{
"epoch": 0.8346236083462361,
"grad_norm": 0.38455960154533386,
"learning_rate": 1.6545012165450123e-06,
"loss": 2.3473,
"step": 11320
},
{
"epoch": 0.836098208360982,
"grad_norm": 0.3831127882003784,
"learning_rate": 1.6397552163975523e-06,
"loss": 2.3565,
"step": 11340
},
{
"epoch": 0.8375728083757281,
"grad_norm": 0.4265178442001343,
"learning_rate": 1.625009216250092e-06,
"loss": 2.341,
"step": 11360
},
{
"epoch": 0.8390474083904741,
"grad_norm": 0.3953835666179657,
"learning_rate": 1.6102632161026325e-06,
"loss": 2.3405,
"step": 11380
},
{
"epoch": 0.8405220084052201,
"grad_norm": 0.39547884464263916,
"learning_rate": 1.5955172159551725e-06,
"loss": 2.3385,
"step": 11400
},
{
"epoch": 0.841996608419966,
"grad_norm": 0.352119505405426,
"learning_rate": 1.5807712158077125e-06,
"loss": 2.3277,
"step": 11420
},
{
"epoch": 0.8434712084347121,
"grad_norm": 0.4068032205104828,
"learning_rate": 1.5660252156602522e-06,
"loss": 2.3498,
"step": 11440
},
{
"epoch": 0.8449458084494581,
"grad_norm": 0.4023716151714325,
"learning_rate": 1.5512792155127922e-06,
"loss": 2.373,
"step": 11460
},
{
"epoch": 0.8464204084642041,
"grad_norm": 0.36872199177742004,
"learning_rate": 1.5365332153653322e-06,
"loss": 2.346,
"step": 11480
},
{
"epoch": 0.84789500847895,
"grad_norm": 0.37465929985046387,
"learning_rate": 1.5217872152178722e-06,
"loss": 2.3464,
"step": 11500
},
{
"epoch": 0.8493696084936961,
"grad_norm": 0.37545257806777954,
"learning_rate": 1.5070412150704122e-06,
"loss": 2.3493,
"step": 11520
},
{
"epoch": 0.8508442085084421,
"grad_norm": 0.36542752385139465,
"learning_rate": 1.4922952149229522e-06,
"loss": 2.348,
"step": 11540
},
{
"epoch": 0.8523188085231881,
"grad_norm": 0.36369800567626953,
"learning_rate": 1.4775492147754922e-06,
"loss": 2.3648,
"step": 11560
},
{
"epoch": 0.8537934085379341,
"grad_norm": 0.3845520615577698,
"learning_rate": 1.4628032146280322e-06,
"loss": 2.3521,
"step": 11580
},
{
"epoch": 0.85526800855268,
"grad_norm": 0.3865341246128082,
"learning_rate": 1.4480572144805722e-06,
"loss": 2.3442,
"step": 11600
},
{
"epoch": 0.8567426085674261,
"grad_norm": 0.37881407141685486,
"learning_rate": 1.4333112143331124e-06,
"loss": 2.3469,
"step": 11620
},
{
"epoch": 0.8582172085821721,
"grad_norm": 0.38905656337738037,
"learning_rate": 1.4185652141856523e-06,
"loss": 2.357,
"step": 11640
},
{
"epoch": 0.8596918085969181,
"grad_norm": 0.40676259994506836,
"learning_rate": 1.4038192140381923e-06,
"loss": 2.34,
"step": 11660
},
{
"epoch": 0.861166408611664,
"grad_norm": 0.3595060110092163,
"learning_rate": 1.3890732138907323e-06,
"loss": 2.3351,
"step": 11680
},
{
"epoch": 0.8626410086264101,
"grad_norm": 0.39331743121147156,
"learning_rate": 1.3743272137432723e-06,
"loss": 2.3463,
"step": 11700
},
{
"epoch": 0.8641156086411561,
"grad_norm": 0.3540342450141907,
"learning_rate": 1.3595812135958123e-06,
"loss": 2.3438,
"step": 11720
},
{
"epoch": 0.8655902086559021,
"grad_norm": 0.40179315209388733,
"learning_rate": 1.3448352134483523e-06,
"loss": 2.3518,
"step": 11740
},
{
"epoch": 0.8670648086706481,
"grad_norm": 0.3446930944919586,
"learning_rate": 1.3300892133008923e-06,
"loss": 2.3446,
"step": 11760
},
{
"epoch": 0.868539408685394,
"grad_norm": 0.3802293539047241,
"learning_rate": 1.3153432131534323e-06,
"loss": 2.3442,
"step": 11780
},
{
"epoch": 0.8700140087001401,
"grad_norm": 0.4139614403247833,
"learning_rate": 1.300597213005972e-06,
"loss": 2.3283,
"step": 11800
},
{
"epoch": 0.8714886087148861,
"grad_norm": 0.36472469568252563,
"learning_rate": 1.285851212858512e-06,
"loss": 2.3493,
"step": 11820
},
{
"epoch": 0.8729632087296321,
"grad_norm": 0.36495742201805115,
"learning_rate": 1.2711052127110524e-06,
"loss": 2.3424,
"step": 11840
},
{
"epoch": 0.874437808744378,
"grad_norm": 0.3818816840648651,
"learning_rate": 1.2563592125635924e-06,
"loss": 2.355,
"step": 11860
},
{
"epoch": 0.8759124087591241,
"grad_norm": 0.4109640419483185,
"learning_rate": 1.2416132124161322e-06,
"loss": 2.3367,
"step": 11880
},
{
"epoch": 0.8773870087738701,
"grad_norm": 0.38012924790382385,
"learning_rate": 1.2268672122686722e-06,
"loss": 2.342,
"step": 11900
},
{
"epoch": 0.8788616087886161,
"grad_norm": 0.4005228877067566,
"learning_rate": 1.2121212121212122e-06,
"loss": 2.3479,
"step": 11920
},
{
"epoch": 0.880336208803362,
"grad_norm": 0.3996869623661041,
"learning_rate": 1.1973752119737522e-06,
"loss": 2.3539,
"step": 11940
},
{
"epoch": 0.8818108088181081,
"grad_norm": 0.4269565939903259,
"learning_rate": 1.1826292118262922e-06,
"loss": 2.3413,
"step": 11960
},
{
"epoch": 0.8832854088328541,
"grad_norm": 0.3505631983280182,
"learning_rate": 1.1678832116788322e-06,
"loss": 2.3542,
"step": 11980
},
{
"epoch": 0.8847600088476001,
"grad_norm": 0.3690703213214874,
"learning_rate": 1.1531372115313722e-06,
"loss": 2.3461,
"step": 12000
},
{
"epoch": 0.8862346088623461,
"grad_norm": 0.40082600712776184,
"learning_rate": 1.1383912113839124e-06,
"loss": 2.3473,
"step": 12020
},
{
"epoch": 0.887709208877092,
"grad_norm": 0.3835630714893341,
"learning_rate": 1.1236452112364521e-06,
"loss": 2.3459,
"step": 12040
},
{
"epoch": 0.8891838088918381,
"grad_norm": 0.41259172558784485,
"learning_rate": 1.1088992110889921e-06,
"loss": 2.3486,
"step": 12060
},
{
"epoch": 0.8906584089065841,
"grad_norm": 0.40323716402053833,
"learning_rate": 1.0941532109415321e-06,
"loss": 2.3371,
"step": 12080
},
{
"epoch": 0.8921330089213301,
"grad_norm": 0.36476635932922363,
"learning_rate": 1.079407210794072e-06,
"loss": 2.3514,
"step": 12100
},
{
"epoch": 0.893607608936076,
"grad_norm": 0.38329634070396423,
"learning_rate": 1.064661210646612e-06,
"loss": 2.3238,
"step": 12120
},
{
"epoch": 0.8950822089508221,
"grad_norm": 0.38826659321784973,
"learning_rate": 1.0499152104991523e-06,
"loss": 2.3481,
"step": 12140
},
{
"epoch": 0.8965568089655681,
"grad_norm": 0.43189194798469543,
"learning_rate": 1.0351692103516923e-06,
"loss": 2.3562,
"step": 12160
},
{
"epoch": 0.8980314089803141,
"grad_norm": 0.3478831946849823,
"learning_rate": 1.0204232102042323e-06,
"loss": 2.3451,
"step": 12180
},
{
"epoch": 0.8995060089950601,
"grad_norm": 0.3830432891845703,
"learning_rate": 1.0056772100567723e-06,
"loss": 2.3351,
"step": 12200
},
{
"epoch": 0.9009806090098061,
"grad_norm": 0.3779089152812958,
"learning_rate": 9.90931209909312e-07,
"loss": 2.3421,
"step": 12220
},
{
"epoch": 0.9024552090245521,
"grad_norm": 0.37689492106437683,
"learning_rate": 9.761852097618522e-07,
"loss": 2.3456,
"step": 12240
},
{
"epoch": 0.9039298090392981,
"grad_norm": 0.38662415742874146,
"learning_rate": 9.614392096143922e-07,
"loss": 2.3414,
"step": 12260
},
{
"epoch": 0.9054044090540441,
"grad_norm": 0.3387671411037445,
"learning_rate": 9.466932094669322e-07,
"loss": 2.3408,
"step": 12280
},
{
"epoch": 0.90687900906879,
"grad_norm": 0.37837815284729004,
"learning_rate": 9.319472093194722e-07,
"loss": 2.3413,
"step": 12300
},
{
"epoch": 0.9083536090835361,
"grad_norm": 0.35393086075782776,
"learning_rate": 9.172012091720121e-07,
"loss": 2.348,
"step": 12320
},
{
"epoch": 0.9098282090982821,
"grad_norm": 0.4034173786640167,
"learning_rate": 9.024552090245521e-07,
"loss": 2.3728,
"step": 12340
},
{
"epoch": 0.9113028091130281,
"grad_norm": 0.37714606523513794,
"learning_rate": 8.877092088770922e-07,
"loss": 2.3473,
"step": 12360
},
{
"epoch": 0.912777409127774,
"grad_norm": 0.41862747073173523,
"learning_rate": 8.729632087296322e-07,
"loss": 2.3446,
"step": 12380
},
{
"epoch": 0.9142520091425201,
"grad_norm": 0.3840957283973694,
"learning_rate": 8.582172085821722e-07,
"loss": 2.3374,
"step": 12400
},
{
"epoch": 0.9157266091572661,
"grad_norm": 0.3914671838283539,
"learning_rate": 8.434712084347121e-07,
"loss": 2.3307,
"step": 12420
},
{
"epoch": 0.9172012091720121,
"grad_norm": 0.38131779432296753,
"learning_rate": 8.287252082872521e-07,
"loss": 2.3293,
"step": 12440
},
{
"epoch": 0.9186758091867581,
"grad_norm": 0.35891827940940857,
"learning_rate": 8.139792081397921e-07,
"loss": 2.3502,
"step": 12460
},
{
"epoch": 0.920150409201504,
"grad_norm": 0.3831949532032013,
"learning_rate": 7.992332079923322e-07,
"loss": 2.3375,
"step": 12480
},
{
"epoch": 0.9216250092162501,
"grad_norm": 0.3593050241470337,
"learning_rate": 7.844872078448722e-07,
"loss": 2.3435,
"step": 12500
},
{
"epoch": 0.9230996092309961,
"grad_norm": 0.4091593027114868,
"learning_rate": 7.697412076974122e-07,
"loss": 2.3461,
"step": 12520
},
{
"epoch": 0.9245742092457421,
"grad_norm": 0.3784448206424713,
"learning_rate": 7.549952075499521e-07,
"loss": 2.352,
"step": 12540
},
{
"epoch": 0.926048809260488,
"grad_norm": 0.3828209936618805,
"learning_rate": 7.402492074024921e-07,
"loss": 2.3456,
"step": 12560
},
{
"epoch": 0.9275234092752341,
"grad_norm": 0.3510684072971344,
"learning_rate": 7.255032072550321e-07,
"loss": 2.3501,
"step": 12580
},
{
"epoch": 0.9289980092899801,
"grad_norm": 0.3757297694683075,
"learning_rate": 7.107572071075722e-07,
"loss": 2.3365,
"step": 12600
},
{
"epoch": 0.9304726093047261,
"grad_norm": 0.38082218170166016,
"learning_rate": 6.960112069601121e-07,
"loss": 2.3467,
"step": 12620
},
{
"epoch": 0.9319472093194721,
"grad_norm": 0.3628241717815399,
"learning_rate": 6.812652068126521e-07,
"loss": 2.3403,
"step": 12640
},
{
"epoch": 0.9334218093342181,
"grad_norm": 0.3504602611064911,
"learning_rate": 6.665192066651921e-07,
"loss": 2.3363,
"step": 12660
},
{
"epoch": 0.9348964093489641,
"grad_norm": 0.38298091292381287,
"learning_rate": 6.517732065177321e-07,
"loss": 2.3473,
"step": 12680
},
{
"epoch": 0.9363710093637101,
"grad_norm": 0.36593878269195557,
"learning_rate": 6.370272063702722e-07,
"loss": 2.3467,
"step": 12700
},
{
"epoch": 0.9378456093784561,
"grad_norm": 0.3678169250488281,
"learning_rate": 6.222812062228121e-07,
"loss": 2.3318,
"step": 12720
},
{
"epoch": 0.939320209393202,
"grad_norm": 0.36956238746643066,
"learning_rate": 6.075352060753521e-07,
"loss": 2.3183,
"step": 12740
},
{
"epoch": 0.9407948094079481,
"grad_norm": 0.34040504693984985,
"learning_rate": 5.927892059278921e-07,
"loss": 2.3532,
"step": 12760
},
{
"epoch": 0.9422694094226941,
"grad_norm": 0.3800647258758545,
"learning_rate": 5.780432057804321e-07,
"loss": 2.346,
"step": 12780
},
{
"epoch": 0.9437440094374401,
"grad_norm": 0.376245379447937,
"learning_rate": 5.632972056329722e-07,
"loss": 2.3461,
"step": 12800
},
{
"epoch": 0.945218609452186,
"grad_norm": 0.39852550625801086,
"learning_rate": 5.485512054855121e-07,
"loss": 2.3454,
"step": 12820
},
{
"epoch": 0.9466932094669321,
"grad_norm": 0.3686327338218689,
"learning_rate": 5.33805205338052e-07,
"loss": 2.3398,
"step": 12840
},
{
"epoch": 0.9481678094816781,
"grad_norm": 0.44276946783065796,
"learning_rate": 5.190592051905921e-07,
"loss": 2.3512,
"step": 12860
},
{
"epoch": 0.9496424094964241,
"grad_norm": 0.3805672824382782,
"learning_rate": 5.043132050431321e-07,
"loss": 2.3428,
"step": 12880
},
{
"epoch": 0.9511170095111701,
"grad_norm": 0.3791063129901886,
"learning_rate": 4.895672048956721e-07,
"loss": 2.3548,
"step": 12900
},
{
"epoch": 0.9525916095259161,
"grad_norm": 0.35641756653785706,
"learning_rate": 4.748212047482121e-07,
"loss": 2.3361,
"step": 12920
},
{
"epoch": 0.9540662095406621,
"grad_norm": 0.41041725873947144,
"learning_rate": 4.600752046007521e-07,
"loss": 2.3399,
"step": 12940
},
{
"epoch": 0.9555408095554081,
"grad_norm": 0.3670080304145813,
"learning_rate": 4.453292044532921e-07,
"loss": 2.3488,
"step": 12960
},
{
"epoch": 0.9570154095701541,
"grad_norm": 0.4006000757217407,
"learning_rate": 4.305832043058321e-07,
"loss": 2.3352,
"step": 12980
},
{
"epoch": 0.9584900095849,
"grad_norm": 0.3790174424648285,
"learning_rate": 4.1583720415837205e-07,
"loss": 2.3378,
"step": 13000
},
{
"epoch": 0.9599646095996461,
"grad_norm": 0.39817535877227783,
"learning_rate": 4.010912040109121e-07,
"loss": 2.3444,
"step": 13020
},
{
"epoch": 0.9614392096143921,
"grad_norm": 0.39086398482322693,
"learning_rate": 3.863452038634521e-07,
"loss": 2.3446,
"step": 13040
},
{
"epoch": 0.9629138096291381,
"grad_norm": 0.34894123673439026,
"learning_rate": 3.7159920371599207e-07,
"loss": 2.3554,
"step": 13060
},
{
"epoch": 0.9643884096438841,
"grad_norm": 0.37704578042030334,
"learning_rate": 3.568532035685321e-07,
"loss": 2.3389,
"step": 13080
},
{
"epoch": 0.9658630096586301,
"grad_norm": 0.4316023588180542,
"learning_rate": 3.4210720342107205e-07,
"loss": 2.339,
"step": 13100
},
{
"epoch": 0.9673376096733761,
"grad_norm": 0.38732075691223145,
"learning_rate": 3.2736120327361203e-07,
"loss": 2.3363,
"step": 13120
},
{
"epoch": 0.9688122096881221,
"grad_norm": 0.451651394367218,
"learning_rate": 3.126152031261521e-07,
"loss": 2.3435,
"step": 13140
},
{
"epoch": 0.9702868097028681,
"grad_norm": 0.41907384991645813,
"learning_rate": 2.9786920297869207e-07,
"loss": 2.3503,
"step": 13160
},
{
"epoch": 0.971761409717614,
"grad_norm": 0.3828534185886383,
"learning_rate": 2.8312320283123206e-07,
"loss": 2.364,
"step": 13180
},
{
"epoch": 0.9732360097323601,
"grad_norm": 0.3919944763183594,
"learning_rate": 2.6837720268377204e-07,
"loss": 2.3461,
"step": 13200
},
{
"epoch": 0.9747106097471061,
"grad_norm": 0.3798130452632904,
"learning_rate": 2.5363120253631203e-07,
"loss": 2.3376,
"step": 13220
},
{
"epoch": 0.9761852097618521,
"grad_norm": 0.3798838257789612,
"learning_rate": 2.388852023888521e-07,
"loss": 2.3473,
"step": 13240
},
{
"epoch": 0.977659809776598,
"grad_norm": 0.39622053503990173,
"learning_rate": 2.24139202241392e-07,
"loss": 2.3545,
"step": 13260
},
{
"epoch": 0.9791344097913441,
"grad_norm": 0.4032035768032074,
"learning_rate": 2.0939320209393203e-07,
"loss": 2.3571,
"step": 13280
},
{
"epoch": 0.9806090098060901,
"grad_norm": 0.38117462396621704,
"learning_rate": 1.9464720194647204e-07,
"loss": 2.3389,
"step": 13300
},
{
"epoch": 0.9820836098208361,
"grad_norm": 0.38328099250793457,
"learning_rate": 1.7990120179901206e-07,
"loss": 2.3606,
"step": 13320
},
{
"epoch": 0.9835582098355821,
"grad_norm": 0.3704865276813507,
"learning_rate": 1.6515520165155202e-07,
"loss": 2.344,
"step": 13340
},
{
"epoch": 0.9850328098503282,
"grad_norm": 0.3868178427219391,
"learning_rate": 1.5040920150409204e-07,
"loss": 2.3395,
"step": 13360
},
{
"epoch": 0.9865074098650741,
"grad_norm": 0.3584325313568115,
"learning_rate": 1.3566320135663203e-07,
"loss": 2.3508,
"step": 13380
},
{
"epoch": 0.9879820098798201,
"grad_norm": 0.389412522315979,
"learning_rate": 1.2091720120917202e-07,
"loss": 2.3484,
"step": 13400
},
{
"epoch": 0.9894566098945661,
"grad_norm": 0.3739551603794098,
"learning_rate": 1.0617120106171202e-07,
"loss": 2.3594,
"step": 13420
},
{
"epoch": 0.990931209909312,
"grad_norm": 0.3698684871196747,
"learning_rate": 9.142520091425201e-08,
"loss": 2.3275,
"step": 13440
},
{
"epoch": 0.9924058099240581,
"grad_norm": 0.4226621687412262,
"learning_rate": 7.667920076679201e-08,
"loss": 2.3412,
"step": 13460
},
{
"epoch": 0.9938804099388041,
"grad_norm": 0.4069538414478302,
"learning_rate": 6.1933200619332e-08,
"loss": 2.3561,
"step": 13480
},
{
"epoch": 0.9953550099535501,
"grad_norm": 0.36342284083366394,
"learning_rate": 4.7187200471872005e-08,
"loss": 2.3422,
"step": 13500
},
{
"epoch": 0.9968296099682961,
"grad_norm": 0.38066765666007996,
"learning_rate": 3.244120032441201e-08,
"loss": 2.3472,
"step": 13520
},
{
"epoch": 0.9983042099830421,
"grad_norm": 0.4299827218055725,
"learning_rate": 1.7695200176952003e-08,
"loss": 2.3462,
"step": 13540
},
{
"epoch": 0.9997788099977881,
"grad_norm": 0.4048897325992584,
"learning_rate": 2.9492000294920003e-09,
"loss": 2.3342,
"step": 13560
},
{
"epoch": 1.0,
"step": 13563,
"total_flos": 5.80008589556659e+18,
"train_loss": 2.354225961330374,
"train_runtime": 12020.9549,
"train_samples_per_second": 216.628,
"train_steps_per_second": 1.128
}
],
"logging_steps": 20,
"max_steps": 13563,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.80008589556659e+18,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}