{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 13563,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014746000147460002,
      "grad_norm": 0.4404066503047943,
      "learning_rate": 9.985991299859913e-06,
      "loss": 2.4466,
      "step": 20
    },
    {
      "epoch": 0.0029492000294920003,
      "grad_norm": 0.5653194785118103,
      "learning_rate": 9.971245299712454e-06,
      "loss": 2.4307,
      "step": 40
    },
    {
      "epoch": 0.004423800044238001,
      "grad_norm": 0.4618721902370453,
      "learning_rate": 9.956499299564993e-06,
      "loss": 2.4367,
      "step": 60
    },
    {
      "epoch": 0.005898400058984001,
      "grad_norm": 0.4391142427921295,
      "learning_rate": 9.941753299417534e-06,
      "loss": 2.4183,
      "step": 80
    },
    {
      "epoch": 0.007373000073730001,
      "grad_norm": 0.474587619304657,
      "learning_rate": 9.927007299270073e-06,
      "loss": 2.42,
      "step": 100
    },
    {
      "epoch": 0.008847600088476001,
      "grad_norm": 0.420256644487381,
      "learning_rate": 9.912261299122614e-06,
      "loss": 2.4164,
      "step": 120
    },
    {
      "epoch": 0.010322200103222,
      "grad_norm": 0.40069517493247986,
      "learning_rate": 9.897515298975153e-06,
      "loss": 2.3962,
      "step": 140
    },
    {
      "epoch": 0.011796800117968001,
      "grad_norm": 0.36043497920036316,
      "learning_rate": 9.882769298827694e-06,
      "loss": 2.4026,
      "step": 160
    },
    {
      "epoch": 0.013271400132714002,
      "grad_norm": 0.354769229888916,
      "learning_rate": 9.868023298680233e-06,
      "loss": 2.4013,
      "step": 180
    },
    {
      "epoch": 0.014746000147460001,
      "grad_norm": 0.40864863991737366,
      "learning_rate": 9.853277298532774e-06,
      "loss": 2.4077,
      "step": 200
    },
    {
      "epoch": 0.016220600162206,
      "grad_norm": 0.37636885046958923,
      "learning_rate": 9.838531298385315e-06,
      "loss": 2.4058,
      "step": 220
    },
    {
      "epoch": 0.017695200176952003,
      "grad_norm": 0.37770289182662964,
      "learning_rate": 9.823785298237854e-06,
      "loss": 2.3789,
      "step": 240
    },
    {
      "epoch": 0.019169800191698002,
      "grad_norm": 0.4997946321964264,
      "learning_rate": 9.809039298090393e-06,
      "loss": 2.3921,
      "step": 260
    },
    {
      "epoch": 0.020644400206444,
      "grad_norm": 0.42146745324134827,
      "learning_rate": 9.794293297942934e-06,
      "loss": 2.389,
      "step": 280
    },
    {
      "epoch": 0.022119000221190004,
      "grad_norm": 0.31817182898521423,
      "learning_rate": 9.779547297795475e-06,
      "loss": 2.3969,
      "step": 300
    },
    {
      "epoch": 0.023593600235936003,
      "grad_norm": 0.41462913155555725,
      "learning_rate": 9.764801297648014e-06,
      "loss": 2.3903,
      "step": 320
    },
    {
      "epoch": 0.025068200250682002,
      "grad_norm": 0.35277777910232544,
      "learning_rate": 9.750055297500553e-06,
      "loss": 2.3886,
      "step": 340
    },
    {
      "epoch": 0.026542800265428004,
      "grad_norm": 0.39531123638153076,
      "learning_rate": 9.735309297353094e-06,
      "loss": 2.3847,
      "step": 360
    },
    {
      "epoch": 0.028017400280174003,
      "grad_norm": 0.3687122166156769,
      "learning_rate": 9.720563297205633e-06,
      "loss": 2.3824,
      "step": 380
    },
    {
      "epoch": 0.029492000294920002,
      "grad_norm": 0.5362212061882019,
      "learning_rate": 9.705817297058172e-06,
      "loss": 2.3861,
      "step": 400
    },
    {
      "epoch": 0.030966600309666,
      "grad_norm": 0.5793041586875916,
      "learning_rate": 9.691071296910713e-06,
      "loss": 2.4069,
      "step": 420
    },
    {
      "epoch": 0.032441200324412,
      "grad_norm": 0.3618432581424713,
      "learning_rate": 9.676325296763254e-06,
      "loss": 2.379,
      "step": 440
    },
    {
      "epoch": 0.033915800339158,
      "grad_norm": 0.39441928267478943,
      "learning_rate": 9.661579296615793e-06,
      "loss": 2.3878,
      "step": 460
    },
    {
      "epoch": 0.035390400353904006,
      "grad_norm": 0.3998846113681793,
      "learning_rate": 9.646833296468334e-06,
      "loss": 2.4028,
      "step": 480
    },
    {
      "epoch": 0.03686500036865,
      "grad_norm": 0.354809045791626,
      "learning_rate": 9.632087296320873e-06,
      "loss": 2.384,
      "step": 500
    },
    {
      "epoch": 0.038339600383396004,
      "grad_norm": 0.6007148623466492,
      "learning_rate": 9.617341296173414e-06,
      "loss": 2.3974,
      "step": 520
    },
    {
      "epoch": 0.039814200398142006,
      "grad_norm": 0.37758493423461914,
      "learning_rate": 9.602595296025953e-06,
      "loss": 2.3762,
      "step": 540
    },
    {
      "epoch": 0.041288800412888,
      "grad_norm": 0.4141014516353607,
      "learning_rate": 9.587849295878494e-06,
      "loss": 2.3831,
      "step": 560
    },
    {
      "epoch": 0.042763400427634005,
      "grad_norm": 0.48960772156715393,
      "learning_rate": 9.573103295731033e-06,
      "loss": 2.3961,
      "step": 580
    },
    {
      "epoch": 0.04423800044238001,
      "grad_norm": 0.3811470568180084,
      "learning_rate": 9.558357295583574e-06,
      "loss": 2.3751,
      "step": 600
    },
    {
      "epoch": 0.045712600457126,
      "grad_norm": 0.35424861311912537,
      "learning_rate": 9.543611295436113e-06,
      "loss": 2.3725,
      "step": 620
    },
    {
      "epoch": 0.047187200471872005,
      "grad_norm": 0.36190420389175415,
      "learning_rate": 9.528865295288654e-06,
      "loss": 2.3944,
      "step": 640
    },
    {
      "epoch": 0.04866180048661801,
      "grad_norm": 0.42110538482666016,
      "learning_rate": 9.514119295141195e-06,
      "loss": 2.3909,
      "step": 660
    },
    {
      "epoch": 0.050136400501364004,
      "grad_norm": 0.4361230134963989,
      "learning_rate": 9.499373294993734e-06,
      "loss": 2.3752,
      "step": 680
    },
    {
      "epoch": 0.051611000516110006,
      "grad_norm": 0.3302260935306549,
      "learning_rate": 9.484627294846273e-06,
      "loss": 2.3699,
      "step": 700
    },
    {
      "epoch": 0.05308560053085601,
      "grad_norm": 0.3521312177181244,
      "learning_rate": 9.469881294698814e-06,
      "loss": 2.3772,
      "step": 720
    },
    {
      "epoch": 0.054560200545602004,
      "grad_norm": 0.3700699806213379,
      "learning_rate": 9.455135294551355e-06,
      "loss": 2.3751,
      "step": 740
    },
    {
      "epoch": 0.05603480056034801,
      "grad_norm": 0.3583256006240845,
      "learning_rate": 9.440389294403894e-06,
      "loss": 2.3675,
      "step": 760
    },
    {
      "epoch": 0.057509400575094,
      "grad_norm": 0.33290043473243713,
      "learning_rate": 9.425643294256433e-06,
      "loss": 2.3784,
      "step": 780
    },
    {
      "epoch": 0.058984000589840005,
      "grad_norm": 0.3490481674671173,
      "learning_rate": 9.410897294108974e-06,
      "loss": 2.3597,
      "step": 800
    },
    {
      "epoch": 0.06045860060458601,
      "grad_norm": 0.479775071144104,
      "learning_rate": 9.396151293961515e-06,
      "loss": 2.3852,
      "step": 820
    },
    {
      "epoch": 0.061933200619332,
      "grad_norm": 0.36794590950012207,
      "learning_rate": 9.381405293814054e-06,
      "loss": 2.3748,
      "step": 840
    },
    {
      "epoch": 0.06340780063407801,
      "grad_norm": 0.38288605213165283,
      "learning_rate": 9.366659293666593e-06,
      "loss": 2.3673,
      "step": 860
    },
    {
      "epoch": 0.064882400648824,
      "grad_norm": 0.40629106760025024,
      "learning_rate": 9.351913293519134e-06,
      "loss": 2.3906,
      "step": 880
    },
    {
      "epoch": 0.06635700066357,
      "grad_norm": 0.3594074249267578,
      "learning_rate": 9.337167293371675e-06,
      "loss": 2.3773,
      "step": 900
    },
    {
      "epoch": 0.067831600678316,
      "grad_norm": 0.463144451379776,
      "learning_rate": 9.322421293224214e-06,
      "loss": 2.3846,
      "step": 920
    },
    {
      "epoch": 0.06930620069306201,
      "grad_norm": 0.35561177134513855,
      "learning_rate": 9.307675293076753e-06,
      "loss": 2.3723,
      "step": 940
    },
    {
      "epoch": 0.07078080070780801,
      "grad_norm": 0.3446957767009735,
      "learning_rate": 9.292929292929294e-06,
      "loss": 2.4011,
      "step": 960
    },
    {
      "epoch": 0.07225540072255401,
      "grad_norm": 0.5307037830352783,
      "learning_rate": 9.278183292781835e-06,
      "loss": 2.3747,
      "step": 980
    },
    {
      "epoch": 0.0737300007373,
      "grad_norm": 0.3605501055717468,
      "learning_rate": 9.263437292634374e-06,
      "loss": 2.3523,
      "step": 1000
    },
    {
      "epoch": 0.075204600752046,
      "grad_norm": 0.3705900013446808,
      "learning_rate": 9.248691292486913e-06,
      "loss": 2.3693,
      "step": 1020
    },
    {
      "epoch": 0.07667920076679201,
      "grad_norm": 0.3397590219974518,
      "learning_rate": 9.233945292339454e-06,
      "loss": 2.362,
      "step": 1040
    },
    {
      "epoch": 0.07815380078153801,
      "grad_norm": 0.32325974106788635,
      "learning_rate": 9.219199292191993e-06,
      "loss": 2.3863,
      "step": 1060
    },
    {
      "epoch": 0.07962840079628401,
      "grad_norm": 0.3661843240261078,
      "learning_rate": 9.204453292044534e-06,
      "loss": 2.3769,
      "step": 1080
    },
    {
      "epoch": 0.08110300081103,
      "grad_norm": 0.3777139186859131,
      "learning_rate": 9.189707291897075e-06,
      "loss": 2.3626,
      "step": 1100
    },
    {
      "epoch": 0.082577600825776,
      "grad_norm": 0.37038654088974,
      "learning_rate": 9.174961291749614e-06,
      "loss": 2.3895,
      "step": 1120
    },
    {
      "epoch": 0.084052200840522,
      "grad_norm": 0.3628358542919159,
      "learning_rate": 9.160215291602153e-06,
      "loss": 2.3649,
      "step": 1140
    },
    {
      "epoch": 0.08552680085526801,
      "grad_norm": 0.3579985201358795,
      "learning_rate": 9.145469291454694e-06,
      "loss": 2.3696,
      "step": 1160
    },
    {
      "epoch": 0.08700140087001401,
      "grad_norm": 0.42436483502388,
      "learning_rate": 9.130723291307235e-06,
      "loss": 2.3845,
      "step": 1180
    },
    {
      "epoch": 0.08847600088476001,
      "grad_norm": 0.3851209580898285,
      "learning_rate": 9.115977291159774e-06,
      "loss": 2.36,
      "step": 1200
    },
    {
      "epoch": 0.089950600899506,
      "grad_norm": 0.5885825157165527,
      "learning_rate": 9.101231291012313e-06,
      "loss": 2.3776,
      "step": 1220
    },
    {
      "epoch": 0.091425200914252,
      "grad_norm": 0.42304036021232605,
      "learning_rate": 9.086485290864854e-06,
      "loss": 2.3632,
      "step": 1240
    },
    {
      "epoch": 0.09289980092899801,
      "grad_norm": 0.4534706473350525,
      "learning_rate": 9.071739290717394e-06,
      "loss": 2.3531,
      "step": 1260
    },
    {
      "epoch": 0.09437440094374401,
      "grad_norm": 0.38540026545524597,
      "learning_rate": 9.056993290569934e-06,
      "loss": 2.3755,
      "step": 1280
    },
    {
      "epoch": 0.09584900095849001,
      "grad_norm": 0.38401320576667786,
      "learning_rate": 9.042247290422473e-06,
      "loss": 2.3614,
      "step": 1300
    },
    {
      "epoch": 0.09732360097323602,
      "grad_norm": 0.35363873839378357,
      "learning_rate": 9.027501290275014e-06,
      "loss": 2.3655,
      "step": 1320
    },
    {
      "epoch": 0.098798200987982,
      "grad_norm": 0.36643800139427185,
      "learning_rate": 9.012755290127554e-06,
      "loss": 2.369,
      "step": 1340
    },
    {
      "epoch": 0.10027280100272801,
      "grad_norm": 0.3596145212650299,
      "learning_rate": 8.998009289980094e-06,
      "loss": 2.3703,
      "step": 1360
    },
    {
      "epoch": 0.10174740101747401,
      "grad_norm": 0.554706871509552,
      "learning_rate": 8.983263289832633e-06,
      "loss": 2.3631,
      "step": 1380
    },
    {
      "epoch": 0.10322200103222001,
      "grad_norm": 0.3995848298072815,
      "learning_rate": 8.968517289685174e-06,
      "loss": 2.3671,
      "step": 1400
    },
    {
      "epoch": 0.10469660104696601,
      "grad_norm": 0.3653299808502197,
      "learning_rate": 8.953771289537714e-06,
      "loss": 2.3783,
      "step": 1420
    },
    {
      "epoch": 0.10617120106171202,
      "grad_norm": 0.39819827675819397,
      "learning_rate": 8.939025289390254e-06,
      "loss": 2.3617,
      "step": 1440
    },
    {
      "epoch": 0.107645801076458,
      "grad_norm": 0.3512992262840271,
      "learning_rate": 8.924279289242793e-06,
      "loss": 2.3649,
      "step": 1460
    },
    {
      "epoch": 0.10912040109120401,
      "grad_norm": 0.43283718824386597,
      "learning_rate": 8.909533289095333e-06,
      "loss": 2.3624,
      "step": 1480
    },
    {
      "epoch": 0.11059500110595001,
      "grad_norm": 0.3857913613319397,
      "learning_rate": 8.894787288947874e-06,
      "loss": 2.3686,
      "step": 1500
    },
    {
      "epoch": 0.11206960112069601,
      "grad_norm": 0.3893970251083374,
      "learning_rate": 8.880041288800413e-06,
      "loss": 2.3722,
      "step": 1520
    },
    {
      "epoch": 0.11354420113544202,
      "grad_norm": 0.6659526228904724,
      "learning_rate": 8.865295288652953e-06,
      "loss": 2.347,
      "step": 1540
    },
    {
      "epoch": 0.115018801150188,
      "grad_norm": 0.3801191449165344,
      "learning_rate": 8.850549288505493e-06,
      "loss": 2.3595,
      "step": 1560
    },
    {
      "epoch": 0.11649340116493401,
      "grad_norm": 0.3556024432182312,
      "learning_rate": 8.835803288358034e-06,
      "loss": 2.3597,
      "step": 1580
    },
    {
      "epoch": 0.11796800117968001,
      "grad_norm": 0.45088592171669006,
      "learning_rate": 8.821057288210573e-06,
      "loss": 2.3623,
      "step": 1600
    },
    {
      "epoch": 0.11944260119442601,
      "grad_norm": 0.3952132761478424,
      "learning_rate": 8.806311288063114e-06,
      "loss": 2.3616,
      "step": 1620
    },
    {
      "epoch": 0.12091720120917201,
      "grad_norm": 0.4083469808101654,
      "learning_rate": 8.791565287915653e-06,
      "loss": 2.3597,
      "step": 1640
    },
    {
      "epoch": 0.12239180122391802,
      "grad_norm": 0.374976247549057,
      "learning_rate": 8.776819287768194e-06,
      "loss": 2.3656,
      "step": 1660
    },
    {
      "epoch": 0.123866401238664,
      "grad_norm": 0.37194111943244934,
      "learning_rate": 8.762073287620733e-06,
      "loss": 2.368,
      "step": 1680
    },
    {
      "epoch": 0.12534100125341002,
      "grad_norm": 0.35863035917282104,
      "learning_rate": 8.747327287473274e-06,
      "loss": 2.3648,
      "step": 1700
    },
    {
      "epoch": 0.12681560126815603,
      "grad_norm": 0.43522682785987854,
      "learning_rate": 8.732581287325813e-06,
      "loss": 2.3663,
      "step": 1720
    },
    {
      "epoch": 0.128290201282902,
      "grad_norm": 0.3582712411880493,
      "learning_rate": 8.717835287178354e-06,
      "loss": 2.3684,
      "step": 1740
    },
    {
      "epoch": 0.129764801297648,
      "grad_norm": 0.40035149455070496,
      "learning_rate": 8.703089287030893e-06,
      "loss": 2.3679,
      "step": 1760
    },
    {
      "epoch": 0.131239401312394,
      "grad_norm": 0.36125797033309937,
      "learning_rate": 8.688343286883434e-06,
      "loss": 2.3501,
      "step": 1780
    },
    {
      "epoch": 0.13271400132714,
      "grad_norm": 0.3568665087223053,
      "learning_rate": 8.673597286735973e-06,
      "loss": 2.3594,
      "step": 1800
    },
    {
      "epoch": 0.134188601341886,
      "grad_norm": 0.4135202169418335,
      "learning_rate": 8.658851286588512e-06,
      "loss": 2.387,
      "step": 1820
    },
    {
      "epoch": 0.135663201356632,
      "grad_norm": 0.3586069345474243,
      "learning_rate": 8.644105286441053e-06,
      "loss": 2.3702,
      "step": 1840
    },
    {
      "epoch": 0.13713780137137802,
      "grad_norm": 0.47354263067245483,
      "learning_rate": 8.629359286293594e-06,
      "loss": 2.3709,
      "step": 1860
    },
    {
      "epoch": 0.13861240138612402,
      "grad_norm": 0.3735561668872833,
      "learning_rate": 8.614613286146133e-06,
      "loss": 2.3639,
      "step": 1880
    },
    {
      "epoch": 0.14008700140087002,
      "grad_norm": 0.351646363735199,
      "learning_rate": 8.599867285998672e-06,
      "loss": 2.3678,
      "step": 1900
    },
    {
      "epoch": 0.14156160141561602,
      "grad_norm": 0.3624926507472992,
      "learning_rate": 8.585121285851213e-06,
      "loss": 2.3725,
      "step": 1920
    },
    {
      "epoch": 0.14303620143036203,
      "grad_norm": 0.41470956802368164,
      "learning_rate": 8.570375285703754e-06,
      "loss": 2.364,
      "step": 1940
    },
    {
      "epoch": 0.14451080144510803,
      "grad_norm": 0.3892461657524109,
      "learning_rate": 8.555629285556293e-06,
      "loss": 2.3623,
      "step": 1960
    },
    {
      "epoch": 0.145985401459854,
      "grad_norm": 0.37670454382896423,
      "learning_rate": 8.540883285408832e-06,
      "loss": 2.3649,
      "step": 1980
    },
    {
      "epoch": 0.1474600014746,
      "grad_norm": 0.36948758363723755,
      "learning_rate": 8.526137285261373e-06,
      "loss": 2.3689,
      "step": 2000
    },
    {
      "epoch": 0.148934601489346,
      "grad_norm": 0.3593231737613678,
      "learning_rate": 8.511391285113914e-06,
      "loss": 2.3693,
      "step": 2020
    },
    {
      "epoch": 0.150409201504092,
      "grad_norm": 0.3645029366016388,
      "learning_rate": 8.496645284966453e-06,
      "loss": 2.3607,
      "step": 2040
    },
    {
      "epoch": 0.151883801518838,
      "grad_norm": 0.761593759059906,
      "learning_rate": 8.481899284818994e-06,
      "loss": 2.362,
      "step": 2060
    },
    {
      "epoch": 0.15335840153358402,
      "grad_norm": 0.3621225953102112,
      "learning_rate": 8.467153284671533e-06,
      "loss": 2.3566,
      "step": 2080
    },
    {
      "epoch": 0.15483300154833002,
      "grad_norm": 0.353289395570755,
      "learning_rate": 8.452407284524074e-06,
      "loss": 2.3565,
      "step": 2100
    },
    {
      "epoch": 0.15630760156307602,
      "grad_norm": 0.4106830060482025,
      "learning_rate": 8.437661284376613e-06,
      "loss": 2.3536,
      "step": 2120
    },
    {
      "epoch": 0.15778220157782202,
      "grad_norm": 0.41519981622695923,
      "learning_rate": 8.422915284229154e-06,
      "loss": 2.3532,
      "step": 2140
    },
    {
      "epoch": 0.15925680159256803,
      "grad_norm": 0.3569793701171875,
      "learning_rate": 8.408169284081693e-06,
      "loss": 2.3648,
      "step": 2160
    },
    {
      "epoch": 0.16073140160731403,
      "grad_norm": 0.3916598856449127,
      "learning_rate": 8.393423283934234e-06,
      "loss": 2.3675,
      "step": 2180
    },
    {
      "epoch": 0.16220600162206,
      "grad_norm": 0.3435116708278656,
      "learning_rate": 8.378677283786773e-06,
      "loss": 2.3685,
      "step": 2200
    },
    {
      "epoch": 0.163680601636806,
      "grad_norm": 0.47191643714904785,
      "learning_rate": 8.363931283639314e-06,
      "loss": 2.3596,
      "step": 2220
    },
    {
      "epoch": 0.165155201651552,
      "grad_norm": 0.54694002866745,
      "learning_rate": 8.349185283491853e-06,
      "loss": 2.3551,
      "step": 2240
    },
    {
      "epoch": 0.166629801666298,
      "grad_norm": 0.9548392295837402,
      "learning_rate": 8.334439283344394e-06,
      "loss": 2.3784,
      "step": 2260
    },
    {
      "epoch": 0.168104401681044,
      "grad_norm": 0.3765574097633362,
      "learning_rate": 8.319693283196933e-06,
      "loss": 2.3668,
      "step": 2280
    },
    {
      "epoch": 0.16957900169579002,
      "grad_norm": 0.748131275177002,
      "learning_rate": 8.304947283049474e-06,
      "loss": 2.3492,
      "step": 2300
    },
    {
      "epoch": 0.17105360171053602,
      "grad_norm": 0.3873535096645355,
      "learning_rate": 8.290201282902015e-06,
      "loss": 2.3592,
      "step": 2320
    },
    {
      "epoch": 0.17252820172528202,
      "grad_norm": 0.3429403305053711,
      "learning_rate": 8.275455282754554e-06,
      "loss": 2.3688,
      "step": 2340
    },
    {
      "epoch": 0.17400280174002802,
      "grad_norm": 0.3973939120769501,
      "learning_rate": 8.260709282607093e-06,
      "loss": 2.3666,
      "step": 2360
    },
    {
      "epoch": 0.17547740175477403,
      "grad_norm": 0.35062074661254883,
      "learning_rate": 8.245963282459634e-06,
      "loss": 2.3534,
      "step": 2380
    },
    {
      "epoch": 0.17695200176952003,
      "grad_norm": 0.3548290431499481,
      "learning_rate": 8.231217282312175e-06,
      "loss": 2.3477,
      "step": 2400
    },
    {
      "epoch": 0.17842660178426603,
      "grad_norm": 0.4035143256187439,
      "learning_rate": 8.216471282164714e-06,
      "loss": 2.3601,
      "step": 2420
    },
    {
      "epoch": 0.179901201799012,
      "grad_norm": 0.37481680512428284,
      "learning_rate": 8.201725282017253e-06,
      "loss": 2.3579,
      "step": 2440
    },
    {
      "epoch": 0.181375801813758,
      "grad_norm": 0.5035015344619751,
      "learning_rate": 8.186979281869794e-06,
      "loss": 2.366,
      "step": 2460
    },
    {
      "epoch": 0.182850401828504,
      "grad_norm": 0.3541349768638611,
      "learning_rate": 8.172233281722333e-06,
      "loss": 2.368,
      "step": 2480
    },
    {
      "epoch": 0.18432500184325,
      "grad_norm": 0.3855954110622406,
      "learning_rate": 8.157487281574874e-06,
      "loss": 2.3542,
      "step": 2500
    },
    {
      "epoch": 0.18579960185799602,
      "grad_norm": 0.4045879542827606,
      "learning_rate": 8.142741281427413e-06,
      "loss": 2.3579,
      "step": 2520
    },
    {
      "epoch": 0.18727420187274202,
      "grad_norm": 0.3711334764957428,
      "learning_rate": 8.127995281279954e-06,
      "loss": 2.3711,
      "step": 2540
    },
    {
      "epoch": 0.18874880188748802,
      "grad_norm": 0.3625940978527069,
      "learning_rate": 8.113249281132493e-06,
      "loss": 2.3573,
      "step": 2560
    },
    {
      "epoch": 0.19022340190223402,
      "grad_norm": 0.6012184619903564,
      "learning_rate": 8.098503280985034e-06,
      "loss": 2.3705,
      "step": 2580
    },
    {
      "epoch": 0.19169800191698003,
      "grad_norm": 0.40935376286506653,
      "learning_rate": 8.083757280837573e-06,
      "loss": 2.3615,
      "step": 2600
    },
    {
      "epoch": 0.19317260193172603,
      "grad_norm": 0.34611454606056213,
      "learning_rate": 8.069011280690114e-06,
      "loss": 2.358,
      "step": 2620
    },
    {
      "epoch": 0.19464720194647203,
      "grad_norm": 0.36542612314224243,
      "learning_rate": 8.054265280542653e-06,
      "loss": 2.3656,
      "step": 2640
    },
    {
      "epoch": 0.196121801961218,
      "grad_norm": 0.38626089692115784,
      "learning_rate": 8.039519280395194e-06,
      "loss": 2.3643,
      "step": 2660
    },
    {
      "epoch": 0.197596401975964,
      "grad_norm": 0.39764684438705444,
      "learning_rate": 8.024773280247733e-06,
      "loss": 2.3556,
      "step": 2680
    },
    {
      "epoch": 0.19907100199071,
      "grad_norm": 0.3800354301929474,
      "learning_rate": 8.010027280100274e-06,
      "loss": 2.3696,
      "step": 2700
    },
    {
      "epoch": 0.20054560200545601,
      "grad_norm": 0.37549829483032227,
      "learning_rate": 7.995281279952813e-06,
      "loss": 2.3623,
      "step": 2720
    },
    {
      "epoch": 0.20202020202020202,
      "grad_norm": 0.3357870280742645,
      "learning_rate": 7.980535279805354e-06,
      "loss": 2.3609,
      "step": 2740
    },
    {
      "epoch": 0.20349480203494802,
      "grad_norm": 0.38587677478790283,
      "learning_rate": 7.965789279657895e-06,
      "loss": 2.3648,
      "step": 2760
    },
    {
      "epoch": 0.20496940204969402,
      "grad_norm": 0.3734722137451172,
      "learning_rate": 7.951043279510434e-06,
      "loss": 2.3637,
      "step": 2780
    },
    {
      "epoch": 0.20644400206444002,
      "grad_norm": 0.37882205843925476,
      "learning_rate": 7.936297279362973e-06,
      "loss": 2.3638,
      "step": 2800
    },
    {
      "epoch": 0.20791860207918603,
      "grad_norm": 0.3540538549423218,
      "learning_rate": 7.921551279215514e-06,
      "loss": 2.3483,
      "step": 2820
    },
    {
      "epoch": 0.20939320209393203,
      "grad_norm": 0.3712068796157837,
      "learning_rate": 7.906805279068054e-06,
      "loss": 2.3553,
      "step": 2840
    },
    {
      "epoch": 0.21086780210867803,
      "grad_norm": 0.3830094039440155,
      "learning_rate": 7.892059278920594e-06,
      "loss": 2.3526,
      "step": 2860
    },
    {
      "epoch": 0.21234240212342403,
      "grad_norm": 0.37301984429359436,
      "learning_rate": 7.877313278773133e-06,
      "loss": 2.3597,
      "step": 2880
    },
    {
      "epoch": 0.21381700213817,
      "grad_norm": 0.3589264452457428,
      "learning_rate": 7.862567278625674e-06,
      "loss": 2.3526,
      "step": 2900
    },
    {
      "epoch": 0.215291602152916,
      "grad_norm": 0.33692067861557007,
      "learning_rate": 7.847821278478214e-06,
      "loss": 2.3529,
      "step": 2920
    },
    {
      "epoch": 0.21676620216766201,
      "grad_norm": 0.3636477589607239,
      "learning_rate": 7.833075278330754e-06,
      "loss": 2.3583,
      "step": 2940
    },
    {
      "epoch": 0.21824080218240802,
      "grad_norm": 0.39444780349731445,
      "learning_rate": 7.818329278183293e-06,
      "loss": 2.3448,
      "step": 2960
    },
    {
      "epoch": 0.21971540219715402,
      "grad_norm": 0.3866218626499176,
      "learning_rate": 7.803583278035834e-06,
      "loss": 2.3626,
      "step": 2980
    },
    {
      "epoch": 0.22119000221190002,
      "grad_norm": 0.37035319209098816,
      "learning_rate": 7.788837277888374e-06,
      "loss": 2.3672,
      "step": 3000
    },
    {
      "epoch": 0.22266460222664602,
      "grad_norm": 0.3661469519138336,
      "learning_rate": 7.774091277740914e-06,
      "loss": 2.3658,
      "step": 3020
    },
    {
      "epoch": 0.22413920224139203,
      "grad_norm": 0.391837477684021,
      "learning_rate": 7.759345277593453e-06,
      "loss": 2.3672,
      "step": 3040
    },
    {
      "epoch": 0.22561380225613803,
      "grad_norm": 0.36069273948669434,
      "learning_rate": 7.744599277445994e-06,
      "loss": 2.3685,
      "step": 3060
    },
    {
      "epoch": 0.22708840227088403,
      "grad_norm": 0.3551023602485657,
      "learning_rate": 7.729853277298534e-06,
      "loss": 2.3697,
      "step": 3080
    },
    {
      "epoch": 0.22856300228563003,
      "grad_norm": 0.3549511730670929,
      "learning_rate": 7.715107277151073e-06,
      "loss": 2.3708,
      "step": 3100
    },
    {
      "epoch": 0.230037602300376,
      "grad_norm": 0.3584568202495575,
      "learning_rate": 7.700361277003613e-06,
      "loss": 2.3491,
      "step": 3120
    },
    {
      "epoch": 0.231512202315122,
      "grad_norm": 0.3881671726703644,
      "learning_rate": 7.685615276856153e-06,
      "loss": 2.3603,
      "step": 3140
    },
    {
      "epoch": 0.23298680232986801,
      "grad_norm": 0.37565991282463074,
      "learning_rate": 7.670869276708693e-06,
      "loss": 2.349,
      "step": 3160
    },
    {
      "epoch": 0.23446140234461402,
      "grad_norm": 0.3800000548362732,
      "learning_rate": 7.656123276561233e-06,
      "loss": 2.3634,
      "step": 3180
    },
    {
      "epoch": 0.23593600235936002,
      "grad_norm": 0.3795914053916931,
      "learning_rate": 7.641377276413774e-06,
      "loss": 2.3541,
      "step": 3200
    },
    {
      "epoch": 0.23741060237410602,
      "grad_norm": 0.34739038348197937,
      "learning_rate": 7.626631276266313e-06,
      "loss": 2.364,
      "step": 3220
    },
    {
      "epoch": 0.23888520238885202,
      "grad_norm": 0.3631921112537384,
      "learning_rate": 7.611885276118853e-06,
      "loss": 2.3472,
      "step": 3240
    },
    {
      "epoch": 0.24035980240359803,
      "grad_norm": 0.34708309173583984,
      "learning_rate": 7.597139275971393e-06,
      "loss": 2.3745,
      "step": 3260
    },
    {
      "epoch": 0.24183440241834403,
      "grad_norm": 0.39002010226249695,
      "learning_rate": 7.582393275823933e-06,
      "loss": 2.3496,
      "step": 3280
    },
    {
      "epoch": 0.24330900243309003,
      "grad_norm": 0.3856920003890991,
      "learning_rate": 7.5676472756764725e-06,
      "loss": 2.3556,
      "step": 3300
    },
    {
      "epoch": 0.24478360244783604,
      "grad_norm": 0.3604213297367096,
      "learning_rate": 7.552901275529013e-06,
      "loss": 2.3588,
      "step": 3320
    },
    {
      "epoch": 0.24625820246258204,
      "grad_norm": 0.4610792100429535,
      "learning_rate": 7.538155275381553e-06,
      "loss": 2.3495,
      "step": 3340
    },
    {
      "epoch": 0.247732802477328,
      "grad_norm": 0.37130406498908997,
      "learning_rate": 7.523409275234093e-06,
      "loss": 2.3654,
      "step": 3360
    },
    {
      "epoch": 0.24920740249207401,
      "grad_norm": 0.4634559154510498,
      "learning_rate": 7.5086632750866325e-06,
      "loss": 2.3472,
      "step": 3380
    },
    {
      "epoch": 0.25068200250682005,
      "grad_norm": 0.3728873133659363,
      "learning_rate": 7.493917274939173e-06,
      "loss": 2.3601,
      "step": 3400
    },
    {
      "epoch": 0.252156602521566,
      "grad_norm": 0.35525408387184143,
      "learning_rate": 7.479171274791713e-06,
      "loss": 2.3483,
      "step": 3420
    },
    {
      "epoch": 0.25363120253631205,
      "grad_norm": 0.35072821378707886,
      "learning_rate": 7.464425274644253e-06,
      "loss": 2.3575,
      "step": 3440
    },
    {
      "epoch": 0.255105802551058,
      "grad_norm": 0.33984318375587463,
      "learning_rate": 7.449679274496794e-06,
      "loss": 2.3562,
      "step": 3460
    },
    {
      "epoch": 0.256580402565804,
      "grad_norm": 0.3893386423587799,
      "learning_rate": 7.434933274349333e-06,
      "loss": 2.3417,
      "step": 3480
    },
    {
      "epoch": 0.25805500258055003,
      "grad_norm": 0.3702589273452759,
      "learning_rate": 7.420187274201873e-06,
      "loss": 2.3575,
      "step": 3500
    },
    {
      "epoch": 0.259529602595296,
      "grad_norm": 0.37328028678894043,
      "learning_rate": 7.405441274054413e-06,
      "loss": 2.3691,
      "step": 3520
    },
    {
      "epoch": 0.26100420261004204,
      "grad_norm": 0.362596720457077,
      "learning_rate": 7.390695273906954e-06,
      "loss": 2.3516,
      "step": 3540
    },
    {
      "epoch": 0.262478802624788,
      "grad_norm": 0.35985130071640015,
      "learning_rate": 7.375949273759493e-06,
      "loss": 2.3579,
      "step": 3560
    },
    {
      "epoch": 0.26395340263953404,
      "grad_norm": 0.39259010553359985,
      "learning_rate": 7.361203273612033e-06,
      "loss": 2.3674,
      "step": 3580
    },
    {
      "epoch": 0.26542800265428,
      "grad_norm": 0.3379274904727936,
      "learning_rate": 7.346457273464573e-06,
      "loss": 2.3409,
      "step": 3600
    },
    {
      "epoch": 0.26690260266902605,
      "grad_norm": 0.38404515385627747,
      "learning_rate": 7.331711273317113e-06,
      "loss": 2.3493,
      "step": 3620
    },
    {
      "epoch": 0.268377202683772,
      "grad_norm": 0.3580029606819153,
      "learning_rate": 7.316965273169654e-06,
      "loss": 2.3426,
      "step": 3640
    },
    {
      "epoch": 0.26985180269851805,
      "grad_norm": 0.36124977469444275,
      "learning_rate": 7.302219273022193e-06,
      "loss": 2.3563,
      "step": 3660
    },
    {
      "epoch": 0.271326402713264,
      "grad_norm": 0.37032002210617065,
      "learning_rate": 7.287473272874733e-06,
      "loss": 2.3558,
      "step": 3680
    },
    {
      "epoch": 0.27280100272801,
      "grad_norm": 0.37544354796409607,
      "learning_rate": 7.272727272727273e-06,
      "loss": 2.353,
      "step": 3700
    },
    {
      "epoch": 0.27427560274275603,
      "grad_norm": 0.38063186407089233,
      "learning_rate": 7.257981272579814e-06,
      "loss": 2.355,
      "step": 3720
    },
    {
      "epoch": 0.275750202757502,
      "grad_norm": 0.34436681866645813,
      "learning_rate": 7.243235272432353e-06,
      "loss": 2.3671,
      "step": 3740
    },
    {
      "epoch": 0.27722480277224804,
      "grad_norm": 0.3745051622390747,
      "learning_rate": 7.228489272284893e-06,
      "loss": 2.343,
      "step": 3760
    },
    {
      "epoch": 0.278699402786994,
      "grad_norm": 0.36901938915252686,
      "learning_rate": 7.213743272137433e-06,
      "loss": 2.3367,
      "step": 3780
    },
    {
      "epoch": 0.28017400280174004,
      "grad_norm": 0.38288217782974243,
      "learning_rate": 7.198997271989974e-06,
      "loss": 2.3598,
      "step": 3800
    },
    {
      "epoch": 0.281648602816486,
      "grad_norm": 0.37386777997016907,
      "learning_rate": 7.184251271842513e-06,
      "loss": 2.3618,
      "step": 3820
    },
    {
      "epoch": 0.28312320283123205,
      "grad_norm": 0.35673725605010986,
      "learning_rate": 7.169505271695053e-06,
      "loss": 2.3514,
      "step": 3840
    },
    {
      "epoch": 0.284597802845978,
      "grad_norm": 0.38071954250335693,
      "learning_rate": 7.154759271547593e-06,
      "loss": 2.3564,
      "step": 3860
    },
    {
      "epoch": 0.28607240286072405,
      "grad_norm": 0.3805045187473297,
      "learning_rate": 7.140013271400134e-06,
      "loss": 2.3599,
      "step": 3880
    },
    {
      "epoch": 0.28754700287547,
      "grad_norm": 0.3432954251766205,
      "learning_rate": 7.125267271252674e-06,
      "loss": 2.3568,
      "step": 3900
    },
    {
      "epoch": 0.28902160289021606,
      "grad_norm": 0.35117459297180176,
      "learning_rate": 7.110521271105213e-06,
      "loss": 2.3566,
      "step": 3920
    },
    {
      "epoch": 0.29049620290496203,
      "grad_norm": 0.39861348271369934,
      "learning_rate": 7.095775270957753e-06,
      "loss": 2.3553,
      "step": 3940
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 0.3653263747692108,
      "learning_rate": 7.081029270810294e-06,
      "loss": 2.3781,
      "step": 3960
    },
    {
      "epoch": 0.29344540293445404,
      "grad_norm": 0.3614581823348999,
      "learning_rate": 7.066283270662834e-06,
      "loss": 2.3407,
      "step": 3980
    },
    {
      "epoch": 0.2949200029492,
      "grad_norm": 0.39455854892730713,
      "learning_rate": 7.051537270515373e-06,
      "loss": 2.3541,
      "step": 4000
    },
    {
      "epoch": 0.29639460296394604,
      "grad_norm": 0.33767521381378174,
      "learning_rate": 7.036791270367913e-06,
      "loss": 2.3332,
      "step": 4020
    },
    {
      "epoch": 0.297869202978692,
      "grad_norm": 0.36079707741737366,
      "learning_rate": 7.022045270220453e-06,
      "loss": 2.3546,
      "step": 4040
    },
    {
      "epoch": 0.29934380299343805,
      "grad_norm": 0.395107626914978,
      "learning_rate": 7.007299270072994e-06,
      "loss": 2.3483,
      "step": 4060
    },
    {
      "epoch": 0.300818403008184,
      "grad_norm": 0.3456408381462097,
      "learning_rate": 6.992553269925533e-06,
      "loss": 2.3657,
      "step": 4080
    },
    {
      "epoch": 0.30229300302293005,
      "grad_norm": 0.4138198792934418,
      "learning_rate": 6.977807269778073e-06,
      "loss": 2.3616,
      "step": 4100
    },
    {
      "epoch": 0.303767603037676,
      "grad_norm": 0.382722944021225,
      "learning_rate": 6.963061269630613e-06,
      "loss": 2.3607,
      "step": 4120
    },
    {
      "epoch": 0.30524220305242206,
      "grad_norm": 0.4129588007926941,
      "learning_rate": 6.948315269483154e-06,
      "loss": 2.3582,
      "step": 4140
    },
    {
      "epoch": 0.30671680306716803,
      "grad_norm": 0.35810399055480957,
      "learning_rate": 6.933569269335694e-06,
      "loss": 2.345,
      "step": 4160
    },
    {
      "epoch": 0.308191403081914,
      "grad_norm": 0.3707892596721649,
      "learning_rate": 6.918823269188233e-06,
      "loss": 2.3389,
      "step": 4180
    },
    {
      "epoch": 0.30966600309666004,
      "grad_norm": 0.43102672696113586,
      "learning_rate": 6.904077269040773e-06,
      "loss": 2.3636,
      "step": 4200
    },
    {
      "epoch": 0.311140603111406,
      "grad_norm": 0.37081730365753174,
      "learning_rate": 6.889331268893314e-06,
      "loss": 2.3589,
      "step": 4220
    },
    {
      "epoch": 0.31261520312615204,
      "grad_norm": 0.3554447591304779,
      "learning_rate": 6.874585268745854e-06,
      "loss": 2.3639,
      "step": 4240
    },
    {
      "epoch": 0.314089803140898,
      "grad_norm": 0.3365320563316345,
      "learning_rate": 6.859839268598393e-06,
      "loss": 2.3604,
      "step": 4260
    },
    {
      "epoch": 0.31556440315564405,
      "grad_norm": 0.40808168053627014,
      "learning_rate": 6.845093268450933e-06,
      "loss": 2.3496,
      "step": 4280
    },
    {
      "epoch": 0.31703900317039,
      "grad_norm": 0.380753755569458,
      "learning_rate": 6.830347268303474e-06,
      "loss": 2.3478,
      "step": 4300
    },
    {
      "epoch": 0.31851360318513605,
      "grad_norm": 0.3863361179828644,
      "learning_rate": 6.815601268156014e-06,
      "loss": 2.3381,
      "step": 4320
    },
    {
      "epoch": 0.319988203199882,
      "grad_norm": 0.40554317831993103,
      "learning_rate": 6.800855268008554e-06,
      "loss": 2.3518,
      "step": 4340
    },
    {
      "epoch": 0.32146280321462806,
      "grad_norm": 0.39253705739974976,
      "learning_rate": 6.786109267861093e-06,
      "loss": 2.3567,
      "step": 4360
    },
    {
      "epoch": 0.32293740322937403,
      "grad_norm": 0.37108898162841797,
      "learning_rate": 6.771363267713633e-06,
      "loss": 2.3509,
      "step": 4380
    },
    {
      "epoch": 0.32441200324412,
      "grad_norm": 0.36466971039772034,
      "learning_rate": 6.756617267566174e-06,
      "loss": 2.3627,
      "step": 4400
    },
    {
      "epoch": 0.32588660325886604,
      "grad_norm": 0.37712493538856506,
      "learning_rate": 6.741871267418714e-06,
      "loss": 2.3754,
      "step": 4420
    },
    {
      "epoch": 0.327361203273612,
      "grad_norm": 0.3712570071220398,
      "learning_rate": 6.727125267271253e-06,
      "loss": 2.3619,
      "step": 4440
    },
    {
      "epoch": 0.32883580328835804,
      "grad_norm": 0.34880587458610535,
      "learning_rate": 6.712379267123793e-06,
      "loss": 2.3574,
      "step": 4460
    },
    {
      "epoch": 0.330310403303104,
      "grad_norm": 0.4109377861022949,
      "learning_rate": 6.697633266976334e-06,
      "loss": 2.3574,
      "step": 4480
    },
    {
      "epoch": 0.33178500331785005,
      "grad_norm": 0.3626454770565033,
      "learning_rate": 6.682887266828874e-06,
      "loss": 2.3527,
      "step": 4500
    },
    {
      "epoch": 0.333259603332596,
      "grad_norm": 0.3580588400363922,
      "learning_rate": 6.668141266681413e-06,
      "loss": 2.348,
      "step": 4520
    },
    {
      "epoch": 0.33473420334734205,
      "grad_norm": 0.3460945785045624,
      "learning_rate": 6.653395266533953e-06,
      "loss": 2.3461,
      "step": 4540
    },
    {
      "epoch": 0.336208803362088,
      "grad_norm": 0.39259958267211914,
      "learning_rate": 6.6386492663864936e-06,
      "loss": 2.3818,
      "step": 4560
    },
    {
      "epoch": 0.33768340337683406,
      "grad_norm": 0.44788721203804016,
      "learning_rate": 6.6239032662390335e-06,
      "loss": 2.3636,
      "step": 4580
    },
    {
      "epoch": 0.33915800339158003,
      "grad_norm": 0.40668484568595886,
      "learning_rate": 6.6091572660915735e-06,
      "loss": 2.3542,
      "step": 4600
    },
    {
      "epoch": 0.340632603406326,
      "grad_norm": 0.3672851026058197,
      "learning_rate": 6.594411265944113e-06,
      "loss": 2.3497,
      "step": 4620
    },
    {
      "epoch": 0.34210720342107204,
      "grad_norm": 0.3685692846775055,
      "learning_rate": 6.5796652657966535e-06,
      "loss": 2.361,
      "step": 4640
    },
    {
      "epoch": 0.343581803435818,
      "grad_norm": 0.4522005617618561,
      "learning_rate": 6.5649192656491935e-06,
      "loss": 2.3498,
      "step": 4660
    },
    {
      "epoch": 0.34505640345056404,
      "grad_norm": 0.4089388847351074,
      "learning_rate": 6.5501732655017335e-06,
      "loss": 2.3479,
      "step": 4680
    },
    {
      "epoch": 0.34653100346531,
      "grad_norm": 0.3874075710773468,
      "learning_rate": 6.535427265354273e-06,
      "loss": 2.3437,
      "step": 4700
    },
    {
      "epoch": 0.34800560348005605,
      "grad_norm": 0.3733789622783661,
      "learning_rate": 6.520681265206813e-06,
      "loss": 2.3524,
      "step": 4720
    },
    {
      "epoch": 0.349480203494802,
      "grad_norm": 0.3892102539539337,
      "learning_rate": 6.5059352650593535e-06,
      "loss": 2.346,
      "step": 4740
    },
    {
      "epoch": 0.35095480350954805,
      "grad_norm": 0.36850956082344055,
      "learning_rate": 6.4911892649118935e-06,
      "loss": 2.3532,
      "step": 4760
    },
    {
      "epoch": 0.352429403524294,
      "grad_norm": 0.3519984185695648,
      "learning_rate": 6.476443264764433e-06,
      "loss": 2.3429,
      "step": 4780
    },
    {
      "epoch": 0.35390400353904006,
      "grad_norm": 0.36120033264160156,
      "learning_rate": 6.461697264616973e-06,
      "loss": 2.3397,
      "step": 4800
    },
    {
      "epoch": 0.35537860355378603,
      "grad_norm": 0.40259799361228943,
      "learning_rate": 6.446951264469513e-06,
      "loss": 2.3443,
      "step": 4820
    },
    {
      "epoch": 0.35685320356853206,
      "grad_norm": 0.370540976524353,
      "learning_rate": 6.432205264322053e-06,
      "loss": 2.3494,
      "step": 4840
    },
    {
      "epoch": 0.35832780358327804,
      "grad_norm": 0.7460556626319885,
      "learning_rate": 6.417459264174593e-06,
      "loss": 2.3452,
      "step": 4860
    },
    {
      "epoch": 0.359802403598024,
      "grad_norm": 0.3665451109409332,
      "learning_rate": 6.4027132640271325e-06,
      "loss": 2.3693,
      "step": 4880
    },
    {
      "epoch": 0.36127700361277004,
      "grad_norm": 0.38104239106178284,
      "learning_rate": 6.387967263879673e-06,
      "loss": 2.3568,
      "step": 4900
    },
    {
      "epoch": 0.362751603627516,
      "grad_norm": 0.36739110946655273,
      "learning_rate": 6.373221263732213e-06,
      "loss": 2.361,
      "step": 4920
    },
    {
      "epoch": 0.36422620364226205,
      "grad_norm": 0.405453085899353,
      "learning_rate": 6.358475263584753e-06,
      "loss": 2.3448,
      "step": 4940
    },
    {
      "epoch": 0.365700803657008,
      "grad_norm": 0.349317342042923,
      "learning_rate": 6.3437292634372925e-06,
      "loss": 2.3569,
      "step": 4960
    },
    {
      "epoch": 0.36717540367175405,
      "grad_norm": 0.3882797360420227,
      "learning_rate": 6.328983263289833e-06,
      "loss": 2.3457,
      "step": 4980
    },
    {
      "epoch": 0.3686500036865,
      "grad_norm": 0.42195364832878113,
      "learning_rate": 6.314237263142373e-06,
      "loss": 2.364,
      "step": 5000
    },
    {
      "epoch": 0.37012460370124606,
      "grad_norm": 0.40603727102279663,
      "learning_rate": 6.299491262994913e-06,
      "loss": 2.3365,
      "step": 5020
    },
    {
      "epoch": 0.37159920371599203,
      "grad_norm": 0.3686840534210205,
      "learning_rate": 6.284745262847453e-06,
      "loss": 2.3624,
      "step": 5040
    },
    {
      "epoch": 0.37307380373073806,
      "grad_norm": 0.37392449378967285,
      "learning_rate": 6.2699992626999924e-06,
      "loss": 2.3597,
      "step": 5060
    },
    {
      "epoch": 0.37454840374548404,
      "grad_norm": 0.3410843014717102,
      "learning_rate": 6.255253262552533e-06,
      "loss": 2.3552,
      "step": 5080
    },
    {
      "epoch": 0.37602300376023,
      "grad_norm": 0.3532737195491791,
      "learning_rate": 6.240507262405073e-06,
      "loss": 2.3532,
      "step": 5100
    },
    {
      "epoch": 0.37749760377497604,
      "grad_norm": 0.3908025026321411,
      "learning_rate": 6.225761262257613e-06,
      "loss": 2.3624,
      "step": 5120
    },
    {
      "epoch": 0.378972203789722,
      "grad_norm": 0.3747566044330597,
      "learning_rate": 6.211015262110152e-06,
      "loss": 2.3542,
      "step": 5140
    },
    {
      "epoch": 0.38044680380446805,
      "grad_norm": 0.37590813636779785,
      "learning_rate": 6.196269261962693e-06,
      "loss": 2.3523,
      "step": 5160
    },
    {
      "epoch": 0.381921403819214,
      "grad_norm": 0.3960760533809662,
      "learning_rate": 6.181523261815233e-06,
      "loss": 2.367,
      "step": 5180
    },
    {
      "epoch": 0.38339600383396005,
      "grad_norm": 0.34570005536079407,
      "learning_rate": 6.166777261667773e-06,
      "loss": 2.354,
      "step": 5200
    },
    {
      "epoch": 0.384870603848706,
      "grad_norm": 0.3910704553127289,
      "learning_rate": 6.152031261520312e-06,
      "loss": 2.3657,
      "step": 5220
    },
    {
      "epoch": 0.38634520386345206,
      "grad_norm": 0.35407891869544983,
      "learning_rate": 6.137285261372853e-06,
      "loss": 2.3494,
      "step": 5240
    },
    {
      "epoch": 0.38781980387819803,
      "grad_norm": 0.37890905141830444,
      "learning_rate": 6.122539261225393e-06,
      "loss": 2.3476,
      "step": 5260
    },
    {
      "epoch": 0.38929440389294406,
      "grad_norm": 0.388045072555542,
      "learning_rate": 6.107793261077933e-06,
      "loss": 2.3593,
      "step": 5280
    },
    {
      "epoch": 0.39076900390769004,
      "grad_norm": 0.3695070743560791,
      "learning_rate": 6.093047260930474e-06,
      "loss": 2.3645,
      "step": 5300
    },
    {
      "epoch": 0.392243603922436,
      "grad_norm": 0.3945058584213257,
      "learning_rate": 6.078301260783013e-06,
      "loss": 2.3439,
      "step": 5320
    },
    {
      "epoch": 0.39371820393718204,
      "grad_norm": 0.3858231008052826,
      "learning_rate": 6.063555260635553e-06,
      "loss": 2.3648,
      "step": 5340
    },
    {
      "epoch": 0.395192803951928,
      "grad_norm": 0.3549276292324066,
      "learning_rate": 6.048809260488093e-06,
      "loss": 2.3514,
      "step": 5360
    },
    {
      "epoch": 0.39666740396667405,
      "grad_norm": 0.3770926296710968,
      "learning_rate": 6.034063260340633e-06,
      "loss": 2.3598,
      "step": 5380
    },
    {
      "epoch": 0.39814200398142,
      "grad_norm": 0.37677156925201416,
      "learning_rate": 6.019317260193172e-06,
      "loss": 2.3539,
      "step": 5400
    },
    {
      "epoch": 0.39961660399616605,
      "grad_norm": 0.3545861542224884,
      "learning_rate": 6.004571260045713e-06,
      "loss": 2.3535,
      "step": 5420
    },
    {
      "epoch": 0.40109120401091203,
      "grad_norm": 0.3972468078136444,
      "learning_rate": 5.989825259898253e-06,
      "loss": 2.3533,
      "step": 5440
    },
    {
      "epoch": 0.40256580402565806,
      "grad_norm": 0.368756502866745,
      "learning_rate": 5.975079259750793e-06,
      "loss": 2.3509,
      "step": 5460
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 0.40190038084983826,
      "learning_rate": 5.960333259603332e-06,
      "loss": 2.3375,
      "step": 5480
    },
    {
      "epoch": 0.40551500405515006,
      "grad_norm": 0.34512144327163696,
      "learning_rate": 5.945587259455873e-06,
      "loss": 2.3557,
      "step": 5500
    },
    {
      "epoch": 0.40698960406989604,
      "grad_norm": 0.3768044114112854,
      "learning_rate": 5.930841259308413e-06,
      "loss": 2.3466,
      "step": 5520
    },
    {
      "epoch": 0.40846420408464207,
      "grad_norm": 0.3744104504585266,
      "learning_rate": 5.916095259160953e-06,
      "loss": 2.3565,
      "step": 5540
    },
    {
      "epoch": 0.40993880409938804,
      "grad_norm": 0.35657835006713867,
      "learning_rate": 5.901349259013494e-06,
      "loss": 2.3474,
      "step": 5560
    },
    {
      "epoch": 0.411413404114134,
      "grad_norm": 0.40596744418144226,
      "learning_rate": 5.886603258866033e-06,
      "loss": 2.344,
      "step": 5580
    },
    {
      "epoch": 0.41288800412888005,
      "grad_norm": 0.3615570366382599,
      "learning_rate": 5.871857258718573e-06,
      "loss": 2.3375,
      "step": 5600
    },
    {
      "epoch": 0.414362604143626,
      "grad_norm": 0.3853433132171631,
      "learning_rate": 5.857111258571113e-06,
      "loss": 2.3536,
      "step": 5620
    },
    {
      "epoch": 0.41583720415837205,
      "grad_norm": 0.4154307544231415,
      "learning_rate": 5.842365258423654e-06,
      "loss": 2.3399,
      "step": 5640
    },
    {
      "epoch": 0.41731180417311803,
      "grad_norm": 0.4312744438648224,
      "learning_rate": 5.827619258276193e-06,
      "loss": 2.3442,
      "step": 5660
    },
    {
      "epoch": 0.41878640418786406,
      "grad_norm": 0.402468740940094,
      "learning_rate": 5.812873258128733e-06,
      "loss": 2.3522,
      "step": 5680
    },
    {
      "epoch": 0.42026100420261003,
      "grad_norm": 0.4803409278392792,
      "learning_rate": 5.798127257981273e-06,
      "loss": 2.3547,
      "step": 5700
    },
    {
      "epoch": 0.42173560421735606,
      "grad_norm": 0.3646136522293091,
      "learning_rate": 5.783381257833813e-06,
      "loss": 2.3431,
      "step": 5720
    },
    {
      "epoch": 0.42321020423210204,
      "grad_norm": 0.380462646484375,
      "learning_rate": 5.768635257686354e-06,
      "loss": 2.3564,
      "step": 5740
    },
    {
      "epoch": 0.42468480424684807,
      "grad_norm": 0.3754338324069977,
      "learning_rate": 5.753889257538893e-06,
      "loss": 2.3464,
      "step": 5760
    },
    {
      "epoch": 0.42615940426159404,
      "grad_norm": 0.3629322350025177,
      "learning_rate": 5.739143257391433e-06,
      "loss": 2.3563,
      "step": 5780
    },
    {
      "epoch": 0.42763400427634,
      "grad_norm": 0.4097397029399872,
      "learning_rate": 5.724397257243973e-06,
      "loss": 2.3488,
      "step": 5800
    },
    {
      "epoch": 0.42910860429108605,
      "grad_norm": 0.3598233163356781,
      "learning_rate": 5.709651257096514e-06,
      "loss": 2.3641,
      "step": 5820
    },
    {
      "epoch": 0.430583204305832,
      "grad_norm": 0.39495351910591125,
      "learning_rate": 5.694905256949053e-06,
      "loss": 2.3584,
      "step": 5840
    },
    {
      "epoch": 0.43205780432057805,
      "grad_norm": 0.3598216772079468,
      "learning_rate": 5.680159256801593e-06,
      "loss": 2.3519,
      "step": 5860
    },
    {
      "epoch": 0.43353240433532403,
      "grad_norm": 0.4005551338195801,
      "learning_rate": 5.665413256654133e-06,
      "loss": 2.3455,
      "step": 5880
    },
    {
      "epoch": 0.43500700435007006,
      "grad_norm": 0.39282020926475525,
      "learning_rate": 5.650667256506674e-06,
      "loss": 2.3562,
      "step": 5900
    },
    {
      "epoch": 0.43648160436481603,
      "grad_norm": 0.3693186044692993,
      "learning_rate": 5.635921256359213e-06,
      "loss": 2.3404,
      "step": 5920
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.3796662986278534,
      "learning_rate": 5.621175256211753e-06,
      "loss": 2.3545,
      "step": 5940
    },
    {
      "epoch": 0.43943080439430804,
      "grad_norm": 0.3727371394634247,
      "learning_rate": 5.606429256064293e-06,
      "loss": 2.3613,
      "step": 5960
    },
    {
      "epoch": 0.44090540440905407,
      "grad_norm": 0.38252395391464233,
      "learning_rate": 5.591683255916834e-06,
      "loss": 2.3477,
      "step": 5980
    },
    {
      "epoch": 0.44238000442380004,
      "grad_norm": 0.36342570185661316,
      "learning_rate": 5.576937255769374e-06,
      "loss": 2.361,
      "step": 6000
    },
    {
      "epoch": 0.443854604438546,
      "grad_norm": 0.37686145305633545,
      "learning_rate": 5.562191255621913e-06,
      "loss": 2.358,
      "step": 6020
    },
    {
      "epoch": 0.44532920445329205,
      "grad_norm": 0.373463898897171,
      "learning_rate": 5.547445255474453e-06,
      "loss": 2.3528,
      "step": 6040
    },
    {
      "epoch": 0.446803804468038,
      "grad_norm": 0.3747381567955017,
      "learning_rate": 5.532699255326993e-06,
      "loss": 2.3428,
      "step": 6060
    },
    {
      "epoch": 0.44827840448278405,
      "grad_norm": 0.3892759382724762,
      "learning_rate": 5.517953255179534e-06,
      "loss": 2.3551,
      "step": 6080
    },
    {
      "epoch": 0.44975300449753003,
      "grad_norm": 0.37567898631095886,
      "learning_rate": 5.503207255032073e-06,
      "loss": 2.3487,
      "step": 6100
    },
    {
      "epoch": 0.45122760451227606,
      "grad_norm": 0.3452893793582916,
      "learning_rate": 5.488461254884613e-06,
      "loss": 2.3427,
      "step": 6120
    },
    {
      "epoch": 0.45270220452702203,
      "grad_norm": 0.387379914522171,
      "learning_rate": 5.473715254737153e-06,
      "loss": 2.3445,
      "step": 6140
    },
    {
      "epoch": 0.45417680454176806,
      "grad_norm": 0.36834776401519775,
      "learning_rate": 5.4589692545896936e-06,
      "loss": 2.3483,
      "step": 6160
    },
    {
      "epoch": 0.45565140455651404,
      "grad_norm": 0.3641732633113861,
      "learning_rate": 5.4442232544422335e-06,
      "loss": 2.3514,
      "step": 6180
    },
    {
      "epoch": 0.45712600457126007,
      "grad_norm": 0.41028374433517456,
      "learning_rate": 5.429477254294773e-06,
      "loss": 2.3425,
      "step": 6200
    },
    {
      "epoch": 0.45860060458600604,
      "grad_norm": 0.3992222845554352,
      "learning_rate": 5.414731254147313e-06,
      "loss": 2.3565,
      "step": 6220
    },
    {
      "epoch": 0.460075204600752,
      "grad_norm": 0.377287358045578,
      "learning_rate": 5.3999852539998535e-06,
      "loss": 2.3562,
      "step": 6240
    },
    {
      "epoch": 0.46154980461549805,
      "grad_norm": 0.4046432077884674,
      "learning_rate": 5.3852392538523935e-06,
      "loss": 2.3483,
      "step": 6260
    },
    {
      "epoch": 0.463024404630244,
      "grad_norm": 0.35515862703323364,
      "learning_rate": 5.370493253704933e-06,
      "loss": 2.353,
      "step": 6280
    },
    {
      "epoch": 0.46449900464499005,
      "grad_norm": 0.36241665482521057,
      "learning_rate": 5.355747253557473e-06,
      "loss": 2.3389,
      "step": 6300
    },
    {
      "epoch": 0.46597360465973603,
      "grad_norm": 0.3970908522605896,
      "learning_rate": 5.3410012534100135e-06,
      "loss": 2.3639,
      "step": 6320
    },
    {
      "epoch": 0.46744820467448206,
      "grad_norm": 0.36760082840919495,
      "learning_rate": 5.3262552532625535e-06,
      "loss": 2.3546,
      "step": 6340
    },
    {
      "epoch": 0.46892280468922803,
      "grad_norm": 0.34653639793395996,
      "learning_rate": 5.311509253115093e-06,
      "loss": 2.3448,
      "step": 6360
    },
    {
      "epoch": 0.47039740470397406,
      "grad_norm": 0.35380202531814575,
      "learning_rate": 5.296763252967633e-06,
      "loss": 2.3461,
      "step": 6380
    },
    {
      "epoch": 0.47187200471872004,
      "grad_norm": 0.3386562764644623,
      "learning_rate": 5.282017252820173e-06,
      "loss": 2.3489,
      "step": 6400
    },
    {
      "epoch": 0.47334660473346607,
      "grad_norm": 0.3794664144515991,
      "learning_rate": 5.267271252672713e-06,
      "loss": 2.3565,
      "step": 6420
    },
    {
      "epoch": 0.47482120474821204,
      "grad_norm": 0.3754018545150757,
      "learning_rate": 5.252525252525253e-06,
      "loss": 2.3443,
      "step": 6440
    },
    {
      "epoch": 0.4762958047629581,
      "grad_norm": 0.41498491168022156,
      "learning_rate": 5.2377792523777926e-06,
      "loss": 2.3501,
      "step": 6460
    },
    {
      "epoch": 0.47777040477770405,
      "grad_norm": 0.4399779140949249,
      "learning_rate": 5.2230332522303325e-06,
      "loss": 2.3434,
      "step": 6480
    },
    {
      "epoch": 0.47924500479245,
      "grad_norm": 0.3858584463596344,
      "learning_rate": 5.208287252082873e-06,
      "loss": 2.3612,
      "step": 6500
    },
    {
      "epoch": 0.48071960480719605,
      "grad_norm": 0.379436731338501,
      "learning_rate": 5.193541251935413e-06,
      "loss": 2.3534,
      "step": 6520
    },
    {
      "epoch": 0.48219420482194203,
      "grad_norm": 0.36703166365623474,
      "learning_rate": 5.1787952517879525e-06,
      "loss": 2.3385,
      "step": 6540
    },
    {
      "epoch": 0.48366880483668806,
      "grad_norm": 0.365614652633667,
      "learning_rate": 5.1640492516404925e-06,
      "loss": 2.3417,
      "step": 6560
    },
    {
      "epoch": 0.48514340485143403,
      "grad_norm": 0.3665941655635834,
      "learning_rate": 5.149303251493033e-06,
      "loss": 2.3478,
      "step": 6580
    },
    {
      "epoch": 0.48661800486618007,
      "grad_norm": 0.37497478723526,
      "learning_rate": 5.134557251345573e-06,
      "loss": 2.3713,
      "step": 6600
    },
    {
      "epoch": 0.48809260488092604,
      "grad_norm": 0.37431296706199646,
      "learning_rate": 5.1198112511981125e-06,
      "loss": 2.3544,
      "step": 6620
    },
    {
      "epoch": 0.48956720489567207,
      "grad_norm": 0.3486333191394806,
      "learning_rate": 5.1050652510506525e-06,
      "loss": 2.3578,
      "step": 6640
    },
    {
      "epoch": 0.49104180491041804,
      "grad_norm": 0.36677151918411255,
      "learning_rate": 5.090319250903193e-06,
      "loss": 2.3437,
      "step": 6660
    },
    {
      "epoch": 0.4925164049251641,
      "grad_norm": 0.36574506759643555,
      "learning_rate": 5.075573250755733e-06,
      "loss": 2.3574,
      "step": 6680
    },
    {
      "epoch": 0.49399100493991005,
      "grad_norm": 0.47523924708366394,
      "learning_rate": 5.060827250608273e-06,
      "loss": 2.3535,
      "step": 6700
    },
    {
      "epoch": 0.495465604954656,
      "grad_norm": 0.39526769518852234,
      "learning_rate": 5.046081250460812e-06,
      "loss": 2.3536,
      "step": 6720
    },
    {
      "epoch": 0.49694020496940206,
      "grad_norm": 0.38120681047439575,
      "learning_rate": 5.031335250313352e-06,
      "loss": 2.342,
      "step": 6740
    },
    {
      "epoch": 0.49841480498414803,
| "grad_norm": 0.3704290986061096, | |
| "learning_rate": 5.016589250165893e-06, | |
| "loss": 2.3455, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.49988940499889406, | |
| "grad_norm": 0.3897051215171814, | |
| "learning_rate": 5.001843250018433e-06, | |
| "loss": 2.3538, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5013640050136401, | |
| "grad_norm": 0.3682934045791626, | |
| "learning_rate": 4.987097249870973e-06, | |
| "loss": 2.3647, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5028386050283861, | |
| "grad_norm": 0.370624840259552, | |
| "learning_rate": 4.972351249723512e-06, | |
| "loss": 2.3561, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.504313205043132, | |
| "grad_norm": 0.3740212321281433, | |
| "learning_rate": 4.957605249576053e-06, | |
| "loss": 2.3297, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.505787805057878, | |
| "grad_norm": 0.3610004484653473, | |
| "learning_rate": 4.942859249428592e-06, | |
| "loss": 2.3645, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5072624050726241, | |
| "grad_norm": 0.47170379757881165, | |
| "learning_rate": 4.928113249281133e-06, | |
| "loss": 2.3497, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5087370050873701, | |
| "grad_norm": 0.36539992690086365, | |
| "learning_rate": 4.913367249133673e-06, | |
| "loss": 2.357, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.510211605102116, | |
| "grad_norm": 0.4011884927749634, | |
| "learning_rate": 4.898621248986213e-06, | |
| "loss": 2.3517, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.511686205116862, | |
| "grad_norm": 0.37110114097595215, | |
| "learning_rate": 4.883875248838753e-06, | |
| "loss": 2.359, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.513160805131608, | |
| "grad_norm": 0.36303210258483887, | |
| "learning_rate": 4.869129248691293e-06, | |
| "loss": 2.3638, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5146354051463541, | |
| "grad_norm": 0.39052852988243103, | |
| "learning_rate": 4.854383248543833e-06, | |
| "loss": 2.3573, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5161100051611001, | |
| "grad_norm": 0.3620651066303253, | |
| "learning_rate": 4.839637248396373e-06, | |
| "loss": 2.3413, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.517584605175846, | |
| "grad_norm": 0.35670602321624756, | |
| "learning_rate": 4.824891248248913e-06, | |
| "loss": 2.358, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.519059205190592, | |
| "grad_norm": 0.3757779002189636, | |
| "learning_rate": 4.810145248101453e-06, | |
| "loss": 2.3408, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5205338052053381, | |
| "grad_norm": 0.3540636897087097, | |
| "learning_rate": 4.795399247953993e-06, | |
| "loss": 2.3386, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5220084052200841, | |
| "grad_norm": 0.3777630031108856, | |
| "learning_rate": 4.780653247806532e-06, | |
| "loss": 2.3498, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.52348300523483, | |
| "grad_norm": 0.38715338706970215, | |
| "learning_rate": 4.765907247659073e-06, | |
| "loss": 2.3357, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.524957605249576, | |
| "grad_norm": 0.37200599908828735, | |
| "learning_rate": 4.751161247511613e-06, | |
| "loss": 2.3409, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.5264322052643221, | |
| "grad_norm": 0.34266674518585205, | |
| "learning_rate": 4.736415247364153e-06, | |
| "loss": 2.3489, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.5279068052790681, | |
| "grad_norm": 0.39111700654029846, | |
| "learning_rate": 4.721669247216693e-06, | |
| "loss": 2.3399, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.5293814052938141, | |
| "grad_norm": 0.34332647919654846, | |
| "learning_rate": 4.706923247069233e-06, | |
| "loss": 2.3407, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.53085600530856, | |
| "grad_norm": 0.3694305717945099, | |
| "learning_rate": 4.692177246921773e-06, | |
| "loss": 2.3507, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.532330605323306, | |
| "grad_norm": 0.3671371042728424, | |
| "learning_rate": 4.677431246774313e-06, | |
| "loss": 2.3511, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.5338052053380521, | |
| "grad_norm": 0.35831600427627563, | |
| "learning_rate": 4.662685246626853e-06, | |
| "loss": 2.3409, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.5352798053527981, | |
| "grad_norm": 0.388938307762146, | |
| "learning_rate": 4.647939246479393e-06, | |
| "loss": 2.3491, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.536754405367544, | |
| "grad_norm": 0.3723289668560028, | |
| "learning_rate": 4.633193246331933e-06, | |
| "loss": 2.3538, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.53822900538229, | |
| "grad_norm": 0.3653968870639801, | |
| "learning_rate": 4.618447246184473e-06, | |
| "loss": 2.3477, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.5397036053970361, | |
| "grad_norm": 0.3933035731315613, | |
| "learning_rate": 4.603701246037013e-06, | |
| "loss": 2.3452, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.5411782054117821, | |
| "grad_norm": 0.36271172761917114, | |
| "learning_rate": 4.588955245889553e-06, | |
| "loss": 2.3538, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.542652805426528, | |
| "grad_norm": 0.3978424072265625, | |
| "learning_rate": 4.574209245742093e-06, | |
| "loss": 2.3462, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.544127405441274, | |
| "grad_norm": 0.35292622447013855, | |
| "learning_rate": 4.559463245594633e-06, | |
| "loss": 2.3517, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.54560200545602, | |
| "grad_norm": 0.3812173008918762, | |
| "learning_rate": 4.544717245447173e-06, | |
| "loss": 2.3494, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.5470766054707661, | |
| "grad_norm": 0.3730863034725189, | |
| "learning_rate": 4.529971245299713e-06, | |
| "loss": 2.3468, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.5485512054855121, | |
| "grad_norm": 0.3639664053916931, | |
| "learning_rate": 4.515225245152253e-06, | |
| "loss": 2.3509, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.550025805500258, | |
| "grad_norm": 0.3697253167629242, | |
| "learning_rate": 4.500479245004793e-06, | |
| "loss": 2.3563, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.551500405515004, | |
| "grad_norm": 0.44058653712272644, | |
| "learning_rate": 4.485733244857333e-06, | |
| "loss": 2.3463, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.5529750055297501, | |
| "grad_norm": 0.3621077835559845, | |
| "learning_rate": 4.470987244709873e-06, | |
| "loss": 2.3484, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.5544496055444961, | |
| "grad_norm": 0.3550095558166504, | |
| "learning_rate": 4.456241244562413e-06, | |
| "loss": 2.3503, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.555924205559242, | |
| "grad_norm": 0.40449610352516174, | |
| "learning_rate": 4.441495244414953e-06, | |
| "loss": 2.3392, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.557398805573988, | |
| "grad_norm": 0.41954267024993896, | |
| "learning_rate": 4.426749244267493e-06, | |
| "loss": 2.3544, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.5588734055887341, | |
| "grad_norm": 0.3714821934700012, | |
| "learning_rate": 4.412003244120033e-06, | |
| "loss": 2.3517, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.5603480056034801, | |
| "grad_norm": 0.35409021377563477, | |
| "learning_rate": 4.397257243972573e-06, | |
| "loss": 2.3368, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.5618226056182261, | |
| "grad_norm": 0.38718315958976746, | |
| "learning_rate": 4.382511243825113e-06, | |
| "loss": 2.349, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.563297205632972, | |
| "grad_norm": 0.3989495038986206, | |
| "learning_rate": 4.367765243677653e-06, | |
| "loss": 2.3622, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.564771805647718, | |
| "grad_norm": 0.38206225633621216, | |
| "learning_rate": 4.353019243530193e-06, | |
| "loss": 2.348, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.5662464056624641, | |
| "grad_norm": 0.4094175100326538, | |
| "learning_rate": 4.338273243382733e-06, | |
| "loss": 2.337, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.5677210056772101, | |
| "grad_norm": 0.3633534908294678, | |
| "learning_rate": 4.323527243235273e-06, | |
| "loss": 2.3413, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.569195605691956, | |
| "grad_norm": 0.3575294613838196, | |
| "learning_rate": 4.308781243087813e-06, | |
| "loss": 2.3388, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.570670205706702, | |
| "grad_norm": 0.3401530683040619, | |
| "learning_rate": 4.294035242940353e-06, | |
| "loss": 2.337, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.5721448057214481, | |
| "grad_norm": 0.37117597460746765, | |
| "learning_rate": 4.279289242792893e-06, | |
| "loss": 2.3489, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.5736194057361941, | |
| "grad_norm": 0.4254913330078125, | |
| "learning_rate": 4.264543242645433e-06, | |
| "loss": 2.3592, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.57509400575094, | |
| "grad_norm": 0.3747338354587555, | |
| "learning_rate": 4.249797242497973e-06, | |
| "loss": 2.3525, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.576568605765686, | |
| "grad_norm": 0.4081355035305023, | |
| "learning_rate": 4.235051242350513e-06, | |
| "loss": 2.3482, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.5780432057804321, | |
| "grad_norm": 0.3858603835105896, | |
| "learning_rate": 4.220305242203053e-06, | |
| "loss": 2.3443, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.5795178057951781, | |
| "grad_norm": 0.3556371033191681, | |
| "learning_rate": 4.205559242055593e-06, | |
| "loss": 2.3328, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.5809924058099241, | |
| "grad_norm": 0.3800757825374603, | |
| "learning_rate": 4.190813241908133e-06, | |
| "loss": 2.3494, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.58246700582467, | |
| "grad_norm": 0.3651157021522522, | |
| "learning_rate": 4.176067241760673e-06, | |
| "loss": 2.3476, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 0.35027289390563965, | |
| "learning_rate": 4.161321241613213e-06, | |
| "loss": 2.362, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.5854162058541621, | |
| "grad_norm": 0.37928810715675354, | |
| "learning_rate": 4.146575241465753e-06, | |
| "loss": 2.3471, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.5868908058689081, | |
| "grad_norm": 0.3614574372768402, | |
| "learning_rate": 4.131829241318293e-06, | |
| "loss": 2.3469, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.588365405883654, | |
| "grad_norm": 0.3692425489425659, | |
| "learning_rate": 4.117083241170833e-06, | |
| "loss": 2.3517, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.5898400058984, | |
| "grad_norm": 0.3615201413631439, | |
| "learning_rate": 4.102337241023373e-06, | |
| "loss": 2.3385, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.5913146059131461, | |
| "grad_norm": 0.37288710474967957, | |
| "learning_rate": 4.0875912408759126e-06, | |
| "loss": 2.3592, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.5927892059278921, | |
| "grad_norm": 0.3470204472541809, | |
| "learning_rate": 4.072845240728453e-06, | |
| "loss": 2.3546, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.5942638059426381, | |
| "grad_norm": 0.38719677925109863, | |
| "learning_rate": 4.0580992405809925e-06, | |
| "loss": 2.3439, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.595738405957384, | |
| "grad_norm": 0.3788948953151703, | |
| "learning_rate": 4.0433532404335325e-06, | |
| "loss": 2.3555, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.59721300597213, | |
| "grad_norm": 0.42110002040863037, | |
| "learning_rate": 4.0286072402860725e-06, | |
| "loss": 2.3502, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.5986876059868761, | |
| "grad_norm": 0.4007836580276489, | |
| "learning_rate": 4.0138612401386125e-06, | |
| "loss": 2.3571, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.6001622060016221, | |
| "grad_norm": 0.36043021082878113, | |
| "learning_rate": 3.9991152399911525e-06, | |
| "loss": 2.3589, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.601636806016368, | |
| "grad_norm": 0.4028465151786804, | |
| "learning_rate": 3.9843692398436925e-06, | |
| "loss": 2.3399, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.603111406031114, | |
| "grad_norm": 0.39534154534339905, | |
| "learning_rate": 3.9696232396962325e-06, | |
| "loss": 2.3571, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.6045860060458601, | |
| "grad_norm": 0.361286997795105, | |
| "learning_rate": 3.9548772395487725e-06, | |
| "loss": 2.3358, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 0.3674760162830353, | |
| "learning_rate": 3.9401312394013125e-06, | |
| "loss": 2.3449, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.607535206075352, | |
| "grad_norm": 0.39205583930015564, | |
| "learning_rate": 3.9253852392538525e-06, | |
| "loss": 2.3558, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.609009806090098, | |
| "grad_norm": 0.3901711404323578, | |
| "learning_rate": 3.910639239106393e-06, | |
| "loss": 2.3565, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.6104844061048441, | |
| "grad_norm": 0.3791930377483368, | |
| "learning_rate": 3.8958932389589324e-06, | |
| "loss": 2.3425, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.6119590061195901, | |
| "grad_norm": 0.36277633905410767, | |
| "learning_rate": 3.881147238811473e-06, | |
| "loss": 2.3485, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.6134336061343361, | |
| "grad_norm": 0.35051462054252625, | |
| "learning_rate": 3.866401238664012e-06, | |
| "loss": 2.3486, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.614908206149082, | |
| "grad_norm": 0.3554931581020355, | |
| "learning_rate": 3.851655238516553e-06, | |
| "loss": 2.3626, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.616382806163828, | |
| "grad_norm": 0.36661261320114136, | |
| "learning_rate": 3.836909238369092e-06, | |
| "loss": 2.3405, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.6178574061785741, | |
| "grad_norm": 0.3876403570175171, | |
| "learning_rate": 3.822163238221633e-06, | |
| "loss": 2.3422, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.6193320061933201, | |
| "grad_norm": 0.3810766637325287, | |
| "learning_rate": 3.8074172380741724e-06, | |
| "loss": 2.3466, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.620806606208066, | |
| "grad_norm": 0.3949829638004303, | |
| "learning_rate": 3.7926712379267128e-06, | |
| "loss": 2.3501, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.622281206222812, | |
| "grad_norm": 0.39543384313583374, | |
| "learning_rate": 3.7779252377792523e-06, | |
| "loss": 2.3365, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.6237558062375581, | |
| "grad_norm": 0.3472473919391632, | |
| "learning_rate": 3.7631792376317928e-06, | |
| "loss": 2.3544, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.6252304062523041, | |
| "grad_norm": 0.3589697778224945, | |
| "learning_rate": 3.7484332374843323e-06, | |
| "loss": 2.3402, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.6267050062670501, | |
| "grad_norm": 0.37059295177459717, | |
| "learning_rate": 3.7336872373368727e-06, | |
| "loss": 2.3462, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.628179606281796, | |
| "grad_norm": 0.40565216541290283, | |
| "learning_rate": 3.7189412371894127e-06, | |
| "loss": 2.3501, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.6296542062965421, | |
| "grad_norm": 0.36283183097839355, | |
| "learning_rate": 3.7041952370419527e-06, | |
| "loss": 2.3423, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.6311288063112881, | |
| "grad_norm": 0.3886597156524658, | |
| "learning_rate": 3.6894492368944927e-06, | |
| "loss": 2.3386, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.6326034063260341, | |
| "grad_norm": 0.3656463027000427, | |
| "learning_rate": 3.6747032367470327e-06, | |
| "loss": 2.3526, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.63407800634078, | |
| "grad_norm": 0.4167777895927429, | |
| "learning_rate": 3.6599572365995727e-06, | |
| "loss": 2.3581, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.635552606355526, | |
| "grad_norm": 0.3539351522922516, | |
| "learning_rate": 3.6452112364521127e-06, | |
| "loss": 2.3396, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.6370272063702721, | |
| "grad_norm": 0.3550587296485901, | |
| "learning_rate": 3.6304652363046527e-06, | |
| "loss": 2.344, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.6385018063850181, | |
| "grad_norm": 0.3939066529273987, | |
| "learning_rate": 3.6157192361571927e-06, | |
| "loss": 2.357, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.639976406399764, | |
| "grad_norm": 0.3614286482334137, | |
| "learning_rate": 3.6009732360097326e-06, | |
| "loss": 2.3433, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.64145100641451, | |
| "grad_norm": 0.402174711227417, | |
| "learning_rate": 3.586227235862272e-06, | |
| "loss": 2.3329, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.6429256064292561, | |
| "grad_norm": 0.4092716574668884, | |
| "learning_rate": 3.5714812357148126e-06, | |
| "loss": 2.3489, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.6444002064440021, | |
| "grad_norm": 0.3816792666912079, | |
| "learning_rate": 3.556735235567353e-06, | |
| "loss": 2.3493, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.6458748064587481, | |
| "grad_norm": 0.34877556562423706, | |
| "learning_rate": 3.5419892354198926e-06, | |
| "loss": 2.3694, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.647349406473494, | |
| "grad_norm": 0.39949148893356323, | |
| "learning_rate": 3.527243235272433e-06, | |
| "loss": 2.3366, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.64882400648824, | |
| "grad_norm": 0.3934047222137451, | |
| "learning_rate": 3.5124972351249726e-06, | |
| "loss": 2.3412, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.6502986065029861, | |
| "grad_norm": 0.3608020842075348, | |
| "learning_rate": 3.497751234977513e-06, | |
| "loss": 2.3325, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.6517732065177321, | |
| "grad_norm": 0.3528784215450287, | |
| "learning_rate": 3.4830052348300526e-06, | |
| "loss": 2.3539, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.653247806532478, | |
| "grad_norm": 0.34913375973701477, | |
| "learning_rate": 3.4682592346825925e-06, | |
| "loss": 2.3501, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.654722406547224, | |
| "grad_norm": 0.39281442761421204, | |
| "learning_rate": 3.4535132345351325e-06, | |
| "loss": 2.3413, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.6561970065619701, | |
| "grad_norm": 0.352173775434494, | |
| "learning_rate": 3.4387672343876725e-06, | |
| "loss": 2.3636, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.6576716065767161, | |
| "grad_norm": 0.44138431549072266, | |
| "learning_rate": 3.4240212342402125e-06, | |
| "loss": 2.3365, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.6591462065914621, | |
| "grad_norm": 0.38679641485214233, | |
| "learning_rate": 3.4092752340927525e-06, | |
| "loss": 2.3473, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.660620806606208, | |
| "grad_norm": 0.37204068899154663, | |
| "learning_rate": 3.394529233945293e-06, | |
| "loss": 2.3422, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.6620954066209541, | |
| "grad_norm": 0.36612871289253235, | |
| "learning_rate": 3.3797832337978325e-06, | |
| "loss": 2.3423, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.6635700066357001, | |
| "grad_norm": 0.36784979701042175, | |
| "learning_rate": 3.365037233650373e-06, | |
| "loss": 2.3458, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.6650446066504461, | |
| "grad_norm": 0.3783581852912903, | |
| "learning_rate": 3.3502912335029125e-06, | |
| "loss": 2.351, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.666519206665192, | |
| "grad_norm": 0.4059109389781952, | |
| "learning_rate": 3.335545233355453e-06, | |
| "loss": 2.3504, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.667993806679938, | |
| "grad_norm": 0.39486610889434814, | |
| "learning_rate": 3.3207992332079924e-06, | |
| "loss": 2.3425, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.6694684066946841, | |
| "grad_norm": 0.35696399211883545, | |
| "learning_rate": 3.306053233060533e-06, | |
| "loss": 2.3668, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.6709430067094301, | |
| "grad_norm": 0.3573276102542877, | |
| "learning_rate": 3.2913072329130724e-06, | |
| "loss": 2.3503, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.672417606724176, | |
| "grad_norm": 0.4014319181442261, | |
| "learning_rate": 3.276561232765613e-06, | |
| "loss": 2.3364, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.673892206738922, | |
| "grad_norm": 0.3681239187717438, | |
| "learning_rate": 3.2618152326181524e-06, | |
| "loss": 2.3451, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.6753668067536681, | |
| "grad_norm": 0.4503883421421051, | |
| "learning_rate": 3.247069232470693e-06, | |
| "loss": 2.3408, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.6768414067684141, | |
| "grad_norm": 0.37346574664115906, | |
| "learning_rate": 3.232323232323233e-06, | |
| "loss": 2.3598, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.6783160067831601, | |
| "grad_norm": 0.3615313470363617, | |
| "learning_rate": 3.2175772321757724e-06, | |
| "loss": 2.3434, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.679790606797906, | |
| "grad_norm": 0.3929205536842346, | |
| "learning_rate": 3.2028312320283128e-06, | |
| "loss": 2.3439, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.681265206812652, | |
| "grad_norm": 0.37737640738487244, | |
| "learning_rate": 3.1880852318808523e-06, | |
| "loss": 2.349, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.6827398068273981, | |
| "grad_norm": 0.41763532161712646, | |
| "learning_rate": 3.1733392317333928e-06, | |
| "loss": 2.3422, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.6842144068421441, | |
| "grad_norm": 0.4058336615562439, | |
| "learning_rate": 3.1585932315859323e-06, | |
| "loss": 2.3459, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.68568900685689, | |
| "grad_norm": 0.3860774636268616, | |
| "learning_rate": 3.1438472314384727e-06, | |
| "loss": 2.3524, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.687163606871636, | |
| "grad_norm": 0.401663213968277, | |
| "learning_rate": 3.1291012312910123e-06, | |
| "loss": 2.3326, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.6886382068863821, | |
| "grad_norm": 0.3691912889480591, | |
| "learning_rate": 3.1143552311435527e-06, | |
| "loss": 2.337, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.6901128069011281, | |
| "grad_norm": 0.4243042767047882, | |
| "learning_rate": 3.0996092309960923e-06, | |
| "loss": 2.3432, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.6915874069158741, | |
| "grad_norm": 0.3880211412906647, | |
| "learning_rate": 3.0848632308486327e-06, | |
| "loss": 2.3664, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.69306200693062, | |
| "grad_norm": 0.3545699119567871, | |
| "learning_rate": 3.0701172307011723e-06, | |
| "loss": 2.3448, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.6945366069453661, | |
| "grad_norm": 0.38192018866539, | |
| "learning_rate": 3.0553712305537127e-06, | |
| "loss": 2.3564, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.6960112069601121, | |
| "grad_norm": 0.39317330718040466, | |
| "learning_rate": 3.0406252304062527e-06, | |
| "loss": 2.3676, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.6974858069748581, | |
| "grad_norm": 0.3686807453632355, | |
| "learning_rate": 3.0258792302587927e-06, | |
| "loss": 2.3513, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.698960406989604, | |
| "grad_norm": 0.43928787112236023, | |
| "learning_rate": 3.0111332301113326e-06, | |
| "loss": 2.3368, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.70043500700435, | |
| "grad_norm": 0.3794805705547333, | |
| "learning_rate": 2.9963872299638726e-06, | |
| "loss": 2.3504, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.7019096070190961, | |
| "grad_norm": 0.391825407743454, | |
| "learning_rate": 2.9816412298164126e-06, | |
| "loss": 2.355, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.7033842070338421, | |
| "grad_norm": 0.37879860401153564, | |
| "learning_rate": 2.966895229668952e-06, | |
| "loss": 2.3454, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.704858807048588, | |
| "grad_norm": 0.4129418134689331, | |
| "learning_rate": 2.9521492295214926e-06, | |
| "loss": 2.3496, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.706333407063334, | |
| "grad_norm": 0.4124239683151245, | |
| "learning_rate": 2.937403229374032e-06, | |
| "loss": 2.353, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.7078080070780801, | |
| "grad_norm": 0.391527384519577, | |
| "learning_rate": 2.9226572292265726e-06, | |
| "loss": 2.3397, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.7092826070928261, | |
| "grad_norm": 0.4149307906627655, | |
| "learning_rate": 2.907911229079112e-06, | |
| "loss": 2.3408, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.7107572071075721, | |
| "grad_norm": 0.3845043480396271, | |
| "learning_rate": 2.8931652289316526e-06, | |
| "loss": 2.3498, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.712231807122318, | |
| "grad_norm": 0.355175256729126, | |
| "learning_rate": 2.878419228784193e-06, | |
| "loss": 2.3558, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.7137064071370641, | |
| "grad_norm": 0.37544727325439453, | |
| "learning_rate": 2.8636732286367325e-06, | |
| "loss": 2.3629, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.7151810071518101, | |
| "grad_norm": 0.394980251789093, | |
| "learning_rate": 2.8489272284892725e-06, | |
| "loss": 2.3456, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.7166556071665561, | |
| "grad_norm": 0.39948976039886475, | |
| "learning_rate": 2.8341812283418125e-06, | |
| "loss": 2.3542, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.718130207181302, | |
| "grad_norm": 0.3865370750427246, | |
| "learning_rate": 2.8194352281943525e-06, | |
| "loss": 2.3262, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.719604807196048, | |
| "grad_norm": 0.36873430013656616, | |
| "learning_rate": 2.8046892280468925e-06, | |
| "loss": 2.343, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.7210794072107941, | |
| "grad_norm": 0.37330904603004456, | |
| "learning_rate": 2.7899432278994325e-06, | |
| "loss": 2.3629, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.7225540072255401, | |
| "grad_norm": 0.392511785030365, | |
| "learning_rate": 2.7751972277519725e-06, | |
| "loss": 2.3364, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.7240286072402861, | |
| "grad_norm": 0.4280540943145752, | |
| "learning_rate": 2.7604512276045125e-06, | |
| "loss": 2.344, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.725503207255032, | |
| "grad_norm": 0.36759504675865173, | |
| "learning_rate": 2.7457052274570525e-06, | |
| "loss": 2.3639, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.7269778072697781, | |
| "grad_norm": 0.3967907428741455, | |
| "learning_rate": 2.7309592273095924e-06, | |
| "loss": 2.3459, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.7284524072845241, | |
| "grad_norm": 0.3818022906780243, | |
| "learning_rate": 2.716213227162133e-06, | |
| "loss": 2.3471, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 0.3735957443714142, | |
| "learning_rate": 2.7014672270146724e-06, | |
| "loss": 2.3324, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.731401607314016, | |
| "grad_norm": 0.3850245773792267, | |
| "learning_rate": 2.686721226867213e-06, | |
| "loss": 2.3563, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.732876207328762, | |
| "grad_norm": 0.3557223081588745, | |
| "learning_rate": 2.6719752267197524e-06, | |
| "loss": 2.3416, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.7343508073435081, | |
| "grad_norm": 0.3680027723312378, | |
| "learning_rate": 2.657229226572293e-06, | |
| "loss": 2.3486, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.7358254073582541, | |
| "grad_norm": 0.39317336678504944, | |
| "learning_rate": 2.6424832264248324e-06, | |
| "loss": 2.3489, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.737300007373, | |
| "grad_norm": 0.44913551211357117, | |
| "learning_rate": 2.627737226277373e-06, | |
| "loss": 2.3476, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.738774607387746, | |
| "grad_norm": 0.4089840054512024, | |
| "learning_rate": 2.6129912261299124e-06, | |
| "loss": 2.344, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.7402492074024921, | |
| "grad_norm": 0.37188851833343506, | |
| "learning_rate": 2.5982452259824523e-06, | |
| "loss": 2.3488, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.7417238074172381, | |
| "grad_norm": 0.39748087525367737, | |
| "learning_rate": 2.5834992258349923e-06, | |
| "loss": 2.3651, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.7431984074319841, | |
| "grad_norm": 0.413461297750473, | |
| "learning_rate": 2.5687532256875323e-06, | |
| "loss": 2.3484, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.74467300744673, | |
| "grad_norm": 0.3835908770561218, | |
| "learning_rate": 2.5540072255400723e-06, | |
| "loss": 2.3542, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.7461476074614761, | |
| "grad_norm": 0.3700994551181793, | |
| "learning_rate": 2.5392612253926123e-06, | |
| "loss": 2.3559, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.7476222074762221, | |
| "grad_norm": 0.3828827738761902, | |
| "learning_rate": 2.5245152252451527e-06, | |
| "loss": 2.3453, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.7490968074909681, | |
| "grad_norm": 0.36642786860466003, | |
| "learning_rate": 2.5097692250976923e-06, | |
| "loss": 2.3426, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.750571407505714, | |
| "grad_norm": 0.44964683055877686, | |
| "learning_rate": 2.4950232249502323e-06, | |
| "loss": 2.3403, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.75204600752046, | |
| "grad_norm": 0.36480045318603516, | |
| "learning_rate": 2.4802772248027723e-06, | |
| "loss": 2.3374, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.7535206075352061, | |
| "grad_norm": 0.378704309463501, | |
| "learning_rate": 2.4655312246553127e-06, | |
| "loss": 2.3421, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.7549952075499521, | |
| "grad_norm": 0.3538469970226288, | |
| "learning_rate": 2.4507852245078527e-06, | |
| "loss": 2.3389, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.7564698075646981, | |
| "grad_norm": 0.3797251284122467, | |
| "learning_rate": 2.4360392243603927e-06, | |
| "loss": 2.3336, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.757944407579444, | |
| "grad_norm": 0.4074282944202423, | |
| "learning_rate": 2.4212932242129326e-06, | |
| "loss": 2.3509, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.7594190075941901, | |
| "grad_norm": 0.38486722111701965, | |
| "learning_rate": 2.4065472240654726e-06, | |
| "loss": 2.3461, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.7608936076089361, | |
| "grad_norm": 0.3682025372982025, | |
| "learning_rate": 2.3918012239180126e-06, | |
| "loss": 2.3539, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.7623682076236821, | |
| "grad_norm": 0.36342653632164, | |
| "learning_rate": 2.3770552237705526e-06, | |
| "loss": 2.3449, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.763842807638428, | |
| "grad_norm": 0.3666427433490753, | |
| "learning_rate": 2.3623092236230926e-06, | |
| "loss": 2.3422, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.7653174076531741, | |
| "grad_norm": 0.4068521559238434, | |
| "learning_rate": 2.347563223475632e-06, | |
| "loss": 2.3419, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.7667920076679201, | |
| "grad_norm": 0.3707336485385895, | |
| "learning_rate": 2.332817223328172e-06, | |
| "loss": 2.3598, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.7682666076826661, | |
| "grad_norm": 0.3835085332393646, | |
| "learning_rate": 2.318071223180712e-06, | |
| "loss": 2.3303, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.769741207697412, | |
| "grad_norm": 0.35998964309692383, | |
| "learning_rate": 2.303325223033252e-06, | |
| "loss": 2.347, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.771215807712158, | |
| "grad_norm": 0.6574367880821228, | |
| "learning_rate": 2.2885792228857925e-06, | |
| "loss": 2.3422, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.7726904077269041, | |
| "grad_norm": 0.391596257686615, | |
| "learning_rate": 2.2738332227383325e-06, | |
| "loss": 2.3322, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.7741650077416501, | |
| "grad_norm": 0.37120455503463745, | |
| "learning_rate": 2.2590872225908725e-06, | |
| "loss": 2.344, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.7756396077563961, | |
| "grad_norm": 0.43302685022354126, | |
| "learning_rate": 2.2443412224434125e-06, | |
| "loss": 2.3523, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.777114207771142, | |
| "grad_norm": 0.3854134678840637, | |
| "learning_rate": 2.2295952222959525e-06, | |
| "loss": 2.3581, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.7785888077858881, | |
| "grad_norm": 0.37082743644714355, | |
| "learning_rate": 2.2148492221484925e-06, | |
| "loss": 2.3417, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.7800634078006341, | |
| "grad_norm": 0.3822407126426697, | |
| "learning_rate": 2.2001032220010325e-06, | |
| "loss": 2.3432, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.7815380078153801, | |
| "grad_norm": 0.36346620321273804, | |
| "learning_rate": 2.1853572218535725e-06, | |
| "loss": 2.3525, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.783012607830126, | |
| "grad_norm": 0.3793860375881195, | |
| "learning_rate": 2.1706112217061125e-06, | |
| "loss": 2.3372, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.784487207844872, | |
| "grad_norm": 0.36953747272491455, | |
| "learning_rate": 2.1558652215586525e-06, | |
| "loss": 2.3496, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.7859618078596181, | |
| "grad_norm": 0.38170909881591797, | |
| "learning_rate": 2.1411192214111924e-06, | |
| "loss": 2.3468, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.7874364078743641, | |
| "grad_norm": 0.3705514073371887, | |
| "learning_rate": 2.1263732212637324e-06, | |
| "loss": 2.3288, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.7889110078891101, | |
| "grad_norm": 0.3680097460746765, | |
| "learning_rate": 2.1116272211162724e-06, | |
| "loss": 2.3361, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.790385607903856, | |
| "grad_norm": 0.35381895303726196, | |
| "learning_rate": 2.0968812209688124e-06, | |
| "loss": 2.3523, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.7918602079186021, | |
| "grad_norm": 0.3935386836528778, | |
| "learning_rate": 2.0821352208213524e-06, | |
| "loss": 2.3489, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.7933348079333481, | |
| "grad_norm": 0.4096679985523224, | |
| "learning_rate": 2.0673892206738924e-06, | |
| "loss": 2.3416, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.7948094079480941, | |
| "grad_norm": 0.37470847368240356, | |
| "learning_rate": 2.0526432205264324e-06, | |
| "loss": 2.3371, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.79628400796284, | |
| "grad_norm": 0.34806546568870544, | |
| "learning_rate": 2.0378972203789724e-06, | |
| "loss": 2.342, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.7977586079775861, | |
| "grad_norm": 0.39850929379463196, | |
| "learning_rate": 2.0231512202315124e-06, | |
| "loss": 2.3342, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.7992332079923321, | |
| "grad_norm": 0.3578292429447174, | |
| "learning_rate": 2.0084052200840523e-06, | |
| "loss": 2.3459, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.8007078080070781, | |
| "grad_norm": 0.38795024156570435, | |
| "learning_rate": 1.9936592199365923e-06, | |
| "loss": 2.345, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.8021824080218241, | |
| "grad_norm": 0.43707677721977234, | |
| "learning_rate": 1.9789132197891323e-06, | |
| "loss": 2.3461, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.80365700803657, | |
| "grad_norm": 0.38418900966644287, | |
| "learning_rate": 1.9641672196416723e-06, | |
| "loss": 2.342, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.8051316080513161, | |
| "grad_norm": 0.3943639099597931, | |
| "learning_rate": 1.9494212194942123e-06, | |
| "loss": 2.35, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.8066062080660621, | |
| "grad_norm": 0.36821678280830383, | |
| "learning_rate": 1.9346752193467523e-06, | |
| "loss": 2.3396, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 0.39869850873947144, | |
| "learning_rate": 1.9199292191992923e-06, | |
| "loss": 2.3631, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.809555408095554, | |
| "grad_norm": 0.3753523528575897, | |
| "learning_rate": 1.9051832190518325e-06, | |
| "loss": 2.3482, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.8110300081103001, | |
| "grad_norm": 0.4102868139743805, | |
| "learning_rate": 1.8904372189043725e-06, | |
| "loss": 2.3503, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.8125046081250461, | |
| "grad_norm": 0.37794923782348633, | |
| "learning_rate": 1.8756912187569122e-06, | |
| "loss": 2.334, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.8139792081397921, | |
| "grad_norm": 0.3869600296020508, | |
| "learning_rate": 1.8609452186094522e-06, | |
| "loss": 2.3432, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.815453808154538, | |
| "grad_norm": 0.3850298821926117, | |
| "learning_rate": 1.8461992184619922e-06, | |
| "loss": 2.3408, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.8169284081692841, | |
| "grad_norm": 0.36297619342803955, | |
| "learning_rate": 1.8314532183145322e-06, | |
| "loss": 2.3476, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.8184030081840301, | |
| "grad_norm": 0.3866339325904846, | |
| "learning_rate": 1.8167072181670722e-06, | |
| "loss": 2.3553, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.8198776081987761, | |
| "grad_norm": 0.41206780076026917, | |
| "learning_rate": 1.8019612180196122e-06, | |
| "loss": 2.3523, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.8213522082135221, | |
| "grad_norm": 0.35867977142333984, | |
| "learning_rate": 1.7872152178721524e-06, | |
| "loss": 2.3478, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.822826808228268, | |
| "grad_norm": 0.3772488236427307, | |
| "learning_rate": 1.7724692177246924e-06, | |
| "loss": 2.3411, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.8243014082430141, | |
| "grad_norm": 0.4143722355365753, | |
| "learning_rate": 1.7577232175772324e-06, | |
| "loss": 2.3466, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.8257760082577601, | |
| "grad_norm": 0.3593420684337616, | |
| "learning_rate": 1.7429772174297724e-06, | |
| "loss": 2.3542, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.8272506082725061, | |
| "grad_norm": 0.3915468454360962, | |
| "learning_rate": 1.7282312172823124e-06, | |
| "loss": 2.3357, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.828725208287252, | |
| "grad_norm": 0.3736458718776703, | |
| "learning_rate": 1.7134852171348523e-06, | |
| "loss": 2.3371, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.8301998083019981, | |
| "grad_norm": 0.3699648976325989, | |
| "learning_rate": 1.6987392169873923e-06, | |
| "loss": 2.3514, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.8316744083167441, | |
| "grad_norm": 0.37435752153396606, | |
| "learning_rate": 1.6839932168399323e-06, | |
| "loss": 2.3535, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.8331490083314901, | |
| "grad_norm": 0.4178158640861511, | |
| "learning_rate": 1.6692472166924723e-06, | |
| "loss": 2.3212, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.8346236083462361, | |
| "grad_norm": 0.38455960154533386, | |
| "learning_rate": 1.6545012165450123e-06, | |
| "loss": 2.3473, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.836098208360982, | |
| "grad_norm": 0.3831127882003784, | |
| "learning_rate": 1.6397552163975523e-06, | |
| "loss": 2.3565, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.8375728083757281, | |
| "grad_norm": 0.4265178442001343, | |
| "learning_rate": 1.625009216250092e-06, | |
| "loss": 2.341, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.8390474083904741, | |
| "grad_norm": 0.3953835666179657, | |
| "learning_rate": 1.6102632161026325e-06, | |
| "loss": 2.3405, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.8405220084052201, | |
| "grad_norm": 0.39547884464263916, | |
| "learning_rate": 1.5955172159551725e-06, | |
| "loss": 2.3385, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.841996608419966, | |
| "grad_norm": 0.352119505405426, | |
| "learning_rate": 1.5807712158077125e-06, | |
| "loss": 2.3277, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.8434712084347121, | |
| "grad_norm": 0.4068032205104828, | |
| "learning_rate": 1.5660252156602522e-06, | |
| "loss": 2.3498, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.8449458084494581, | |
| "grad_norm": 0.4023716151714325, | |
| "learning_rate": 1.5512792155127922e-06, | |
| "loss": 2.373, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.8464204084642041, | |
| "grad_norm": 0.36872199177742004, | |
| "learning_rate": 1.5365332153653322e-06, | |
| "loss": 2.346, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.84789500847895, | |
| "grad_norm": 0.37465929985046387, | |
| "learning_rate": 1.5217872152178722e-06, | |
| "loss": 2.3464, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.8493696084936961, | |
| "grad_norm": 0.37545257806777954, | |
| "learning_rate": 1.5070412150704122e-06, | |
| "loss": 2.3493, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.8508442085084421, | |
| "grad_norm": 0.36542752385139465, | |
| "learning_rate": 1.4922952149229522e-06, | |
| "loss": 2.348, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.8523188085231881, | |
| "grad_norm": 0.36369800567626953, | |
| "learning_rate": 1.4775492147754922e-06, | |
| "loss": 2.3648, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.8537934085379341, | |
| "grad_norm": 0.3845520615577698, | |
| "learning_rate": 1.4628032146280322e-06, | |
| "loss": 2.3521, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.85526800855268, | |
| "grad_norm": 0.3865341246128082, | |
| "learning_rate": 1.4480572144805722e-06, | |
| "loss": 2.3442, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.8567426085674261, | |
| "grad_norm": 0.37881407141685486, | |
| "learning_rate": 1.4333112143331124e-06, | |
| "loss": 2.3469, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.8582172085821721, | |
| "grad_norm": 0.38905656337738037, | |
| "learning_rate": 1.4185652141856523e-06, | |
| "loss": 2.357, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.8596918085969181, | |
| "grad_norm": 0.40676259994506836, | |
| "learning_rate": 1.4038192140381923e-06, | |
| "loss": 2.34, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.861166408611664, | |
| "grad_norm": 0.3595060110092163, | |
| "learning_rate": 1.3890732138907323e-06, | |
| "loss": 2.3351, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.8626410086264101, | |
| "grad_norm": 0.39331743121147156, | |
| "learning_rate": 1.3743272137432723e-06, | |
| "loss": 2.3463, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.8641156086411561, | |
| "grad_norm": 0.3540342450141907, | |
| "learning_rate": 1.3595812135958123e-06, | |
| "loss": 2.3438, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.8655902086559021, | |
| "grad_norm": 0.40179315209388733, | |
| "learning_rate": 1.3448352134483523e-06, | |
| "loss": 2.3518, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.8670648086706481, | |
| "grad_norm": 0.3446930944919586, | |
| "learning_rate": 1.3300892133008923e-06, | |
| "loss": 2.3446, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.868539408685394, | |
| "grad_norm": 0.3802293539047241, | |
| "learning_rate": 1.3153432131534323e-06, | |
| "loss": 2.3442, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.8700140087001401, | |
| "grad_norm": 0.4139614403247833, | |
| "learning_rate": 1.300597213005972e-06, | |
| "loss": 2.3283, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.8714886087148861, | |
| "grad_norm": 0.36472469568252563, | |
| "learning_rate": 1.285851212858512e-06, | |
| "loss": 2.3493, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.8729632087296321, | |
| "grad_norm": 0.36495742201805115, | |
| "learning_rate": 1.2711052127110524e-06, | |
| "loss": 2.3424, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.874437808744378, | |
| "grad_norm": 0.3818816840648651, | |
| "learning_rate": 1.2563592125635924e-06, | |
| "loss": 2.355, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.4109640419483185, | |
| "learning_rate": 1.2416132124161322e-06, | |
| "loss": 2.3367, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.8773870087738701, | |
| "grad_norm": 0.38012924790382385, | |
| "learning_rate": 1.2268672122686722e-06, | |
| "loss": 2.342, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.8788616087886161, | |
| "grad_norm": 0.4005228877067566, | |
| "learning_rate": 1.2121212121212122e-06, | |
| "loss": 2.3479, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.880336208803362, | |
| "grad_norm": 0.3996869623661041, | |
| "learning_rate": 1.1973752119737522e-06, | |
| "loss": 2.3539, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.8818108088181081, | |
| "grad_norm": 0.4269565939903259, | |
| "learning_rate": 1.1826292118262922e-06, | |
| "loss": 2.3413, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.8832854088328541, | |
| "grad_norm": 0.3505631983280182, | |
| "learning_rate": 1.1678832116788322e-06, | |
| "loss": 2.3542, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.8847600088476001, | |
| "grad_norm": 0.3690703213214874, | |
| "learning_rate": 1.1531372115313722e-06, | |
| "loss": 2.3461, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.8862346088623461, | |
| "grad_norm": 0.40082600712776184, | |
| "learning_rate": 1.1383912113839124e-06, | |
| "loss": 2.3473, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.887709208877092, | |
| "grad_norm": 0.3835630714893341, | |
| "learning_rate": 1.1236452112364521e-06, | |
| "loss": 2.3459, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.8891838088918381, | |
| "grad_norm": 0.41259172558784485, | |
| "learning_rate": 1.1088992110889921e-06, | |
| "loss": 2.3486, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.8906584089065841, | |
| "grad_norm": 0.40323716402053833, | |
| "learning_rate": 1.0941532109415321e-06, | |
| "loss": 2.3371, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.8921330089213301, | |
| "grad_norm": 0.36476635932922363, | |
| "learning_rate": 1.079407210794072e-06, | |
| "loss": 2.3514, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.893607608936076, | |
| "grad_norm": 0.38329634070396423, | |
| "learning_rate": 1.064661210646612e-06, | |
| "loss": 2.3238, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.8950822089508221, | |
| "grad_norm": 0.38826659321784973, | |
| "learning_rate": 1.0499152104991523e-06, | |
| "loss": 2.3481, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.8965568089655681, | |
| "grad_norm": 0.43189194798469543, | |
| "learning_rate": 1.0351692103516923e-06, | |
| "loss": 2.3562, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.8980314089803141, | |
| "grad_norm": 0.3478831946849823, | |
| "learning_rate": 1.0204232102042323e-06, | |
| "loss": 2.3451, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.8995060089950601, | |
| "grad_norm": 0.3830432891845703, | |
| "learning_rate": 1.0056772100567723e-06, | |
| "loss": 2.3351, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.9009806090098061, | |
| "grad_norm": 0.3779089152812958, | |
| "learning_rate": 9.90931209909312e-07, | |
| "loss": 2.3421, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.9024552090245521, | |
| "grad_norm": 0.37689492106437683, | |
| "learning_rate": 9.761852097618522e-07, | |
| "loss": 2.3456, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.9039298090392981, | |
| "grad_norm": 0.38662415742874146, | |
| "learning_rate": 9.614392096143922e-07, | |
| "loss": 2.3414, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.9054044090540441, | |
| "grad_norm": 0.3387671411037445, | |
| "learning_rate": 9.466932094669322e-07, | |
| "loss": 2.3408, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.90687900906879, | |
| "grad_norm": 0.37837815284729004, | |
| "learning_rate": 9.319472093194722e-07, | |
| "loss": 2.3413, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.9083536090835361, | |
| "grad_norm": 0.35393086075782776, | |
| "learning_rate": 9.172012091720121e-07, | |
| "loss": 2.348, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.9098282090982821, | |
| "grad_norm": 0.4034173786640167, | |
| "learning_rate": 9.024552090245521e-07, | |
| "loss": 2.3728, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.9113028091130281, | |
| "grad_norm": 0.37714606523513794, | |
| "learning_rate": 8.877092088770922e-07, | |
| "loss": 2.3473, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.912777409127774, | |
| "grad_norm": 0.41862747073173523, | |
| "learning_rate": 8.729632087296322e-07, | |
| "loss": 2.3446, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.9142520091425201, | |
| "grad_norm": 0.3840957283973694, | |
| "learning_rate": 8.582172085821722e-07, | |
| "loss": 2.3374, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.9157266091572661, | |
| "grad_norm": 0.3914671838283539, | |
| "learning_rate": 8.434712084347121e-07, | |
| "loss": 2.3307, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.9172012091720121, | |
| "grad_norm": 0.38131779432296753, | |
| "learning_rate": 8.287252082872521e-07, | |
| "loss": 2.3293, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.9186758091867581, | |
| "grad_norm": 0.35891827940940857, | |
| "learning_rate": 8.139792081397921e-07, | |
| "loss": 2.3502, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.920150409201504, | |
| "grad_norm": 0.3831949532032013, | |
| "learning_rate": 7.992332079923322e-07, | |
| "loss": 2.3375, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.9216250092162501, | |
| "grad_norm": 0.3593050241470337, | |
| "learning_rate": 7.844872078448722e-07, | |
| "loss": 2.3435, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.9230996092309961, | |
| "grad_norm": 0.4091593027114868, | |
| "learning_rate": 7.697412076974122e-07, | |
| "loss": 2.3461, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 0.9245742092457421, | |
| "grad_norm": 0.3784448206424713, | |
| "learning_rate": 7.549952075499521e-07, | |
| "loss": 2.352, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.926048809260488, | |
| "grad_norm": 0.3828209936618805, | |
| "learning_rate": 7.402492074024921e-07, | |
| "loss": 2.3456, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 0.9275234092752341, | |
| "grad_norm": 0.3510684072971344, | |
| "learning_rate": 7.255032072550321e-07, | |
| "loss": 2.3501, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 0.9289980092899801, | |
| "grad_norm": 0.3757297694683075, | |
| "learning_rate": 7.107572071075722e-07, | |
| "loss": 2.3365, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.9304726093047261, | |
| "grad_norm": 0.38082218170166016, | |
| "learning_rate": 6.960112069601121e-07, | |
| "loss": 2.3467, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 0.9319472093194721, | |
| "grad_norm": 0.3628241717815399, | |
| "learning_rate": 6.812652068126521e-07, | |
| "loss": 2.3403, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 0.9334218093342181, | |
| "grad_norm": 0.3504602611064911, | |
| "learning_rate": 6.665192066651921e-07, | |
| "loss": 2.3363, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.9348964093489641, | |
| "grad_norm": 0.38298091292381287, | |
| "learning_rate": 6.517732065177321e-07, | |
| "loss": 2.3473, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 0.9363710093637101, | |
| "grad_norm": 0.36593878269195557, | |
| "learning_rate": 6.370272063702722e-07, | |
| "loss": 2.3467, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.9378456093784561, | |
| "grad_norm": 0.3678169250488281, | |
| "learning_rate": 6.222812062228121e-07, | |
| "loss": 2.3318, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.939320209393202, | |
| "grad_norm": 0.36956238746643066, | |
| "learning_rate": 6.075352060753521e-07, | |
| "loss": 2.3183, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 0.9407948094079481, | |
| "grad_norm": 0.34040504693984985, | |
| "learning_rate": 5.927892059278921e-07, | |
| "loss": 2.3532, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 0.9422694094226941, | |
| "grad_norm": 0.3800647258758545, | |
| "learning_rate": 5.780432057804321e-07, | |
| "loss": 2.346, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.9437440094374401, | |
| "grad_norm": 0.376245379447937, | |
| "learning_rate": 5.632972056329722e-07, | |
| "loss": 2.3461, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.945218609452186, | |
| "grad_norm": 0.39852550625801086, | |
| "learning_rate": 5.485512054855121e-07, | |
| "loss": 2.3454, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 0.9466932094669321, | |
| "grad_norm": 0.3686327338218689, | |
| "learning_rate": 5.33805205338052e-07, | |
| "loss": 2.3398, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.9481678094816781, | |
| "grad_norm": 0.44276946783065796, | |
| "learning_rate": 5.190592051905921e-07, | |
| "loss": 2.3512, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 0.9496424094964241, | |
| "grad_norm": 0.3805672824382782, | |
| "learning_rate": 5.043132050431321e-07, | |
| "loss": 2.3428, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 0.9511170095111701, | |
| "grad_norm": 0.3791063129901886, | |
| "learning_rate": 4.895672048956721e-07, | |
| "loss": 2.3548, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.9525916095259161, | |
| "grad_norm": 0.35641756653785706, | |
| "learning_rate": 4.748212047482121e-07, | |
| "loss": 2.3361, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 0.9540662095406621, | |
| "grad_norm": 0.41041725873947144, | |
| "learning_rate": 4.600752046007521e-07, | |
| "loss": 2.3399, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 0.9555408095554081, | |
| "grad_norm": 0.3670080304145813, | |
| "learning_rate": 4.453292044532921e-07, | |
| "loss": 2.3488, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.9570154095701541, | |
| "grad_norm": 0.4006000757217407, | |
| "learning_rate": 4.305832043058321e-07, | |
| "loss": 2.3352, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 0.9584900095849, | |
| "grad_norm": 0.3790174424648285, | |
| "learning_rate": 4.1583720415837205e-07, | |
| "loss": 2.3378, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.9599646095996461, | |
| "grad_norm": 0.39817535877227783, | |
| "learning_rate": 4.010912040109121e-07, | |
| "loss": 2.3444, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.9614392096143921, | |
| "grad_norm": 0.39086398482322693, | |
| "learning_rate": 3.863452038634521e-07, | |
| "loss": 2.3446, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 0.9629138096291381, | |
| "grad_norm": 0.34894123673439026, | |
| "learning_rate": 3.7159920371599207e-07, | |
| "loss": 2.3554, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 0.9643884096438841, | |
| "grad_norm": 0.37704578042030334, | |
| "learning_rate": 3.568532035685321e-07, | |
| "loss": 2.3389, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 0.9658630096586301, | |
| "grad_norm": 0.4316023588180542, | |
| "learning_rate": 3.4210720342107205e-07, | |
| "loss": 2.339, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.9673376096733761, | |
| "grad_norm": 0.38732075691223145, | |
| "learning_rate": 3.2736120327361203e-07, | |
| "loss": 2.3363, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 0.9688122096881221, | |
| "grad_norm": 0.451651394367218, | |
| "learning_rate": 3.126152031261521e-07, | |
| "loss": 2.3435, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 0.9702868097028681, | |
| "grad_norm": 0.41907384991645813, | |
| "learning_rate": 2.9786920297869207e-07, | |
| "loss": 2.3503, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 0.971761409717614, | |
| "grad_norm": 0.3828534185886383, | |
| "learning_rate": 2.8312320283123206e-07, | |
| "loss": 2.364, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 0.9732360097323601, | |
| "grad_norm": 0.3919944763183594, | |
| "learning_rate": 2.6837720268377204e-07, | |
| "loss": 2.3461, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.9747106097471061, | |
| "grad_norm": 0.3798130452632904, | |
| "learning_rate": 2.5363120253631203e-07, | |
| "loss": 2.3376, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 0.9761852097618521, | |
| "grad_norm": 0.3798838257789612, | |
| "learning_rate": 2.388852023888521e-07, | |
| "loss": 2.3473, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 0.977659809776598, | |
| "grad_norm": 0.39622053503990173, | |
| "learning_rate": 2.24139202241392e-07, | |
| "loss": 2.3545, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 0.9791344097913441, | |
| "grad_norm": 0.4032035768032074, | |
| "learning_rate": 2.0939320209393203e-07, | |
| "loss": 2.3571, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 0.9806090098060901, | |
| "grad_norm": 0.38117462396621704, | |
| "learning_rate": 1.9464720194647204e-07, | |
| "loss": 2.3389, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.9820836098208361, | |
| "grad_norm": 0.38328099250793457, | |
| "learning_rate": 1.7990120179901206e-07, | |
| "loss": 2.3606, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 0.9835582098355821, | |
| "grad_norm": 0.3704865276813507, | |
| "learning_rate": 1.6515520165155202e-07, | |
| "loss": 2.344, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 0.9850328098503282, | |
| "grad_norm": 0.3868178427219391, | |
| "learning_rate": 1.5040920150409204e-07, | |
| "loss": 2.3395, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 0.9865074098650741, | |
| "grad_norm": 0.3584325313568115, | |
| "learning_rate": 1.3566320135663203e-07, | |
| "loss": 2.3508, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 0.9879820098798201, | |
| "grad_norm": 0.389412522315979, | |
| "learning_rate": 1.2091720120917202e-07, | |
| "loss": 2.3484, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.9894566098945661, | |
| "grad_norm": 0.3739551603794098, | |
| "learning_rate": 1.0617120106171202e-07, | |
| "loss": 2.3594, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 0.990931209909312, | |
| "grad_norm": 0.3698684871196747, | |
| "learning_rate": 9.142520091425201e-08, | |
| "loss": 2.3275, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 0.9924058099240581, | |
| "grad_norm": 0.4226621687412262, | |
| "learning_rate": 7.667920076679201e-08, | |
| "loss": 2.3412, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 0.9938804099388041, | |
| "grad_norm": 0.4069538414478302, | |
| "learning_rate": 6.1933200619332e-08, | |
| "loss": 2.3561, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 0.9953550099535501, | |
| "grad_norm": 0.36342284083366394, | |
| "learning_rate": 4.7187200471872005e-08, | |
| "loss": 2.3422, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.9968296099682961, | |
| "grad_norm": 0.38066765666007996, | |
| "learning_rate": 3.244120032441201e-08, | |
| "loss": 2.3472, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 0.9983042099830421, | |
| "grad_norm": 0.4299827218055725, | |
| "learning_rate": 1.7695200176952003e-08, | |
| "loss": 2.3462, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 0.9997788099977881, | |
| "grad_norm": 0.4048897325992584, | |
| "learning_rate": 2.9492000294920003e-09, | |
| "loss": 2.3342, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 13563, | |
| "total_flos": 5.80008589556659e+18, | |
| "train_loss": 2.354225961330374, | |
| "train_runtime": 12020.9549, | |
| "train_samples_per_second": 216.628, | |
| "train_steps_per_second": 1.128 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 13563, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.80008589556659e+18, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
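
A minimal sketch for reading this file, assuming the JSON above is saved locally as `trainer_state.json` (the state file that `transformers.Trainer` writes at the end of a run). It pulls the per-step loss and learning-rate series out of `log_history`, skipping the final summary entry, which carries run totals rather than a `loss` field; the filename is the only assumption here, everything else uses only keys present in the file:

```python
import json

# Load the trainer state dumped above (assumption: saved as this filename).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry "loss" / "learning_rate" / "grad_norm"; the last
# entry is the end-of-training summary (train_loss, train_runtime, ...) and
# has no "loss" key, so this filter drops it.
records = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

print(f"logged points: {len(records)} (one every {state['logging_steps']} steps)")
print(f"loss: {losses[0]:.4f} at step {steps[0]} -> {losses[-1]:.4f} at step {steps[-1]}")
print(f"reported mean train loss: {state['log_history'][-1]['train_loss']:.4f}")
```

The 20-step spacing of the records matches `"logging_steps": 20`, and the learning-rate series recovered this way is the linear decay visible in the raw entries (about 1.47e-8 per logged point down to ~2.9e-9 at step 13560).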