{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 10000,
  "global_step": 12869,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007770611547128759,
      "grad_norm": 10.867119295233653,
      "learning_rate": 7.770007770007771e-08,
      "loss": 1.9304,
      "step": 10
    },
    {
      "epoch": 0.0015541223094257517,
      "grad_norm": 10.849708826912844,
      "learning_rate": 1.5540015540015542e-07,
      "loss": 1.7731,
      "step": 20
    },
    {
      "epoch": 0.002331183464138628,
      "grad_norm": 14.155064770562149,
      "learning_rate": 2.3310023310023313e-07,
      "loss": 1.8856,
      "step": 30
    },
    {
      "epoch": 0.0031082446188515035,
      "grad_norm": 7.8437428614229106,
      "learning_rate": 3.1080031080031084e-07,
      "loss": 1.7444,
      "step": 40
    },
    {
      "epoch": 0.0038853057735643796,
      "grad_norm": 6.831881710614827,
      "learning_rate": 3.885003885003885e-07,
      "loss": 1.8265,
      "step": 50
    },
    {
      "epoch": 0.004662366928277256,
      "grad_norm": 4.832603626709541,
      "learning_rate": 4.6620046620046626e-07,
      "loss": 1.6188,
      "step": 60
    },
    {
      "epoch": 0.005439428082990132,
      "grad_norm": 4.682650839871238,
      "learning_rate": 5.43900543900544e-07,
      "loss": 1.4275,
      "step": 70
    },
    {
      "epoch": 0.006216489237703007,
      "grad_norm": 3.4425826085380953,
      "learning_rate": 6.216006216006217e-07,
      "loss": 1.2748,
      "step": 80
    },
    {
      "epoch": 0.006993550392415883,
      "grad_norm": 2.5177730139033967,
      "learning_rate": 6.993006993006994e-07,
      "loss": 1.3117,
      "step": 90
    },
    {
      "epoch": 0.007770611547128759,
      "grad_norm": 3.7240853256217887,
      "learning_rate": 7.77000777000777e-07,
      "loss": 1.3679,
      "step": 100
    },
    {
      "epoch": 0.008547672701841634,
      "grad_norm": 3.326103554099626,
      "learning_rate": 8.547008547008548e-07,
      "loss": 1.2847,
      "step": 110
    },
    {
      "epoch": 0.009324733856554511,
      "grad_norm": 3.06027479016113,
      "learning_rate": 9.324009324009325e-07,
      "loss": 1.4199,
      "step": 120
    },
    {
      "epoch": 0.010101795011267387,
      "grad_norm": 3.3944711373731336,
      "learning_rate": 1.01010101010101e-06,
      "loss": 1.2416,
      "step": 130
    },
    {
      "epoch": 0.010878856165980264,
      "grad_norm": 2.8783458689024943,
      "learning_rate": 1.087801087801088e-06,
      "loss": 1.2386,
      "step": 140
    },
    {
      "epoch": 0.011655917320693139,
      "grad_norm": 4.560753080777367,
      "learning_rate": 1.1655011655011655e-06,
      "loss": 1.1683,
      "step": 150
    },
    {
      "epoch": 0.012432978475406014,
      "grad_norm": 3.2610287866769823,
      "learning_rate": 1.2432012432012434e-06,
      "loss": 1.3331,
      "step": 160
    },
    {
      "epoch": 0.013210039630118891,
      "grad_norm": 3.5630097050518494,
      "learning_rate": 1.320901320901321e-06,
      "loss": 1.3022,
      "step": 170
    },
    {
      "epoch": 0.013987100784831766,
      "grad_norm": 3.190644560112282,
      "learning_rate": 1.3986013986013987e-06,
      "loss": 1.2078,
      "step": 180
    },
    {
      "epoch": 0.014764161939544641,
      "grad_norm": 2.7424883315006667,
      "learning_rate": 1.4763014763014764e-06,
      "loss": 1.2883,
      "step": 190
    },
    {
      "epoch": 0.015541223094257518,
      "grad_norm": 3.578715680372139,
      "learning_rate": 1.554001554001554e-06,
      "loss": 1.2041,
      "step": 200
    },
    {
      "epoch": 0.016318284248970395,
      "grad_norm": 3.2571998176648207,
      "learning_rate": 1.6317016317016318e-06,
      "loss": 1.2505,
      "step": 210
    },
    {
      "epoch": 0.01709534540368327,
      "grad_norm": 3.4399528179608327,
      "learning_rate": 1.7094017094017097e-06,
      "loss": 1.2012,
      "step": 220
    },
    {
      "epoch": 0.017872406558396146,
      "grad_norm": 3.8729257141905116,
      "learning_rate": 1.7871017871017873e-06,
      "loss": 1.3179,
      "step": 230
    },
    {
      "epoch": 0.018649467713109023,
      "grad_norm": 3.6027496697475616,
      "learning_rate": 1.864801864801865e-06,
      "loss": 1.2437,
      "step": 240
    },
    {
      "epoch": 0.019426528867821896,
      "grad_norm": 3.6431878968740072,
      "learning_rate": 1.9425019425019425e-06,
      "loss": 1.1645,
      "step": 250
    },
    {
      "epoch": 0.020203590022534773,
      "grad_norm": 2.857881707560637,
      "learning_rate": 2.02020202020202e-06,
      "loss": 1.1369,
      "step": 260
    },
    {
      "epoch": 0.02098065117724765,
      "grad_norm": 2.8739221855243042,
      "learning_rate": 2.0979020979020983e-06,
      "loss": 1.1846,
      "step": 270
    },
    {
      "epoch": 0.021757712331960527,
      "grad_norm": 3.5028112168977557,
      "learning_rate": 2.175602175602176e-06,
      "loss": 1.2122,
      "step": 280
    },
    {
      "epoch": 0.0225347734866734,
      "grad_norm": 3.4640995610274445,
      "learning_rate": 2.2533022533022537e-06,
      "loss": 1.1927,
      "step": 290
    },
    {
      "epoch": 0.023311834641386277,
      "grad_norm": 3.379264646936701,
      "learning_rate": 2.331002331002331e-06,
      "loss": 1.258,
      "step": 300
    },
    {
      "epoch": 0.024088895796099154,
      "grad_norm": 2.4371515340367385,
      "learning_rate": 2.408702408702409e-06,
      "loss": 1.1477,
      "step": 310
    },
    {
      "epoch": 0.024865956950812028,
      "grad_norm": 3.014613121507287,
      "learning_rate": 2.4864024864024867e-06,
      "loss": 1.1715,
      "step": 320
    },
    {
      "epoch": 0.025643018105524905,
      "grad_norm": 3.0458793192067715,
      "learning_rate": 2.564102564102564e-06,
      "loss": 1.1353,
      "step": 330
    },
    {
      "epoch": 0.026420079260237782,
      "grad_norm": 2.9917200999353906,
      "learning_rate": 2.641802641802642e-06,
      "loss": 1.1992,
      "step": 340
    },
    {
      "epoch": 0.027197140414950655,
      "grad_norm": 2.6599563280716985,
      "learning_rate": 2.7195027195027198e-06,
      "loss": 1.1782,
      "step": 350
    },
    {
      "epoch": 0.027974201569663532,
      "grad_norm": 3.0009575544324454,
      "learning_rate": 2.7972027972027974e-06,
      "loss": 1.2762,
      "step": 360
    },
    {
      "epoch": 0.02875126272437641,
      "grad_norm": 2.8774035033800343,
      "learning_rate": 2.874902874902875e-06,
      "loss": 1.2687,
      "step": 370
    },
    {
      "epoch": 0.029528323879089283,
      "grad_norm": 3.11771455020667,
      "learning_rate": 2.952602952602953e-06,
      "loss": 1.207,
      "step": 380
    },
    {
      "epoch": 0.03030538503380216,
      "grad_norm": 3.6810769724431345,
      "learning_rate": 3.0303030303030305e-06,
      "loss": 1.2037,
      "step": 390
    },
    {
      "epoch": 0.031082446188515037,
      "grad_norm": 2.5507766565385084,
      "learning_rate": 3.108003108003108e-06,
      "loss": 1.163,
      "step": 400
    },
    {
      "epoch": 0.031859507343227914,
      "grad_norm": 2.9816770527812686,
      "learning_rate": 3.1857031857031863e-06,
      "loss": 1.1592,
      "step": 410
    },
    {
      "epoch": 0.03263656849794079,
      "grad_norm": 2.591410551140759,
      "learning_rate": 3.2634032634032635e-06,
      "loss": 1.0411,
      "step": 420
    },
    {
      "epoch": 0.03341362965265366,
      "grad_norm": 3.1328334298888345,
      "learning_rate": 3.3411033411033412e-06,
      "loss": 1.1438,
      "step": 430
    },
    {
      "epoch": 0.03419069080736654,
      "grad_norm": 2.9537075236771675,
      "learning_rate": 3.4188034188034193e-06,
      "loss": 1.1713,
      "step": 440
    },
    {
      "epoch": 0.034967751962079414,
      "grad_norm": 4.35570272552757,
      "learning_rate": 3.4965034965034966e-06,
      "loss": 1.2358,
      "step": 450
    },
    {
      "epoch": 0.03574481311679229,
      "grad_norm": 2.4749714488159613,
      "learning_rate": 3.5742035742035747e-06,
      "loss": 1.1325,
      "step": 460
    },
    {
      "epoch": 0.03652187427150517,
      "grad_norm": 2.770830578293701,
      "learning_rate": 3.651903651903652e-06,
      "loss": 1.1979,
      "step": 470
    },
    {
      "epoch": 0.037298935426218045,
      "grad_norm": 3.2166027135563793,
      "learning_rate": 3.72960372960373e-06,
      "loss": 1.1605,
      "step": 480
    },
    {
      "epoch": 0.03807599658093092,
      "grad_norm": 2.843605809275243,
      "learning_rate": 3.8073038073038077e-06,
      "loss": 1.2299,
      "step": 490
    },
    {
      "epoch": 0.03885305773564379,
      "grad_norm": 2.959678568881321,
      "learning_rate": 3.885003885003885e-06,
      "loss": 1.2634,
      "step": 500
    },
    {
      "epoch": 0.03963011889035667,
      "grad_norm": 2.5622873834599367,
      "learning_rate": 3.962703962703963e-06,
      "loss": 1.1137,
      "step": 510
    },
    {
      "epoch": 0.040407180045069546,
      "grad_norm": 3.086457018563733,
      "learning_rate": 4.04040404040404e-06,
      "loss": 1.2407,
      "step": 520
    },
    {
      "epoch": 0.04118424119978242,
      "grad_norm": 4.106519986211115,
      "learning_rate": 4.1181041181041185e-06,
      "loss": 1.1239,
      "step": 530
    },
    {
      "epoch": 0.0419613023544953,
      "grad_norm": 2.7183745936305312,
      "learning_rate": 4.195804195804197e-06,
      "loss": 1.1746,
      "step": 540
    },
    {
      "epoch": 0.04273836350920818,
      "grad_norm": 2.703894165918197,
      "learning_rate": 4.273504273504274e-06,
      "loss": 1.1105,
      "step": 550
    },
    {
      "epoch": 0.043515424663921054,
      "grad_norm": 2.4867862713355686,
      "learning_rate": 4.351204351204352e-06,
      "loss": 1.1258,
      "step": 560
    },
    {
      "epoch": 0.044292485818633924,
      "grad_norm": 2.838440814840756,
      "learning_rate": 4.428904428904429e-06,
      "loss": 1.0962,
      "step": 570
    },
    {
      "epoch": 0.0450695469733468,
      "grad_norm": 2.1466654271023162,
      "learning_rate": 4.506604506604507e-06,
      "loss": 1.1085,
      "step": 580
    },
    {
      "epoch": 0.04584660812805968,
      "grad_norm": 2.5468209985419477,
      "learning_rate": 4.5843045843045846e-06,
      "loss": 1.1391,
      "step": 590
    },
    {
      "epoch": 0.046623669282772555,
      "grad_norm": 2.7865905520731493,
      "learning_rate": 4.662004662004662e-06,
      "loss": 1.1387,
      "step": 600
    },
    {
      "epoch": 0.04740073043748543,
      "grad_norm": 2.1644371566582827,
      "learning_rate": 4.73970473970474e-06,
      "loss": 1.1499,
      "step": 610
    },
    {
      "epoch": 0.04817779159219831,
      "grad_norm": 2.4913053508847667,
      "learning_rate": 4.817404817404818e-06,
      "loss": 1.1007,
      "step": 620
    },
    {
      "epoch": 0.04895485274691118,
      "grad_norm": 3.001782928715834,
      "learning_rate": 4.895104895104895e-06,
      "loss": 1.2201,
      "step": 630
    },
    {
      "epoch": 0.049731913901624056,
      "grad_norm": 2.2758391555971835,
      "learning_rate": 4.972804972804973e-06,
      "loss": 1.2219,
      "step": 640
    },
    {
      "epoch": 0.05050897505633693,
      "grad_norm": 2.2066322284819155,
      "learning_rate": 5.0505050505050515e-06,
      "loss": 1.1288,
      "step": 650
    },
    {
      "epoch": 0.05128603621104981,
      "grad_norm": 2.5894704735788263,
      "learning_rate": 5.128205128205128e-06,
      "loss": 1.1989,
      "step": 660
    },
    {
      "epoch": 0.05206309736576269,
      "grad_norm": 2.952941171933437,
      "learning_rate": 5.205905205905206e-06,
      "loss": 1.1165,
      "step": 670
    },
    {
      "epoch": 0.052840158520475564,
      "grad_norm": 2.7070115957706946,
      "learning_rate": 5.283605283605284e-06,
      "loss": 1.1954,
      "step": 680
    },
    {
      "epoch": 0.05361721967518844,
      "grad_norm": 2.2390053746810668,
      "learning_rate": 5.361305361305362e-06,
      "loss": 1.1219,
      "step": 690
    },
    {
      "epoch": 0.05439428082990131,
      "grad_norm": 2.5396421668929774,
      "learning_rate": 5.4390054390054395e-06,
      "loss": 1.2285,
      "step": 700
    },
    {
      "epoch": 0.05517134198461419,
      "grad_norm": 1.881059123798051,
      "learning_rate": 5.516705516705518e-06,
      "loss": 1.074,
      "step": 710
    },
    {
      "epoch": 0.055948403139327064,
      "grad_norm": 2.4027627997044285,
      "learning_rate": 5.594405594405595e-06,
      "loss": 1.1395,
      "step": 720
    },
    {
      "epoch": 0.05672546429403994,
      "grad_norm": 2.135346477937923,
      "learning_rate": 5.672105672105672e-06,
      "loss": 1.0813,
      "step": 730
    },
    {
      "epoch": 0.05750252544875282,
      "grad_norm": 2.673768837075326,
      "learning_rate": 5.74980574980575e-06,
      "loss": 1.117,
      "step": 740
    },
    {
      "epoch": 0.058279586603465695,
      "grad_norm": 2.1561152898986924,
      "learning_rate": 5.827505827505828e-06,
      "loss": 1.0868,
      "step": 750
    },
    {
      "epoch": 0.059056647758178565,
      "grad_norm": 2.198404569968455,
      "learning_rate": 5.905205905205906e-06,
      "loss": 1.0985,
      "step": 760
    },
    {
      "epoch": 0.05983370891289144,
      "grad_norm": 1.9218754267874707,
      "learning_rate": 5.982905982905983e-06,
      "loss": 1.1303,
      "step": 770
    },
    {
      "epoch": 0.06061077006760432,
      "grad_norm": 2.070293097589863,
      "learning_rate": 6.060606060606061e-06,
      "loss": 1.056,
      "step": 780
    },
    {
      "epoch": 0.061387831222317196,
      "grad_norm": 2.2102128154144833,
      "learning_rate": 6.138306138306139e-06,
      "loss": 1.1511,
      "step": 790
    },
    {
      "epoch": 0.06216489237703007,
      "grad_norm": 2.9020079791880438,
      "learning_rate": 6.216006216006216e-06,
      "loss": 1.0976,
      "step": 800
    },
    {
      "epoch": 0.06294195353174295,
      "grad_norm": 3.1669273668699733,
      "learning_rate": 6.2937062937062944e-06,
      "loss": 1.1263,
      "step": 810
    },
    {
      "epoch": 0.06371901468645583,
      "grad_norm": 1.9847375481750156,
      "learning_rate": 6.3714063714063726e-06,
      "loss": 1.0923,
      "step": 820
    },
    {
      "epoch": 0.0644960758411687,
      "grad_norm": 2.09531371322368,
      "learning_rate": 6.449106449106449e-06,
      "loss": 1.0821,
      "step": 830
    },
    {
      "epoch": 0.06527313699588158,
      "grad_norm": 3.287845612968483,
      "learning_rate": 6.526806526806527e-06,
      "loss": 1.0614,
      "step": 840
    },
    {
      "epoch": 0.06605019815059446,
      "grad_norm": 2.2662925493592083,
      "learning_rate": 6.604506604506605e-06,
      "loss": 1.1021,
      "step": 850
    },
    {
      "epoch": 0.06682725930530732,
      "grad_norm": 2.4839925554425717,
      "learning_rate": 6.6822066822066824e-06,
      "loss": 1.1501,
      "step": 860
    },
    {
      "epoch": 0.0676043204600202,
      "grad_norm": 2.331604369524609,
      "learning_rate": 6.7599067599067605e-06,
      "loss": 1.1742,
      "step": 870
    },
    {
      "epoch": 0.06838138161473307,
      "grad_norm": 2.3590829029315583,
      "learning_rate": 6.837606837606839e-06,
      "loss": 1.1163,
      "step": 880
    },
    {
      "epoch": 0.06915844276944595,
      "grad_norm": 2.892618110874262,
      "learning_rate": 6.915306915306917e-06,
      "loss": 1.1725,
      "step": 890
    },
    {
      "epoch": 0.06993550392415883,
      "grad_norm": 2.384306036165181,
      "learning_rate": 6.993006993006993e-06,
      "loss": 1.1418,
      "step": 900
    },
    {
      "epoch": 0.0707125650788717,
      "grad_norm": 2.614333186214158,
      "learning_rate": 7.070707070707071e-06,
      "loss": 1.1369,
      "step": 910
    },
    {
      "epoch": 0.07148962623358458,
      "grad_norm": 2.7443403027281428,
      "learning_rate": 7.148407148407149e-06,
      "loss": 1.1719,
      "step": 920
    },
    {
      "epoch": 0.07226668738829746,
      "grad_norm": 2.115919721191313,
      "learning_rate": 7.226107226107227e-06,
      "loss": 1.1642,
      "step": 930
    },
    {
      "epoch": 0.07304374854301034,
      "grad_norm": 2.4226282288227052,
      "learning_rate": 7.303807303807304e-06,
      "loss": 1.1609,
      "step": 940
    },
    {
      "epoch": 0.07382080969772321,
      "grad_norm": 2.3322158120159657,
      "learning_rate": 7.381507381507382e-06,
      "loss": 1.0715,
      "step": 950
    },
    {
      "epoch": 0.07459787085243609,
      "grad_norm": 2.216105414113714,
      "learning_rate": 7.45920745920746e-06,
      "loss": 1.0836,
      "step": 960
    },
    {
      "epoch": 0.07537493200714897,
      "grad_norm": 2.080845937826623,
      "learning_rate": 7.536907536907537e-06,
      "loss": 1.1298,
      "step": 970
    },
    {
      "epoch": 0.07615199316186184,
      "grad_norm": 1.9845987743197342,
      "learning_rate": 7.6146076146076155e-06,
      "loss": 1.1375,
      "step": 980
    },
    {
      "epoch": 0.07692905431657471,
      "grad_norm": 2.052691449449282,
      "learning_rate": 7.692307692307694e-06,
      "loss": 1.1501,
      "step": 990
    },
    {
      "epoch": 0.07770611547128758,
      "grad_norm": 1.9263855972921253,
      "learning_rate": 7.77000777000777e-06,
      "loss": 1.1237,
      "step": 1000
    },
    {
      "epoch": 0.07848317662600046,
      "grad_norm": 2.9671775698526934,
      "learning_rate": 7.847707847707848e-06,
      "loss": 1.0918,
      "step": 1010
    },
    {
      "epoch": 0.07926023778071334,
      "grad_norm": 2.2929478440651394,
      "learning_rate": 7.925407925407926e-06,
      "loss": 1.0848,
      "step": 1020
    },
    {
      "epoch": 0.08003729893542622,
      "grad_norm": 1.6757069320789237,
      "learning_rate": 8.003108003108003e-06,
      "loss": 1.1209,
      "step": 1030
    },
    {
      "epoch": 0.08081436009013909,
      "grad_norm": 2.093200645109728,
      "learning_rate": 8.08080808080808e-06,
      "loss": 1.0866,
      "step": 1040
    },
    {
      "epoch": 0.08159142124485197,
      "grad_norm": 2.408927649486391,
      "learning_rate": 8.158508158508159e-06,
      "loss": 1.0934,
      "step": 1050
    },
    {
      "epoch": 0.08236848239956485,
      "grad_norm": 2.2763929710773643,
      "learning_rate": 8.236208236208237e-06,
      "loss": 1.1081,
      "step": 1060
    },
    {
      "epoch": 0.08314554355427772,
      "grad_norm": 2.329064562776198,
      "learning_rate": 8.313908313908315e-06,
      "loss": 1.1366,
      "step": 1070
    },
    {
      "epoch": 0.0839226047089906,
      "grad_norm": 1.9093884379628574,
      "learning_rate": 8.391608391608393e-06,
      "loss": 1.0907,
      "step": 1080
    },
    {
      "epoch": 0.08469966586370348,
      "grad_norm": 2.0666971265552694,
      "learning_rate": 8.46930846930847e-06,
      "loss": 1.1396,
      "step": 1090
    },
    {
      "epoch": 0.08547672701841635,
      "grad_norm": 2.6618881870416833,
      "learning_rate": 8.547008547008548e-06,
      "loss": 1.1204,
      "step": 1100
    },
    {
      "epoch": 0.08625378817312923,
      "grad_norm": 2.5811056119151115,
      "learning_rate": 8.624708624708626e-06,
      "loss": 1.1067,
      "step": 1110
    },
    {
      "epoch": 0.08703084932784211,
      "grad_norm": 2.4891841510360697,
      "learning_rate": 8.702408702408704e-06,
      "loss": 1.0186,
      "step": 1120
    },
    {
      "epoch": 0.08780791048255497,
      "grad_norm": 1.9964291348885184,
      "learning_rate": 8.78010878010878e-06,
      "loss": 1.0534,
      "step": 1130
    },
    {
      "epoch": 0.08858497163726785,
      "grad_norm": 1.8380639056753707,
      "learning_rate": 8.857808857808858e-06,
      "loss": 1.1259,
      "step": 1140
    },
    {
      "epoch": 0.08936203279198073,
      "grad_norm": 2.026492546755725,
      "learning_rate": 8.935508935508937e-06,
      "loss": 1.1357,
      "step": 1150
    },
    {
      "epoch": 0.0901390939466936,
      "grad_norm": 2.3881102752793546,
      "learning_rate": 9.013209013209015e-06,
      "loss": 1.1451,
      "step": 1160
    },
    {
      "epoch": 0.09091615510140648,
      "grad_norm": 2.3516814013111578,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.1304,
      "step": 1170
    },
    {
      "epoch": 0.09169321625611936,
      "grad_norm": 2.2625675458737255,
      "learning_rate": 9.168609168609169e-06,
      "loss": 1.1289,
      "step": 1180
    },
    {
      "epoch": 0.09247027741083223,
      "grad_norm": 1.8601808712202859,
      "learning_rate": 9.246309246309247e-06,
      "loss": 1.0705,
      "step": 1190
    },
    {
      "epoch": 0.09324733856554511,
      "grad_norm": 2.45830074558663,
      "learning_rate": 9.324009324009324e-06,
      "loss": 1.0006,
      "step": 1200
    },
    {
      "epoch": 0.09402439972025799,
      "grad_norm": 2.2208723337033747,
      "learning_rate": 9.401709401709402e-06,
      "loss": 1.0642,
      "step": 1210
    },
    {
      "epoch": 0.09480146087497086,
      "grad_norm": 2.992927987309589,
      "learning_rate": 9.47940947940948e-06,
      "loss": 1.1099,
      "step": 1220
    },
    {
      "epoch": 0.09557852202968374,
      "grad_norm": 2.2404447843072526,
      "learning_rate": 9.557109557109558e-06,
      "loss": 1.1046,
      "step": 1230
    },
    {
      "epoch": 0.09635558318439662,
      "grad_norm": 2.027188334095754,
      "learning_rate": 9.634809634809636e-06,
      "loss": 1.1388,
      "step": 1240
    },
    {
      "epoch": 0.0971326443391095,
      "grad_norm": 1.9884931664591046,
      "learning_rate": 9.712509712509714e-06,
      "loss": 1.1093,
      "step": 1250
    },
    {
      "epoch": 0.09790970549382236,
      "grad_norm": 2.2780803616241,
      "learning_rate": 9.79020979020979e-06,
      "loss": 1.0973,
      "step": 1260
    },
    {
      "epoch": 0.09868676664853523,
      "grad_norm": 2.482851610024637,
      "learning_rate": 9.867909867909869e-06,
      "loss": 1.1074,
      "step": 1270
    },
    {
      "epoch": 0.09946382780324811,
      "grad_norm": 2.1809979058393547,
      "learning_rate": 9.945609945609947e-06,
      "loss": 1.1111,
      "step": 1280
    },
    {
      "epoch": 0.10024088895796099,
      "grad_norm": 2.653036244084716,
      "learning_rate": 9.999998344553621e-06,
      "loss": 1.0539,
      "step": 1290
    },
    {
      "epoch": 0.10101795011267387,
      "grad_norm": 2.1782834112618144,
      "learning_rate": 9.99996891442626e-06,
      "loss": 1.1277,
      "step": 1300
    },
    {
      "epoch": 0.10179501126738674,
      "grad_norm": 2.0794830642914532,
      "learning_rate": 9.999902696850819e-06,
      "loss": 1.1028,
      "step": 1310
    },
    {
      "epoch": 0.10257207242209962,
      "grad_norm": 1.9588777456228414,
      "learning_rate": 9.999799692314491e-06,
      "loss": 1.0799,
      "step": 1320
    },
    {
      "epoch": 0.1033491335768125,
      "grad_norm": 1.8109731584724105,
      "learning_rate": 9.999659901575142e-06,
      "loss": 1.0387,
      "step": 1330
    },
    {
      "epoch": 0.10412619473152537,
      "grad_norm": 1.496513992331799,
      "learning_rate": 9.999483325661283e-06,
      "loss": 1.0982,
      "step": 1340
    },
    {
      "epoch": 0.10490325588623825,
      "grad_norm": 1.9418465016002184,
      "learning_rate": 9.999269965872081e-06,
      "loss": 1.1873,
      "step": 1350
    },
    {
      "epoch": 0.10568031704095113,
      "grad_norm": 1.8814020449439044,
      "learning_rate": 9.999019823777335e-06,
      "loss": 1.1121,
      "step": 1360
    },
    {
      "epoch": 0.106457378195664,
      "grad_norm": 2.5624116813963083,
      "learning_rate": 9.998732901217474e-06,
      "loss": 1.1057,
      "step": 1370
    },
    {
      "epoch": 0.10723443935037688,
      "grad_norm": 2.8084481900607767,
      "learning_rate": 9.998409200303543e-06,
      "loss": 1.0796,
      "step": 1380
    },
    {
      "epoch": 0.10801150050508974,
      "grad_norm": 2.5585637275706827,
      "learning_rate": 9.998048723417184e-06,
      "loss": 1.0911,
      "step": 1390
    },
    {
      "epoch": 0.10878856165980262,
      "grad_norm": 1.8486528676878824,
      "learning_rate": 9.997651473210614e-06,
      "loss": 1.1027,
      "step": 1400
    },
    {
      "epoch": 0.1095656228145155,
      "grad_norm": 1.6756625698252106,
      "learning_rate": 9.99721745260662e-06,
      "loss": 0.9892,
      "step": 1410
    },
    {
      "epoch": 0.11034268396922838,
      "grad_norm": 1.7980527241240165,
      "learning_rate": 9.996746664798523e-06,
      "loss": 1.0714,
      "step": 1420
    },
    {
      "epoch": 0.11111974512394125,
      "grad_norm": 2.965648407184345,
      "learning_rate": 9.996239113250158e-06,
      "loss": 1.1627,
      "step": 1430
    },
    {
      "epoch": 0.11189680627865413,
      "grad_norm": 2.58378967500062,
      "learning_rate": 9.995694801695856e-06,
      "loss": 1.1338,
      "step": 1440
    },
    {
      "epoch": 0.112673867433367,
      "grad_norm": 2.3312493063488104,
      "learning_rate": 9.995113734140409e-06,
      "loss": 1.0527,
      "step": 1450
    },
    {
      "epoch": 0.11345092858807988,
      "grad_norm": 1.7987672632076395,
      "learning_rate": 9.99449591485904e-06,
      "loss": 1.1463,
      "step": 1460
    },
    {
      "epoch": 0.11422798974279276,
      "grad_norm": 2.2447963047423674,
      "learning_rate": 9.993841348397377e-06,
      "loss": 1.0993,
      "step": 1470
    },
    {
      "epoch": 0.11500505089750564,
      "grad_norm": 2.3307589401248983,
      "learning_rate": 9.993150039571417e-06,
      "loss": 1.1,
      "step": 1480
    },
    {
      "epoch": 0.11578211205221851,
      "grad_norm": 2.4461716652591377,
      "learning_rate": 9.992421993467488e-06,
      "loss": 1.1223,
      "step": 1490
    },
    {
      "epoch": 0.11655917320693139,
      "grad_norm": 2.325560003259248,
      "learning_rate": 9.991657215442215e-06,
      "loss": 1.1016,
      "step": 1500
    },
    {
      "epoch": 0.11733623436164427,
      "grad_norm": 2.324019330722723,
      "learning_rate": 9.99085571112248e-06,
      "loss": 1.102,
      "step": 1510
    },
    {
      "epoch": 0.11811329551635713,
      "grad_norm": 2.184804872790777,
      "learning_rate": 9.990017486405379e-06,
      "loss": 1.0691,
      "step": 1520
    },
    {
      "epoch": 0.11889035667107001,
      "grad_norm": 2.3778750559007946,
      "learning_rate": 9.989142547458182e-06,
      "loss": 1.0902,
      "step": 1530
    },
    {
      "epoch": 0.11966741782578288,
      "grad_norm": 1.9170168154911298,
      "learning_rate": 9.988230900718279e-06,
      "loss": 1.0755,
      "step": 1540
    },
    {
      "epoch": 0.12044447898049576,
      "grad_norm": 2.242423744369333,
      "learning_rate": 9.987282552893146e-06,
      "loss": 1.0557,
      "step": 1550
    },
    {
      "epoch": 0.12122154013520864,
      "grad_norm": 2.4290588197619574,
      "learning_rate": 9.986297510960284e-06,
      "loss": 1.0472,
      "step": 1560
    },
    {
      "epoch": 0.12199860128992152,
      "grad_norm": 2.4366241079551596,
      "learning_rate": 9.985275782167175e-06,
      "loss": 1.0249,
      "step": 1570
    },
    {
      "epoch": 0.12277566244463439,
      "grad_norm": 2.6491566316518673,
      "learning_rate": 9.984217374031225e-06,
      "loss": 1.0816,
      "step": 1580
    },
    {
      "epoch": 0.12355272359934727,
      "grad_norm": 2.159316756547971,
      "learning_rate": 9.983122294339708e-06,
      "loss": 1.078,
      "step": 1590
    },
    {
      "epoch": 0.12432978475406015,
      "grad_norm": 2.0761579284967944,
      "learning_rate": 9.981990551149714e-06,
      "loss": 1.0913,
      "step": 1600
    },
    {
      "epoch": 0.12510684590877302,
      "grad_norm": 2.528857689821478,
      "learning_rate": 9.980822152788082e-06,
      "loss": 1.1034,
      "step": 1610
    },
    {
      "epoch": 0.1258839070634859,
      "grad_norm": 1.5046304989897192,
      "learning_rate": 9.979617107851343e-06,
      "loss": 1.114,
      "step": 1620
    },
    {
      "epoch": 0.12666096821819878,
      "grad_norm": 2.2475747257064707,
      "learning_rate": 9.97837542520566e-06,
      "loss": 1.0558,
      "step": 1630
    },
    {
      "epoch": 0.12743802937291165,
      "grad_norm": 2.016387639571554,
      "learning_rate": 9.977097113986755e-06,
      "loss": 1.1429,
      "step": 1640
    },
    {
      "epoch": 0.12821509052762453,
      "grad_norm": 2.246062301174424,
      "learning_rate": 9.97578218359985e-06,
      "loss": 1.0643,
      "step": 1650
    },
    {
      "epoch": 0.1289921516823374,
      "grad_norm": 2.7312095064634323,
      "learning_rate": 9.974430643719591e-06,
      "loss": 1.0671,
      "step": 1660
    },
    {
      "epoch": 0.12976921283705029,
      "grad_norm": 1.813294617554991,
      "learning_rate": 9.973042504289978e-06,
      "loss": 0.9926,
      "step": 1670
    },
    {
      "epoch": 0.13054627399176316,
      "grad_norm": 2.2812471968380095,
      "learning_rate": 9.971617775524301e-06,
      "loss": 1.0825,
      "step": 1680
    },
    {
      "epoch": 0.13132333514647604,
      "grad_norm": 1.756937891360179,
      "learning_rate": 9.970156467905048e-06,
      "loss": 1.0673,
      "step": 1690
    },
    {
      "epoch": 0.13210039630118892,
      "grad_norm": 2.082158585539177,
      "learning_rate": 9.968658592183842e-06,
      "loss": 1.1994,
      "step": 1700
    },
    {
      "epoch": 0.1328774574559018,
      "grad_norm": 1.9267534200786023,
      "learning_rate": 9.967124159381359e-06,
      "loss": 1.1162,
      "step": 1710
    },
    {
      "epoch": 0.13365451861061464,
      "grad_norm": 3.0547406918856748,
      "learning_rate": 9.965553180787239e-06,
      "loss": 1.0263,
      "step": 1720
    },
    {
      "epoch": 0.13443157976532752,
      "grad_norm": 1.7665942406417015,
      "learning_rate": 9.963945667960017e-06,
      "loss": 0.9662,
      "step": 1730
    },
    {
      "epoch": 0.1352086409200404,
      "grad_norm": 1.8418454319389166,
      "learning_rate": 9.962301632727022e-06,
      "loss": 1.0806,
      "step": 1740
    },
    {
      "epoch": 0.13598570207475327,
      "grad_norm": 1.7673330680317212,
      "learning_rate": 9.960621087184303e-06,
      "loss": 1.0801,
      "step": 1750
    },
    {
      "epoch": 0.13676276322946615,
      "grad_norm": 2.206590428660935,
      "learning_rate": 9.95890404369653e-06,
      "loss": 1.1432,
      "step": 1760
    },
    {
      "epoch": 0.13753982438417903,
      "grad_norm": 2.2302577958801195,
      "learning_rate": 9.957150514896919e-06,
      "loss": 1.152,
      "step": 1770
    },
    {
      "epoch": 0.1383168855388919,
      "grad_norm": 2.0260327381346794,
      "learning_rate": 9.95536051368711e-06,
      "loss": 1.0658,
      "step": 1780
    },
    {
      "epoch": 0.13909394669360478,
      "grad_norm": 1.5644692783168082,
      "learning_rate": 9.953534053237108e-06,
      "loss": 1.0604,
      "step": 1790
    },
    {
      "epoch": 0.13987100784831766,
      "grad_norm": 1.738578328297917,
      "learning_rate": 9.951671146985159e-06,
      "loss": 0.9911,
      "step": 1800
    },
    {
      "epoch": 0.14064806900303053,
      "grad_norm": 1.6603612609497798,
      "learning_rate": 9.949771808637657e-06,
      "loss": 1.0849,
      "step": 1810
    },
    {
      "epoch": 0.1414251301577434,
      "grad_norm": 2.031511681498179,
      "learning_rate": 9.947836052169056e-06,
      "loss": 0.9919,
      "step": 1820
    },
    {
      "epoch": 0.1422021913124563,
      "grad_norm": 1.5044981498939936,
      "learning_rate": 9.945863891821749e-06,
      "loss": 0.9996,
      "step": 1830
    },
    {
      "epoch": 0.14297925246716917,
      "grad_norm": 2.293059765739188,
      "learning_rate": 9.943855342105979e-06,
      "loss": 1.0394,
      "step": 1840
    },
    {
      "epoch": 0.14375631362188204,
      "grad_norm": 1.9478707992466775,
      "learning_rate": 9.941810417799719e-06,
      "loss": 0.9964,
      "step": 1850
    },
    {
      "epoch": 0.14453337477659492,
      "grad_norm": 1.5149400216960562,
      "learning_rate": 9.939729133948572e-06,
      "loss": 1.0521,
      "step": 1860
    },
    {
      "epoch": 0.1453104359313078,
      "grad_norm": 2.2351667693118524,
      "learning_rate": 9.93761150586566e-06,
      "loss": 1.1685,
      "step": 1870
    },
    {
      "epoch": 0.14608749708602067,
      "grad_norm": 3.4005405751624087,
      "learning_rate": 9.935457549131504e-06,
      "loss": 1.0859,
      "step": 1880
    },
    {
      "epoch": 0.14686455824073355,
      "grad_norm": 2.1781460644900257,
      "learning_rate": 9.933267279593919e-06,
      "loss": 1.037,
      "step": 1890
    },
    {
      "epoch": 0.14764161939544643,
      "grad_norm": 2.432585604447532,
      "learning_rate": 9.931040713367888e-06,
      "loss": 1.0816,
      "step": 1900
    },
    {
      "epoch": 0.1484186805501593,
      "grad_norm": 1.834847415817245,
      "learning_rate": 9.928777866835454e-06,
      "loss": 1.0843,
      "step": 1910
    },
    {
      "epoch": 0.14919574170487218,
      "grad_norm": 1.7231188780918039,
      "learning_rate": 9.926478756645586e-06,
      "loss": 1.0286,
      "step": 1920
    },
    {
      "epoch": 0.14997280285958506,
      "grad_norm": 2.113770754133767,
      "learning_rate": 9.924143399714072e-06,
      "loss": 1.0627,
      "step": 1930
    },
    {
      "epoch": 0.15074986401429794,
      "grad_norm": 2.3994884363588036,
      "learning_rate": 9.92177181322338e-06,
      "loss": 1.0116,
      "step": 1940
    },
    {
      "epoch": 0.1515269251690108,
      "grad_norm": 2.0230342364705454,
      "learning_rate": 9.919364014622545e-06,
      "loss": 1.0606,
      "step": 1950
    },
    {
      "epoch": 0.1523039863237237,
      "grad_norm": 2.1208192115487816,
      "learning_rate": 9.91692002162703e-06,
      "loss": 1.0623,
      "step": 1960
    },
    {
      "epoch": 0.15308104747843657,
      "grad_norm": 1.954692914861481,
      "learning_rate": 9.914439852218598e-06,
      "loss": 1.036,
      "step": 1970
    },
    {
      "epoch": 0.15385810863314942,
      "grad_norm": 2.4424599661840394,
      "learning_rate": 9.911923524645184e-06,
      "loss": 1.0592,
      "step": 1980
    },
    {
      "epoch": 0.1546351697878623,
      "grad_norm": 1.7002048061692303,
      "learning_rate": 9.909371057420756e-06,
      "loss": 1.1009,
      "step": 1990
    },
    {
      "epoch": 0.15541223094257517,
      "grad_norm": 1.6400522184059512,
      "learning_rate": 9.906782469325183e-06,
      "loss": 1.0584,
      "step": 2000
    },
    {
      "epoch": 0.15618929209728805,
      "grad_norm": 1.9086125071696802,
      "learning_rate": 9.904157779404095e-06,
      "loss": 1.027,
      "step": 2010
    },
    {
      "epoch": 0.15696635325200092,
      "grad_norm": 2.0429187558374284,
      "learning_rate": 9.901497006968737e-06,
      "loss": 1.0366,
      "step": 2020
    },
    {
      "epoch": 0.1577434144067138,
      "grad_norm": 1.9839452672457782,
      "learning_rate": 9.89880017159584e-06,
      "loss": 1.0253,
      "step": 2030
    },
    {
      "epoch": 0.15852047556142668,
      "grad_norm": 1.9239243059085187,
      "learning_rate": 9.896067293127462e-06,
      "loss": 1.0809,
      "step": 2040
    },
    {
      "epoch": 0.15929753671613955,
      "grad_norm": 2.116977455932609,
      "learning_rate": 9.893298391670857e-06,
      "loss": 1.0288,
      "step": 2050
    },
    {
      "epoch": 0.16007459787085243,
      "grad_norm": 1.9256786973087672,
      "learning_rate": 9.890493487598315e-06,
      "loss": 1.062,
      "step": 2060
    },
    {
      "epoch": 0.1608516590255653,
      "grad_norm": 1.770000631025023,
      "learning_rate": 9.887652601547011e-06,
      "loss": 1.029,
      "step": 2070
    },
    {
      "epoch": 0.16162872018027818,
      "grad_norm": 2.0460739758835715,
      "learning_rate": 9.884775754418872e-06,
      "loss": 1.0978,
      "step": 2080
    },
    {
      "epoch": 0.16240578133499106,
      "grad_norm": 1.8387960887988681,
      "learning_rate": 9.881862967380398e-06,
      "loss": 1.0499,
      "step": 2090
    },
    {
      "epoch": 0.16318284248970394,
      "grad_norm": 2.0055836577178145,
      "learning_rate": 9.878914261862524e-06,
      "loss": 1.0964,
      "step": 2100
    },
    {
      "epoch": 0.16395990364441682,
      "grad_norm": 1.7868218097590607,
      "learning_rate": 9.875929659560455e-06,
      "loss": 1.0277,
      "step": 2110
    },
    {
      "epoch": 0.1647369647991297,
      "grad_norm": 2.1063589192373424,
      "learning_rate": 9.872909182433509e-06,
      "loss": 1.1237,
      "step": 2120
    },
    {
      "epoch": 0.16551402595384257,
      "grad_norm": 2.2482455806975365,
      "learning_rate": 9.869852852704951e-06,
      "loss": 1.069,
      "step": 2130
    },
    {
      "epoch": 0.16629108710855545,
      "grad_norm": 1.7191931035624053,
      "learning_rate": 9.866760692861837e-06,
      "loss": 1.0432,
      "step": 2140
    },
    {
      "epoch": 0.16706814826326832,
      "grad_norm": 1.9822067032337325,
      "learning_rate": 9.863632725654841e-06,
      "loss": 1.0966,
      "step": 2150
    },
    {
      "epoch": 0.1678452094179812,
      "grad_norm": 1.5154087879613518,
      "learning_rate": 9.860468974098093e-06,
      "loss": 0.9731,
      "step": 2160
    },
    {
      "epoch": 0.16862227057269408,
      "grad_norm": 2.109259264636941,
      "learning_rate": 9.85726946146901e-06,
      "loss": 1.075,
      "step": 2170
    },
    {
      "epoch": 0.16939933172740695,
      "grad_norm": 2.264076822727728,
      "learning_rate": 9.854034211308114e-06,
      "loss": 1.0237,
      "step": 2180
    },
    {
      "epoch": 0.17017639288211983,
      "grad_norm": 1.892118264625731,
      "learning_rate": 9.850763247418876e-06,
      "loss": 1.0245,
      "step": 2190
    },
    {
      "epoch": 0.1709534540368327,
      "grad_norm": 2.0853632303159535,
      "learning_rate": 9.847456593867525e-06,
      "loss": 1.0026,
      "step": 2200
    },
    {
      "epoch": 0.17173051519154559,
      "grad_norm": 1.9677334934726516,
      "learning_rate": 9.844114274982885e-06,
      "loss": 1.0431,
      "step": 2210
    },
    {
      "epoch": 0.17250757634625846,
      "grad_norm": 2.2830817893790103,
      "learning_rate": 9.840736315356183e-06,
      "loss": 1.0943,
      "step": 2220
    },
    {
      "epoch": 0.17328463750097134,
      "grad_norm": 1.575442825346659,
      "learning_rate": 9.837322739840877e-06,
      "loss": 1.0007,
      "step": 2230
    },
    {
      "epoch": 0.17406169865568422,
      "grad_norm": 2.2367315093018134,
      "learning_rate": 9.833873573552472e-06,
      "loss": 1.0301,
      "step": 2240
    },
    {
      "epoch": 0.17483875981039707,
      "grad_norm": 2.24222375291448,
      "learning_rate": 9.830388841868329e-06,
      "loss": 1.0919,
      "step": 2250
    },
    {
      "epoch": 0.17561582096510994,
      "grad_norm": 1.872156214913949,
      "learning_rate": 9.826868570427484e-06,
      "loss": 1.0933,
      "step": 2260
    },
    {
      "epoch": 0.17639288211982282,
      "grad_norm": 2.200623982755955,
      "learning_rate": 9.823312785130457e-06,
      "loss": 1.0556,
      "step": 2270
    },
    {
      "epoch": 0.1771699432745357,
      "grad_norm": 2.0166726180309547,
      "learning_rate": 9.819721512139069e-06,
      "loss": 1.0136,
      "step": 2280
    },
    {
      "epoch": 0.17794700442924857,
      "grad_norm": 2.3268106459403155,
      "learning_rate": 9.816094777876233e-06,
      "loss": 1.0609,
      "step": 2290
    },
    {
      "epoch": 0.17872406558396145,
      "grad_norm": 2.5483756559425097,
      "learning_rate": 9.812432609025778e-06,
      "loss": 1.1066,
      "step": 2300
    },
    {
      "epoch": 0.17950112673867433,
      "grad_norm": 1.5050242159549674,
      "learning_rate": 9.808735032532239e-06,
      "loss": 1.0461,
      "step": 2310
    },
    {
      "epoch": 0.1802781878933872,
      "grad_norm": 1.7444888511627248,
      "learning_rate": 9.805002075600668e-06,
      "loss": 0.9875,
      "step": 2320
    },
    {
      "epoch": 0.18105524904810008,
      "grad_norm": 2.1359724957586295,
      "learning_rate": 9.801233765696423e-06,
      "loss": 1.0032,
      "step": 2330
    },
    {
      "epoch": 0.18183231020281296,
      "grad_norm": 2.0933731292318214,
      "learning_rate": 9.797430130544983e-06,
      "loss": 1.0092,
      "step": 2340
    },
    {
      "epoch": 0.18260937135752583,
      "grad_norm": 1.7774756159015281,
      "learning_rate": 9.793591198131724e-06,
      "loss": 0.9708,
      "step": 2350
    },
    {
      "epoch": 0.1833864325122387,
      "grad_norm": 1.9057742144891412,
      "learning_rate": 9.789716996701729e-06,
      "loss": 1.0716,
      "step": 2360
    },
    {
      "epoch": 0.1841634936669516,
      "grad_norm": 1.6679562880223004,
      "learning_rate": 9.78580755475957e-06,
      "loss": 1.0184,
      "step": 2370
    },
    {
      "epoch": 0.18494055482166447,
      "grad_norm": 2.036953279006188,
      "learning_rate": 9.781862901069105e-06,
      "loss": 0.988,
      "step": 2380
    },
    {
      "epoch": 0.18571761597637734,
      "grad_norm": 2.0964552627447777,
      "learning_rate": 9.777883064653266e-06,
      "loss": 1.0113,
      "step": 2390
    },
    {
      "epoch": 0.18649467713109022,
      "grad_norm": 1.6106495155390417,
      "learning_rate": 9.773868074793838e-06,
      "loss": 1.0423,
      "step": 2400
    },
    {
      "epoch": 0.1872717382858031,
      "grad_norm": 2.8770640128408456,
      "learning_rate": 9.76981796103125e-06,
      "loss": 1.0398,
      "step": 2410
    },
    {
      "epoch": 0.18804879944051597,
      "grad_norm": 2.0693212678122843,
      "learning_rate": 9.76573275316436e-06,
      "loss": 1.0045,
      "step": 2420
    },
    {
      "epoch": 0.18882586059522885,
      "grad_norm": 2.0119207294765213,
      "learning_rate": 9.761612481250225e-06,
      "loss": 1.0224,
      "step": 2430
    },
    {
      "epoch": 0.18960292174994173,
      "grad_norm": 2.223557066379335,
      "learning_rate": 9.757457175603893e-06,
      "loss": 1.0773,
      "step": 2440
    },
    {
      "epoch": 0.1903799829046546,
      "grad_norm": 1.9108372181328375,
      "learning_rate": 9.753266866798174e-06,
      "loss": 1.0526,
      "step": 2450
    },
    {
      "epoch": 0.19115704405936748,
      "grad_norm": 2.1959378359625177,
      "learning_rate": 9.749041585663411e-06,
      "loss": 1.1138,
      "step": 2460
    },
    {
      "epoch": 0.19193410521408036,
      "grad_norm": 2.0485148481555218,
      "learning_rate": 9.74478136328726e-06,
      "loss": 1.0884,
      "step": 2470
    },
    {
      "epoch": 0.19271116636879324,
      "grad_norm": 1.8565352764102319,
      "learning_rate": 9.740486231014461e-06,
      "loss": 1.0099,
      "step": 2480
    },
    {
      "epoch": 0.1934882275235061,
      "grad_norm": 1.5302635825343132,
      "learning_rate": 9.736156220446597e-06,
      "loss": 1.0461,
      "step": 2490
    },
    {
      "epoch": 0.194265288678219,
      "grad_norm": 1.5954264600641812,
      "learning_rate": 9.731791363441876e-06,
      "loss": 0.9655,
      "step": 2500
    },
    {
      "epoch": 0.19504234983293184,
      "grad_norm": 1.7822694013944302,
      "learning_rate": 9.727391692114887e-06,
      "loss": 1.0542,
      "step": 2510
    },
    {
      "epoch": 0.19581941098764472,
      "grad_norm": 2.424667963877112,
      "learning_rate": 9.722957238836366e-06,
      "loss": 1.0331,
      "step": 2520
    },
    {
      "epoch": 0.1965964721423576,
      "grad_norm": 2.3703044008316487,
      "learning_rate": 9.718488036232963e-06,
      "loss": 1.0926,
      "step": 2530
    },
    {
      "epoch": 0.19737353329707047,
      "grad_norm": 2.1530662223107955,
      "learning_rate": 9.713984117186993e-06,
      "loss": 1.0121,
      "step": 2540
    },
    {
      "epoch": 0.19815059445178335,
      "grad_norm": 1.9314173573162179,
      "learning_rate": 9.7094455148362e-06,
      "loss": 1.0475,
      "step": 2550
    },
    {
      "epoch": 0.19892765560649622,
      "grad_norm": 1.9777777372602399,
      "learning_rate": 9.704872262573508e-06,
      "loss": 1.0105,
      "step": 2560
    },
    {
      "epoch": 0.1997047167612091,
      "grad_norm": 2.2819001107312546,
      "learning_rate": 9.700264394046787e-06,
      "loss": 0.948,
      "step": 2570
    },
    {
      "epoch": 0.20048177791592198,
      "grad_norm": 1.7288416994808482,
      "learning_rate": 9.69562194315859e-06,
      "loss": 1.0458,
      "step": 2580
    },
    {
      "epoch": 0.20125883907063485,
      "grad_norm": 1.7457323208199687,
      "learning_rate": 9.690944944065914e-06,
      "loss": 1.0476,
      "step": 2590
    },
    {
      "epoch": 0.20203590022534773,
      "grad_norm": 2.47172385268511,
      "learning_rate": 9.686233431179944e-06,
      "loss": 1.0115,
      "step": 2600
    },
    {
      "epoch": 0.2028129613800606,
      "grad_norm": 2.38182568324136,
      "learning_rate": 9.681487439165804e-06,
      "loss": 1.0733,
      "step": 2610
    },
    {
      "epoch": 0.20359002253477348,
      "grad_norm": 2.1251613678643153,
      "learning_rate": 9.676707002942299e-06,
      "loss": 1.1202,
      "step": 2620
    },
    {
      "epoch": 0.20436708368948636,
      "grad_norm": 2.3331174035594158,
      "learning_rate": 9.671892157681656e-06,
      "loss": 0.9892,
      "step": 2630
    },
    {
      "epoch": 0.20514414484419924,
      "grad_norm": 2.5297296744464597,
      "learning_rate": 9.66704293880927e-06,
      "loss": 1.0913,
      "step": 2640
    },
    {
      "epoch": 0.20592120599891212,
      "grad_norm": 1.9953398885425944,
      "learning_rate": 9.662159382003438e-06,
      "loss": 0.9739,
      "step": 2650
    },
    {
      "epoch": 0.206698267153625,
      "grad_norm": 1.9554157695142245,
      "learning_rate": 9.657241523195106e-06,
      "loss": 1.0062,
      "step": 2660
    },
    {
      "epoch": 0.20747532830833787,
      "grad_norm": 1.9681771655746416,
      "learning_rate": 9.652289398567591e-06,
      "loss": 0.9645,
      "step": 2670
    },
    {
      "epoch": 0.20825238946305075,
      "grad_norm": 1.6398427617567763,
      "learning_rate": 9.647303044556327e-06,
      "loss": 1.0691,
      "step": 2680
    },
    {
      "epoch": 0.20902945061776362,
      "grad_norm": 2.191033664996454,
      "learning_rate": 9.642282497848587e-06,
      "loss": 1.0046,
      "step": 2690
    },
    {
      "epoch": 0.2098065117724765,
      "grad_norm": 2.422573387512772,
      "learning_rate": 9.637227795383223e-06,
      "loss": 1.0334,
      "step": 2700
    },
    {
      "epoch": 0.21058357292718938,
      "grad_norm": 2.2231115952498817,
      "learning_rate": 9.63213897435039e-06,
      "loss": 1.0092,
      "step": 2710
    },
    {
      "epoch": 0.21136063408190225,
      "grad_norm": 1.5887335858791765,
      "learning_rate": 9.627016072191263e-06,
      "loss": 1.0601,
      "step": 2720
    },
    {
      "epoch": 0.21213769523661513,
      "grad_norm": 2.079071610960163,
      "learning_rate": 9.62185912659778e-06,
      "loss": 1.0089,
      "step": 2730
    },
    {
      "epoch": 0.212914756391328,
      "grad_norm": 2.6538511902261672,
      "learning_rate": 9.616668175512347e-06,
      "loss": 1.0996,
      "step": 2740
    },
    {
      "epoch": 0.21369181754604089,
      "grad_norm": 2.2366602617889675,
      "learning_rate": 9.611443257127573e-06,
      "loss": 0.995,
      "step": 2750
    },
    {
      "epoch": 0.21446887870075376,
      "grad_norm": 1.9923272374726597,
      "learning_rate": 9.60618440988598e-06,
      "loss": 1.0588,
      "step": 2760
    },
    {
      "epoch": 0.2152459398554666,
      "grad_norm": 1.933851579802707,
      "learning_rate": 9.60089167247972e-06,
      "loss": 1.0677,
      "step": 2770
    },
    {
      "epoch": 0.2160230010101795,
      "grad_norm": 1.7051761690927782,
      "learning_rate": 9.595565083850298e-06,
      "loss": 0.9761,
      "step": 2780
    },
    {
      "epoch": 0.21680006216489237,
      "grad_norm": 2.760621047319595,
      "learning_rate": 9.590204683188275e-06,
      "loss": 1.0485,
      "step": 2790
    },
    {
      "epoch": 0.21757712331960524,
      "grad_norm": 2.164361791637637,
      "learning_rate": 9.584810509932993e-06,
      "loss": 1.0935,
      "step": 2800
    },
    {
      "epoch": 0.21835418447431812,
      "grad_norm": 2.1290187047633387,
      "learning_rate": 9.579382603772269e-06,
      "loss": 1.0242,
      "step": 2810
    },
    {
      "epoch": 0.219131245629031,
      "grad_norm": 2.4594545836748796,
      "learning_rate": 9.573921004642117e-06,
      "loss": 1.0066,
      "step": 2820
    },
    {
      "epoch": 0.21990830678374387,
      "grad_norm": 2.211316974662037,
      "learning_rate": 9.568425752726442e-06,
      "loss": 0.9617,
      "step": 2830
    },
    {
      "epoch": 0.22068536793845675,
      "grad_norm": 2.914326191682928,
      "learning_rate": 9.562896888456758e-06,
      "loss": 1.0298,
      "step": 2840
    },
    {
      "epoch": 0.22146242909316963,
      "grad_norm": 1.8033463375470347,
      "learning_rate": 9.557334452511879e-06,
      "loss": 0.9536,
      "step": 2850
    },
    {
      "epoch": 0.2222394902478825,
      "grad_norm": 2.1801243317191856,
      "learning_rate": 9.551738485817622e-06,
      "loss": 0.951,
      "step": 2860
    },
    {
      "epoch": 0.22301655140259538,
      "grad_norm": 2.1629577942104183,
      "learning_rate": 9.546109029546511e-06,
      "loss": 0.9987,
      "step": 2870
    },
    {
      "epoch": 0.22379361255730826,
      "grad_norm": 1.3716114805711197,
      "learning_rate": 9.540446125117468e-06,
      "loss": 0.969,
      "step": 2880
    },
    {
      "epoch": 0.22457067371202113,
      "grad_norm": 1.9483284357069952,
      "learning_rate": 9.534749814195516e-06,
      "loss": 1.0039,
      "step": 2890
    },
    {
      "epoch": 0.225347734866734,
      "grad_norm": 2.0793028495715697,
      "learning_rate": 9.529020138691463e-06,
      "loss": 0.9743,
      "step": 2900
    },
    {
      "epoch": 0.2261247960214469,
      "grad_norm": 2.3579800092596646,
      "learning_rate": 9.523257140761595e-06,
      "loss": 0.9396,
      "step": 2910
    },
    {
      "epoch": 0.22690185717615977,
      "grad_norm": 1.9666592282727686,
      "learning_rate": 9.517460862807378e-06,
      "loss": 1.0413,
      "step": 2920
    },
    {
      "epoch": 0.22767891833087264,
      "grad_norm": 2.332398520531907,
      "learning_rate": 9.51163134747513e-06,
      "loss": 0.9895,
      "step": 2930
    },
    {
      "epoch": 0.22845597948558552,
      "grad_norm": 2.0112812087397853,
      "learning_rate": 9.505768637655717e-06,
      "loss": 1.026,
      "step": 2940
    },
    {
      "epoch": 0.2292330406402984,
      "grad_norm": 1.30588230567386,
      "learning_rate": 9.499872776484234e-06,
      "loss": 0.9389,
      "step": 2950
    },
    {
      "epoch": 0.23001010179501127,
      "grad_norm": 2.4882043492951107,
      "learning_rate": 9.493943807339686e-06,
      "loss": 1.0177,
      "step": 2960
    },
    {
      "epoch": 0.23078716294972415,
      "grad_norm": 2.472037249258304,
      "learning_rate": 9.487981773844673e-06,
      "loss": 1.0865,
      "step": 2970
    },
    {
      "epoch": 0.23156422410443703,
      "grad_norm": 2.3974288694298864,
      "learning_rate": 9.48198671986507e-06,
      "loss": 1.1025,
      "step": 2980
    },
    {
      "epoch": 0.2323412852591499,
      "grad_norm": 1.8931995855209747,
      "learning_rate": 9.475958689509697e-06,
      "loss": 1.0401,
      "step": 2990
    },
    {
      "epoch": 0.23311834641386278,
      "grad_norm": 1.7588453721284736,
      "learning_rate": 9.469897727130001e-06,
      "loss": 1.026,
      "step": 3000
    },
    {
      "epoch": 0.23389540756857566,
      "grad_norm": 2.25782280175551,
      "learning_rate": 9.463803877319727e-06,
      "loss": 1.045,
      "step": 3010
    },
    {
      "epoch": 0.23467246872328854,
      "grad_norm": 2.062470298217632,
      "learning_rate": 9.45767718491459e-06,
      "loss": 0.9873,
      "step": 3020
    },
    {
      "epoch": 0.2354495298780014,
      "grad_norm": 2.235317636179408,
      "learning_rate": 9.451517694991947e-06,
      "loss": 0.9935,
      "step": 3030
    },
    {
      "epoch": 0.23622659103271426,
      "grad_norm": 1.8159214167836841,
      "learning_rate": 9.445325452870459e-06,
      "loss": 0.9837,
      "step": 3040
    },
    {
      "epoch": 0.23700365218742714,
      "grad_norm": 2.530492729153044,
      "learning_rate": 9.439100504109772e-06,
      "loss": 1.0975,
      "step": 3050
    },
    {
      "epoch": 0.23778071334214002,
      "grad_norm": 1.9008032910522048,
      "learning_rate": 9.432842894510164e-06,
      "loss": 0.975,
      "step": 3060
    },
    {
      "epoch": 0.2385577744968529,
      "grad_norm": 1.340909447158594,
      "learning_rate": 9.42655267011222e-06,
      "loss": 0.8966,
      "step": 3070
    },
    {
      "epoch": 0.23933483565156577,
      "grad_norm": 2.3032534649906053,
      "learning_rate": 9.420229877196484e-06,
      "loss": 0.899,
      "step": 3080
    },
    {
      "epoch": 0.24011189680627865,
      "grad_norm": 3.3083719276637815,
      "learning_rate": 9.413874562283136e-06,
      "loss": 1.0154,
      "step": 3090
    },
    {
      "epoch": 0.24088895796099152,
      "grad_norm": 1.7584921998647791,
      "learning_rate": 9.407486772131624e-06,
      "loss": 0.9767,
      "step": 3100
    },
    {
      "epoch": 0.2416660191157044,
      "grad_norm": 2.9427356878313686,
      "learning_rate": 9.401066553740343e-06,
      "loss": 0.9662,
      "step": 3110
    },
    {
      "epoch": 0.24244308027041728,
      "grad_norm": 2.1699016387323233,
      "learning_rate": 9.394613954346274e-06,
      "loss": 0.9713,
      "step": 3120
    },
    {
      "epoch": 0.24322014142513015,
      "grad_norm": 2.1813371019451653,
      "learning_rate": 9.388129021424648e-06,
      "loss": 0.9555,
      "step": 3130
    },
    {
      "epoch": 0.24399720257984303,
      "grad_norm": 1.9891788565996813,
      "learning_rate": 9.381611802688586e-06,
      "loss": 1.0036,
      "step": 3140
    },
    {
      "epoch": 0.2447742637345559,
      "grad_norm": 2.3143675049942014,
      "learning_rate": 9.375062346088759e-06,
      "loss": 0.971,
      "step": 3150
    },
    {
      "epoch": 0.24555132488926878,
      "grad_norm": 2.6629770871009155,
      "learning_rate": 9.368480699813021e-06,
      "loss": 0.9176,
      "step": 3160
    },
    {
      "epoch": 0.24632838604398166,
      "grad_norm": 2.1132958055867808,
      "learning_rate": 9.36186691228607e-06,
      "loss": 0.8972,
      "step": 3170
    },
    {
      "epoch": 0.24710544719869454,
      "grad_norm": 2.029313412599108,
      "learning_rate": 9.35522103216908e-06,
      "loss": 0.9154,
      "step": 3180
    },
    {
      "epoch": 0.24788250835340742,
      "grad_norm": 1.4578868082629726,
      "learning_rate": 9.34854310835935e-06,
      "loss": 1.036,
      "step": 3190
    },
    {
      "epoch": 0.2486595695081203,
      "grad_norm": 2.1062999698802503,
      "learning_rate": 9.341833189989942e-06,
      "loss": 0.8603,
      "step": 3200
    },
    {
      "epoch": 0.24943663066283317,
      "grad_norm": 2.6614219310606892,
      "learning_rate": 9.335091326429313e-06,
      "loss": 0.9924,
      "step": 3210
    },
    {
      "epoch": 0.25021369181754605,
      "grad_norm": 2.0301151705921665,
      "learning_rate": 9.328317567280968e-06,
      "loss": 0.953,
      "step": 3220
    },
    {
      "epoch": 0.2509907529722589,
      "grad_norm": 1.9699445720729638,
      "learning_rate": 9.321511962383077e-06,
      "loss": 0.9379,
      "step": 3230
    },
    {
      "epoch": 0.2517678141269718,
      "grad_norm": 2.2607361825721854,
      "learning_rate": 9.314674561808117e-06,
      "loss": 0.986,
      "step": 3240
    },
    {
      "epoch": 0.2525448752816847,
      "grad_norm": 1.839113470172114,
      "learning_rate": 9.307805415862507e-06,
      "loss": 0.9541,
      "step": 3250
    },
    {
      "epoch": 0.25332193643639755,
      "grad_norm": 1.8823362594556383,
      "learning_rate": 9.300904575086232e-06,
      "loss": 0.9203,
      "step": 3260
    },
    {
      "epoch": 0.25409899759111043,
      "grad_norm": 2.259964303887286,
      "learning_rate": 9.293972090252468e-06,
      "loss": 0.9679,
      "step": 3270
    },
    {
      "epoch": 0.2548760587458233,
      "grad_norm": 2.058151781656702,
      "learning_rate": 9.287008012367221e-06,
      "loss": 1.0023,
      "step": 3280
    },
    {
      "epoch": 0.2556531199005362,
      "grad_norm": 2.306218040399529,
      "learning_rate": 9.280012392668938e-06,
      "loss": 1.0326,
      "step": 3290
    },
    {
      "epoch": 0.25643018105524906,
      "grad_norm": 2.218261287466935,
      "learning_rate": 9.272985282628138e-06,
      "loss": 0.988,
      "step": 3300
    },
    {
      "epoch": 0.25720724220996194,
      "grad_norm": 2.8185774692963146,
      "learning_rate": 9.265926733947035e-06,
      "loss": 0.9237,
      "step": 3310
    },
    {
      "epoch": 0.2579843033646748,
      "grad_norm": 1.966754798605311,
      "learning_rate": 9.258836798559148e-06,
      "loss": 0.8764,
      "step": 3320
    },
    {
      "epoch": 0.2587613645193877,
      "grad_norm": 2.907713378609492,
      "learning_rate": 9.251715528628926e-06,
      "loss": 0.9781,
      "step": 3330
    },
    {
      "epoch": 0.25953842567410057,
      "grad_norm": 2.5867766624212107,
      "learning_rate": 9.244562976551368e-06,
      "loss": 0.9835,
      "step": 3340
    },
    {
      "epoch": 0.26031548682881345,
      "grad_norm": 2.659891863331392,
      "learning_rate": 9.237379194951626e-06,
      "loss": 0.9438,
      "step": 3350
    },
    {
      "epoch": 0.2610925479835263,
      "grad_norm": 1.8970250029232214,
      "learning_rate": 9.230164236684628e-06,
      "loss": 0.9617,
      "step": 3360
    },
    {
      "epoch": 0.2618696091382392,
      "grad_norm": 1.4823476343052233,
      "learning_rate": 9.222918154834684e-06,
      "loss": 1.0756,
      "step": 3370
    },
    {
      "epoch": 0.2626466702929521,
      "grad_norm": 2.1930418016202577,
      "learning_rate": 9.215641002715097e-06,
      "loss": 1.0523,
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.26342373144766495, | |
| "grad_norm": 1.8533472991342042, | |
| "learning_rate": 9.208332833867772e-06, | |
| "loss": 0.8869, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.26420079260237783, | |
| "grad_norm": 2.184383922916281, | |
| "learning_rate": 9.200993702062821e-06, | |
| "loss": 0.9808, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.2649778537570907, | |
| "grad_norm": 2.510050570387309, | |
| "learning_rate": 9.193623661298164e-06, | |
| "loss": 0.9156, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.2657549149118036, | |
| "grad_norm": 2.546343372247806, | |
| "learning_rate": 9.186222765799137e-06, | |
| "loss": 0.9764, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.2665319760665164, | |
| "grad_norm": 1.5693684379771662, | |
| "learning_rate": 9.17879107001809e-06, | |
| "loss": 0.9491, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.2673090372212293, | |
| "grad_norm": 2.2264963076350544, | |
| "learning_rate": 9.171328628633987e-06, | |
| "loss": 0.9796, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.26808609837594216, | |
| "grad_norm": 1.8513099710874061, | |
| "learning_rate": 9.163835496552006e-06, | |
| "loss": 0.9294, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.26886315953065504, | |
| "grad_norm": 2.1369479039679913, | |
| "learning_rate": 9.15631172890313e-06, | |
| "loss": 0.9428, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.2696402206853679, | |
| "grad_norm": 2.1701410069417806, | |
| "learning_rate": 9.148757381043745e-06, | |
| "loss": 0.9497, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.2704172818400808, | |
| "grad_norm": 2.4018785001267102, | |
| "learning_rate": 9.141172508555234e-06, | |
| "loss": 0.9611, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.27119434299479367, | |
| "grad_norm": 2.5173991790204346, | |
| "learning_rate": 9.133557167243565e-06, | |
| "loss": 0.9233, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.27197140414950655, | |
| "grad_norm": 2.5138075382856497, | |
| "learning_rate": 9.125911413138877e-06, | |
| "loss": 0.9203, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.2727484653042194, | |
| "grad_norm": 2.898893363605526, | |
| "learning_rate": 9.11823530249508e-06, | |
| "loss": 0.8849, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.2735255264589323, | |
| "grad_norm": 1.6850916480287021, | |
| "learning_rate": 9.11052889178943e-06, | |
| "loss": 0.875, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.2743025876136452, | |
| "grad_norm": 2.3316883827873447, | |
| "learning_rate": 9.102792237722114e-06, | |
| "loss": 1.0095, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.27507964876835805, | |
| "grad_norm": 2.1632200172689298, | |
| "learning_rate": 9.095025397215838e-06, | |
| "loss": 0.9276, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.27585670992307093, | |
| "grad_norm": 2.8796310855009795, | |
| "learning_rate": 9.087228427415405e-06, | |
| "loss": 0.9235, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.2766337710777838, | |
| "grad_norm": 2.0564562085035023, | |
| "learning_rate": 9.079401385687299e-06, | |
| "loss": 0.9491, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.2774108322324967, | |
| "grad_norm": 2.608162831191934, | |
| "learning_rate": 9.071544329619253e-06, | |
| "loss": 0.9458, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.27818789338720956, | |
| "grad_norm": 2.521963823842101, | |
| "learning_rate": 9.063657317019838e-06, | |
| "loss": 0.9137, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.27896495454192244, | |
| "grad_norm": 2.535651222771701, | |
| "learning_rate": 9.055740405918026e-06, | |
| "loss": 0.9567, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.2797420156966353, | |
| "grad_norm": 2.2389260303888476, | |
| "learning_rate": 9.04779365456277e-06, | |
| "loss": 0.9689, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.2805190768513482, | |
| "grad_norm": 1.7592398575015094, | |
| "learning_rate": 9.039817121422575e-06, | |
| "loss": 0.9177, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.28129613800606107, | |
| "grad_norm": 1.8002755024191208, | |
| "learning_rate": 9.031810865185066e-06, | |
| "loss": 0.9407, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.28207319916077395, | |
| "grad_norm": 2.3928408034774082, | |
| "learning_rate": 9.023774944756555e-06, | |
| "loss": 0.9863, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.2828502603154868, | |
| "grad_norm": 2.395034750902151, | |
| "learning_rate": 9.015709419261612e-06, | |
| "loss": 0.9869, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.2836273214701997, | |
| "grad_norm": 2.3890411242782466, | |
| "learning_rate": 9.007614348042626e-06, | |
| "loss": 0.909, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.2844043826249126, | |
| "grad_norm": 2.2350831565472107, | |
| "learning_rate": 8.999489790659368e-06, | |
| "loss": 0.8966, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.28518144377962545, | |
| "grad_norm": 3.694934035517618, | |
| "learning_rate": 8.991335806888558e-06, | |
| "loss": 0.9765, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.28595850493433833, | |
| "grad_norm": 3.0768679656946794, | |
| "learning_rate": 8.983152456723419e-06, | |
| "loss": 0.9859, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.2867355660890512, | |
| "grad_norm": 2.4664124428796548, | |
| "learning_rate": 8.97493980037324e-06, | |
| "loss": 0.9534, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.2875126272437641, | |
| "grad_norm": 2.245723206050526, | |
| "learning_rate": 8.96669789826293e-06, | |
| "loss": 0.9482, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.28828968839847696, | |
| "grad_norm": 2.355965037185437, | |
| "learning_rate": 8.958426811032576e-06, | |
| "loss": 0.8993, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.28906674955318984, | |
| "grad_norm": 2.43480515736849, | |
| "learning_rate": 8.950126599536993e-06, | |
| "loss": 0.9597, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.2898438107079027, | |
| "grad_norm": 2.5741426103315304, | |
| "learning_rate": 8.941797324845284e-06, | |
| "loss": 0.9499, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.2906208718626156, | |
| "grad_norm": 2.490107440300966, | |
| "learning_rate": 8.933439048240376e-06, | |
| "loss": 0.8834, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.29139793301732847, | |
| "grad_norm": 2.079854760599078, | |
| "learning_rate": 8.92505183121859e-06, | |
| "loss": 0.9257, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.29217499417204135, | |
| "grad_norm": 2.520480318994419, | |
| "learning_rate": 8.91663573548917e-06, | |
| "loss": 0.9679, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.2929520553267542, | |
| "grad_norm": 1.8583413033492335, | |
| "learning_rate": 8.908190822973838e-06, | |
| "loss": 0.8838, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.2937291164814671, | |
| "grad_norm": 2.3837910942670177, | |
| "learning_rate": 8.899717155806337e-06, | |
| "loss": 0.8847, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.29450617763618, | |
| "grad_norm": 2.711696676240023, | |
| "learning_rate": 8.891214796331973e-06, | |
| "loss": 0.9878, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.29528323879089285, | |
| "grad_norm": 2.502641692502333, | |
| "learning_rate": 8.882683807107154e-06, | |
| "loss": 0.9536, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.29606029994560573, | |
| "grad_norm": 2.3453784276871708, | |
| "learning_rate": 8.874124250898937e-06, | |
| "loss": 0.8787, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.2968373611003186, | |
| "grad_norm": 1.8832906440195756, | |
| "learning_rate": 8.865536190684559e-06, | |
| "loss": 0.9384, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.2976144222550315, | |
| "grad_norm": 2.1009680565481514, | |
| "learning_rate": 8.856919689650977e-06, | |
| "loss": 0.8934, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.29839148340974436, | |
| "grad_norm": 1.9232637840358615, | |
| "learning_rate": 8.848274811194402e-06, | |
| "loss": 0.9733, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.29916854456445724, | |
| "grad_norm": 2.807204409009, | |
| "learning_rate": 8.839601618919833e-06, | |
| "loss": 0.9018, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.2999456057191701, | |
| "grad_norm": 2.0589460869005065, | |
| "learning_rate": 8.830900176640587e-06, | |
| "loss": 0.9858, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.300722666873883, | |
| "grad_norm": 2.454773689152951, | |
| "learning_rate": 8.822170548377835e-06, | |
| "loss": 0.9769, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.30149972802859587, | |
| "grad_norm": 2.128683839495848, | |
| "learning_rate": 8.813412798360126e-06, | |
| "loss": 0.8856, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.30227678918330875, | |
| "grad_norm": 2.4279634048337213, | |
| "learning_rate": 8.804626991022915e-06, | |
| "loss": 0.9671, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.3030538503380216, | |
| "grad_norm": 3.6045788043321894, | |
| "learning_rate": 8.79581319100809e-06, | |
| "loss": 0.8933, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.3038309114927345, | |
| "grad_norm": 2.1672482233441084, | |
| "learning_rate": 8.786971463163495e-06, | |
| "loss": 0.9564, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.3046079726474474, | |
| "grad_norm": 2.1636428752933328, | |
| "learning_rate": 8.778101872542458e-06, | |
| "loss": 0.9913, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.30538503380216026, | |
| "grad_norm": 2.871516588464275, | |
| "learning_rate": 8.769204484403304e-06, | |
| "loss": 0.8939, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.30616209495687313, | |
| "grad_norm": 2.2048100149121814, | |
| "learning_rate": 8.760279364208879e-06, | |
| "loss": 0.8993, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.306939156111586, | |
| "grad_norm": 2.0054550377532343, | |
| "learning_rate": 8.751326577626075e-06, | |
| "loss": 0.9712, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.30771621726629883, | |
| "grad_norm": 1.941321214144556, | |
| "learning_rate": 8.742346190525332e-06, | |
| "loss": 0.9545, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.3084932784210117, | |
| "grad_norm": 2.3634949614963743, | |
| "learning_rate": 8.733338268980166e-06, | |
| "loss": 0.887, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.3092703395757246, | |
| "grad_norm": 3.5243533187865403, | |
| "learning_rate": 8.72430287926668e-06, | |
| "loss": 0.8955, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.31004740073043746, | |
| "grad_norm": 2.3622243989894747, | |
| "learning_rate": 8.715240087863072e-06, | |
| "loss": 0.8944, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.31082446188515034, | |
| "grad_norm": 1.946906851098621, | |
| "learning_rate": 8.70614996144915e-06, | |
| "loss": 0.8534, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3116015230398632, | |
| "grad_norm": 2.268588081924812, | |
| "learning_rate": 8.697032566905842e-06, | |
| "loss": 0.8884, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.3123785841945761, | |
| "grad_norm": 2.183711381325099, | |
| "learning_rate": 8.6878879713147e-06, | |
| "loss": 0.9143, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.31315564534928897, | |
| "grad_norm": 2.627681687760923, | |
| "learning_rate": 8.678716241957408e-06, | |
| "loss": 0.8835, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.31393270650400185, | |
| "grad_norm": 1.443133233680791, | |
| "learning_rate": 8.669517446315292e-06, | |
| "loss": 0.9273, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3147097676587147, | |
| "grad_norm": 2.393245491803305, | |
| "learning_rate": 8.660291652068813e-06, | |
| "loss": 0.9162, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.3154868288134276, | |
| "grad_norm": 2.2137742145203987, | |
| "learning_rate": 8.65103892709708e-06, | |
| "loss": 0.9558, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3162638899681405, | |
| "grad_norm": 1.8575771555594642, | |
| "learning_rate": 8.641759339477345e-06, | |
| "loss": 0.9469, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.31704095112285335, | |
| "grad_norm": 2.3987640931014496, | |
| "learning_rate": 8.632452957484498e-06, | |
| "loss": 0.8976, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.31781801227756623, | |
| "grad_norm": 2.6592688199749612, | |
| "learning_rate": 8.62311984959058e-06, | |
| "loss": 0.8577, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.3185950734322791, | |
| "grad_norm": 2.6015155100334226, | |
| "learning_rate": 8.613760084464258e-06, | |
| "loss": 0.8989, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.319372134586992, | |
| "grad_norm": 3.2861649632260903, | |
| "learning_rate": 8.604373730970334e-06, | |
| "loss": 0.9379, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.32014919574170486, | |
| "grad_norm": 2.2805290644540315, | |
| "learning_rate": 8.59496085816924e-06, | |
| "loss": 0.9307, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.32092625689641774, | |
| "grad_norm": 1.9526498942261281, | |
| "learning_rate": 8.585521535316517e-06, | |
| "loss": 0.9789, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.3217033180511306, | |
| "grad_norm": 2.1513380917456923, | |
| "learning_rate": 8.576055831862317e-06, | |
| "loss": 0.9632, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.3224803792058435, | |
| "grad_norm": 2.9117768462597273, | |
| "learning_rate": 8.56656381745089e-06, | |
| "loss": 0.8607, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.32325744036055637, | |
| "grad_norm": 1.6037295849873296, | |
| "learning_rate": 8.557045561920066e-06, | |
| "loss": 0.9062, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.32403450151526925, | |
| "grad_norm": 2.3047029595748745, | |
| "learning_rate": 8.547501135300747e-06, | |
| "loss": 0.8982, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.3248115626699821, | |
| "grad_norm": 2.414214418200032, | |
| "learning_rate": 8.537930607816386e-06, | |
| "loss": 0.952, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.325588623824695, | |
| "grad_norm": 2.6048634749383037, | |
| "learning_rate": 8.528334049882482e-06, | |
| "loss": 0.9004, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.3263656849794079, | |
| "grad_norm": 2.090591332073441, | |
| "learning_rate": 8.51871153210605e-06, | |
| "loss": 0.9109, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.32714274613412075, | |
| "grad_norm": 2.039137230473015, | |
| "learning_rate": 8.5090631252851e-06, | |
| "loss": 0.8622, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.32791980728883363, | |
| "grad_norm": 1.3644794656877728, | |
| "learning_rate": 8.499388900408131e-06, | |
| "loss": 0.8932, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3286968684435465, | |
| "grad_norm": 1.9869041419127695, | |
| "learning_rate": 8.489688928653593e-06, | |
| "loss": 0.8921, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.3294739295982594, | |
| "grad_norm": 2.1198129652125908, | |
| "learning_rate": 8.479963281389369e-06, | |
| "loss": 0.9178, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.33025099075297226, | |
| "grad_norm": 2.922298668933732, | |
| "learning_rate": 8.470212030172254e-06, | |
| "loss": 0.8541, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.33102805190768514, | |
| "grad_norm": 2.862204782837741, | |
| "learning_rate": 8.460435246747425e-06, | |
| "loss": 0.9081, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.331805113062398, | |
| "grad_norm": 2.4866367731953103, | |
| "learning_rate": 8.45063300304791e-06, | |
| "loss": 0.9563, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.3325821742171109, | |
| "grad_norm": 3.6054620715626298, | |
| "learning_rate": 8.440805371194064e-06, | |
| "loss": 0.8762, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.33335923537182377, | |
| "grad_norm": 1.357274089384285, | |
| "learning_rate": 8.430952423493038e-06, | |
| "loss": 0.89, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.33413629652653665, | |
| "grad_norm": 2.462550588436075, | |
| "learning_rate": 8.42107423243824e-06, | |
| "loss": 0.8998, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.3349133576812495, | |
| "grad_norm": 2.4758376060526337, | |
| "learning_rate": 8.41117087070881e-06, | |
| "loss": 0.8602, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.3356904188359624, | |
| "grad_norm": 2.670924674405534, | |
| "learning_rate": 8.401242411169085e-06, | |
| "loss": 0.9091, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.3364674799906753, | |
| "grad_norm": 2.4965212229622855, | |
| "learning_rate": 8.391288926868055e-06, | |
| "loss": 0.905, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.33724454114538815, | |
| "grad_norm": 2.6193244431141105, | |
| "learning_rate": 8.381310491038835e-06, | |
| "loss": 0.8834, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.33802160230010103, | |
| "grad_norm": 2.639094468488719, | |
| "learning_rate": 8.371307177098114e-06, | |
| "loss": 0.9659, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.3387986634548139, | |
| "grad_norm": 1.844532803490863, | |
| "learning_rate": 8.361279058645634e-06, | |
| "loss": 0.8736, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.3395757246095268, | |
| "grad_norm": 3.4447047963873647, | |
| "learning_rate": 8.351226209463628e-06, | |
| "loss": 0.8564, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.34035278576423966, | |
| "grad_norm": 2.0546081486698773, | |
| "learning_rate": 8.341148703516291e-06, | |
| "loss": 0.929, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.34112984691895254, | |
| "grad_norm": 2.498839246884663, | |
| "learning_rate": 8.331046614949228e-06, | |
| "loss": 0.8663, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.3419069080736654, | |
| "grad_norm": 2.574109259388575, | |
| "learning_rate": 8.320920018088912e-06, | |
| "loss": 0.9137, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.3426839692283783, | |
| "grad_norm": 3.1393397756280206, | |
| "learning_rate": 8.310768987442139e-06, | |
| "loss": 0.9368, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.34346103038309117, | |
| "grad_norm": 3.20210731977578, | |
| "learning_rate": 8.300593597695476e-06, | |
| "loss": 0.9299, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.34423809153780405, | |
| "grad_norm": 3.5589792979708994, | |
| "learning_rate": 8.290393923714713e-06, | |
| "loss": 0.9587, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.3450151526925169, | |
| "grad_norm": 2.8541415351108825, | |
| "learning_rate": 8.280170040544312e-06, | |
| "loss": 0.8605, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.3457922138472298, | |
| "grad_norm": 2.0518411713546554, | |
| "learning_rate": 8.269922023406851e-06, | |
| "loss": 0.7918, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.3465692750019427, | |
| "grad_norm": 2.699406909968831, | |
| "learning_rate": 8.259649947702485e-06, | |
| "loss": 0.873, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.34734633615665556, | |
| "grad_norm": 3.0919334403019425, | |
| "learning_rate": 8.24935388900837e-06, | |
| "loss": 0.8373, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.34812339731136843, | |
| "grad_norm": 2.9019624759746305, | |
| "learning_rate": 8.239033923078124e-06, | |
| "loss": 0.9174, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.34890045846608125, | |
| "grad_norm": 2.1140460699445764, | |
| "learning_rate": 8.228690125841258e-06, | |
| "loss": 0.8672, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.34967751962079413, | |
| "grad_norm": 3.0197408308584146, | |
| "learning_rate": 8.218322573402629e-06, | |
| "loss": 0.8523, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.350454580775507, | |
| "grad_norm": 2.657040743922122, | |
| "learning_rate": 8.20793134204187e-06, | |
| "loss": 0.8497, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.3512316419302199, | |
| "grad_norm": 3.4478785002624903, | |
| "learning_rate": 8.197516508212832e-06, | |
| "loss": 0.9144, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.35200870308493276, | |
| "grad_norm": 2.615501805261325, | |
| "learning_rate": 8.187078148543026e-06, | |
| "loss": 0.8521, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.35278576423964564, | |
| "grad_norm": 2.7673910964569566, | |
| "learning_rate": 8.176616339833048e-06, | |
| "loss": 0.9834, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.3535628253943585, | |
| "grad_norm": 3.110704979833664, | |
| "learning_rate": 8.166131159056028e-06, | |
| "loss": 0.9291, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.3543398865490714, | |
| "grad_norm": 2.382239717418457, | |
| "learning_rate": 8.155622683357056e-06, | |
| "loss": 0.962, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.35511694770378427, | |
| "grad_norm": 2.974819074830629, | |
| "learning_rate": 8.14509099005261e-06, | |
| "loss": 0.9076, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.35589400885849715, | |
| "grad_norm": 2.025484177379498, | |
| "learning_rate": 8.13453615663e-06, | |
| "loss": 0.9316, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.35667107001321, | |
| "grad_norm": 2.490523269053249, | |
| "learning_rate": 8.123958260746781e-06, | |
| "loss": 0.9202, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.3574481311679229, | |
| "grad_norm": 2.4151860798523566, | |
| "learning_rate": 8.113357380230198e-06, | |
| "loss": 0.8332, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.3582251923226358, | |
| "grad_norm": 2.994576094392819, | |
| "learning_rate": 8.102733593076608e-06, | |
| "loss": 0.907, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.35900225347734865, | |
| "grad_norm": 2.2570861805827898, | |
| "learning_rate": 8.092086977450896e-06, | |
| "loss": 0.892, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.35977931463206153, | |
| "grad_norm": 1.9441465953568793, | |
| "learning_rate": 8.081417611685914e-06, | |
| "loss": 0.8221, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.3605563757867744, | |
| "grad_norm": 2.9229560639134, | |
| "learning_rate": 8.0707255742819e-06, | |
| "loss": 0.8765, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.3613334369414873, | |
| "grad_norm": 3.3085405723587216, | |
| "learning_rate": 8.060010943905894e-06, | |
| "loss": 0.8406, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.36211049809620016, | |
| "grad_norm": 2.7364277865283624, | |
| "learning_rate": 8.049273799391171e-06, | |
| "loss": 0.8282, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.36288755925091304, | |
| "grad_norm": 2.483155933386303, | |
| "learning_rate": 8.038514219736648e-06, | |
| "loss": 0.9325, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.3636646204056259, | |
| "grad_norm": 3.132743137231315, | |
| "learning_rate": 8.027732284106316e-06, | |
| "loss": 0.8662, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.3644416815603388, | |
| "grad_norm": 2.9308723735400233, | |
| "learning_rate": 8.016928071828644e-06, | |
| "loss": 0.876, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.36521874271505167, | |
| "grad_norm": 2.4289372656874058, | |
| "learning_rate": 8.006101662396011e-06, | |
| "loss": 0.8752, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.36599580386976455, | |
| "grad_norm": 3.5005034837842794, | |
| "learning_rate": 7.995253135464103e-06, | |
| "loss": 0.8211, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.3667728650244774, | |
| "grad_norm": 2.6219168824993897, | |
| "learning_rate": 7.984382570851341e-06, | |
| "loss": 0.8963, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.3675499261791903, | |
| "grad_norm": 2.6913591077446544, | |
| "learning_rate": 7.973490048538291e-06, | |
| "loss": 0.8135, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.3683269873339032, | |
| "grad_norm": 3.323688764018341, | |
| "learning_rate": 7.962575648667068e-06, | |
| "loss": 0.8394, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.36910404848861605, | |
| "grad_norm": 1.9160655382592797, | |
| "learning_rate": 7.951639451540759e-06, | |
| "loss": 0.8373, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.36988110964332893, | |
| "grad_norm": 2.2592953806408977, | |
| "learning_rate": 7.940681537622816e-06, | |
| "loss": 0.8717, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.3706581707980418, | |
| "grad_norm": 2.4625597781213933, | |
| "learning_rate": 7.92970198753648e-06, | |
| "loss": 0.8353, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.3714352319527547, | |
| "grad_norm": 2.547595160954955, | |
| "learning_rate": 7.918700882064181e-06, | |
| "loss": 0.8747, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.37221229310746756, | |
| "grad_norm": 3.276135067674202, | |
| "learning_rate": 7.907678302146939e-06, | |
| "loss": 0.8997, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.37298935426218044, | |
| "grad_norm": 3.036723238718559, | |
| "learning_rate": 7.896634328883777e-06, | |
| "loss": 0.8189, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.3737664154168933, | |
| "grad_norm": 2.0650698930773093, | |
| "learning_rate": 7.885569043531118e-06, | |
| "loss": 0.8454, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.3745434765716062, | |
| "grad_norm": 3.760117109301269, | |
| "learning_rate": 7.874482527502192e-06, | |
| "loss": 0.8213, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.37532053772631907, | |
| "grad_norm": 3.531426821109854, | |
| "learning_rate": 7.863374862366428e-06, | |
| "loss": 0.8113, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.37609759888103195, | |
| "grad_norm": 2.3515365517581164, | |
| "learning_rate": 7.85224612984887e-06, | |
| "loss": 0.8064, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.3768746600357448, | |
| "grad_norm": 1.8840341910034588, | |
| "learning_rate": 7.841096411829561e-06, | |
| "loss": 0.8683, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.3776517211904577, | |
| "grad_norm": 2.38418725628485, | |
| "learning_rate": 7.829925790342942e-06, | |
| "loss": 0.7812, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.3784287823451706, | |
| "grad_norm": 2.4785026498656615, | |
| "learning_rate": 7.818734347577258e-06, | |
| "loss": 0.8119, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.37920584349988345, | |
| "grad_norm": 3.137259786348735, | |
| "learning_rate": 7.807522165873945e-06, | |
| "loss": 0.8764, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.37998290465459633, | |
| "grad_norm": 2.8359325177369845, | |
| "learning_rate": 7.796289327727022e-06, | |
| "loss": 0.7978, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.3807599658093092, | |
| "grad_norm": 3.158128777649866, | |
| "learning_rate": 7.7850359157825e-06, | |
| "loss": 0.8412, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.3815370269640221, | |
| "grad_norm": 3.501006126578136, | |
| "learning_rate": 7.773762012837751e-06, | |
| "loss": 0.8779, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.38231408811873496, | |
| "grad_norm": 2.468978859483751, | |
| "learning_rate": 7.762467701840914e-06, | |
| "loss": 0.8813, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.38309114927344784, | |
| "grad_norm": 3.0067259204153634, | |
| "learning_rate": 7.751153065890284e-06, | |
| "loss": 0.7915, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.3838682104281607, | |
| "grad_norm": 3.9988455962849865, | |
| "learning_rate": 7.739818188233693e-06, | |
| "loss": 0.8698, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.3846452715828736, | |
| "grad_norm": 2.8749069871202746, | |
| "learning_rate": 7.728463152267905e-06, | |
| "loss": 0.8986, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.38542233273758647, | |
| "grad_norm": 1.8557781579247277, | |
| "learning_rate": 7.717088041538e-06, | |
| "loss": 0.836, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.38619939389229935, | |
| "grad_norm": 2.554552315654769, | |
| "learning_rate": 7.705692939736754e-06, | |
| "loss": 0.905, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.3869764550470122, | |
| "grad_norm": 3.253478052848826, | |
| "learning_rate": 7.694277930704035e-06, | |
| "loss": 0.8877, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.3877535162017251, | |
| "grad_norm": 2.8816016322900095, | |
| "learning_rate": 7.682843098426173e-06, | |
| "loss": 0.9017, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.388530577356438, | |
| "grad_norm": 3.6095277498188465, | |
| "learning_rate": 7.671388527035353e-06, | |
| "loss": 0.839, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.38930763851115086, | |
| "grad_norm": 2.872689759467288, | |
| "learning_rate": 7.659914300808987e-06, | |
| "loss": 0.8551, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.3900846996658637, | |
| "grad_norm": 3.57677819644193, | |
| "learning_rate": 7.6484205041691e-06, | |
| "loss": 0.9367, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.39086176082057655, | |
| "grad_norm": 2.516301941871412, | |
| "learning_rate": 7.63690722168171e-06, | |
| "loss": 0.8439, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.39163882197528943, | |
| "grad_norm": 3.6976446460324985, | |
| "learning_rate": 7.625374538056196e-06, | |
| "loss": 0.9143, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.3924158831300023, | |
| "grad_norm": 2.4108959760850976, | |
| "learning_rate": 7.61382253814469e-06, | |
| "loss": 0.8488, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.3931929442847152, | |
| "grad_norm": 3.575102830931404, | |
| "learning_rate": 7.6022513069414375e-06, | |
| "loss": 0.9244, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.39397000543942806, | |
| "grad_norm": 2.5214806607432156, | |
| "learning_rate": 7.5906609295821785e-06, | |
| "loss": 0.7828, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.39474706659414094, | |
| "grad_norm": 2.2256882514741267, | |
| "learning_rate": 7.57905149134353e-06, | |
| "loss": 0.8343, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.3955241277488538, | |
| "grad_norm": 2.9737799015299915, | |
| "learning_rate": 7.567423077642342e-06, | |
| "loss": 0.8029, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.3963011889035667, | |
| "grad_norm": 2.1814937586449474, | |
| "learning_rate": 7.555775774035077e-06, | |
| "loss": 0.8595, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.39707825005827957, | |
| "grad_norm": 3.756192351660152, | |
| "learning_rate": 7.544109666217186e-06, | |
| "loss": 0.8058, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.39785531121299245, | |
| "grad_norm": 2.3316584523565544, | |
| "learning_rate": 7.532424840022468e-06, | |
| "loss": 0.8203, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.3986323723677053, | |
| "grad_norm": 3.3303069401649195, | |
| "learning_rate": 7.520721381422444e-06, | |
| "loss": 0.8766, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.3994094335224182, | |
| "grad_norm": 2.7096079276885203, | |
| "learning_rate": 7.5089993765257295e-06, | |
| "loss": 0.8252, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.4001864946771311, | |
| "grad_norm": 2.7989889775088987, | |
| "learning_rate": 7.497258911577385e-06, | |
| "loss": 0.8241, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.40096355583184395, | |
| "grad_norm": 2.8348088908838833, | |
| "learning_rate": 7.485500072958298e-06, | |
| "loss": 0.8047, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.40174061698655683, | |
| "grad_norm": 2.7178413634018206, | |
| "learning_rate": 7.4737229471845384e-06, | |
| "loss": 0.8469, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.4025176781412697, | |
| "grad_norm": 2.653716140680188, | |
| "learning_rate": 7.46192762090673e-06, | |
| "loss": 0.8986, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.4032947392959826, | |
| "grad_norm": 3.2114074118987097, | |
| "learning_rate": 7.450114180909396e-06, | |
| "loss": 0.8572, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.40407180045069546, | |
| "grad_norm": 2.5594692675174904, | |
| "learning_rate": 7.438282714110346e-06, | |
| "loss": 0.8348, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.40484886160540834, | |
| "grad_norm": 2.570719975580699, | |
| "learning_rate": 7.4264333075600094e-06, | |
| "loss": 0.817, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.4056259227601212, | |
| "grad_norm": 1.7904273430264364, | |
| "learning_rate": 7.414566048440815e-06, | |
| "loss": 0.773, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.4064029839148341, | |
| "grad_norm": 3.1160992335315836, | |
| "learning_rate": 7.4026810240665455e-06, | |
| "loss": 0.8406, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.40718004506954697, | |
| "grad_norm": 2.879056289166062, | |
| "learning_rate": 7.390778321881684e-06, | |
| "loss": 0.8299, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.40795710622425985, | |
| "grad_norm": 3.4705886843955134, | |
| "learning_rate": 7.378858029460785e-06, | |
| "loss": 0.8443, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.4087341673789727, | |
| "grad_norm": 3.0683501999483203, | |
| "learning_rate": 7.366920234507819e-06, | |
| "loss": 0.8563, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.4095112285336856, | |
| "grad_norm": 3.0155201359764248, | |
| "learning_rate": 7.354965024855536e-06, | |
| "loss": 0.7995, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.4102882896883985, | |
| "grad_norm": 3.7649992863447594, | |
| "learning_rate": 7.342992488464813e-06, | |
| "loss": 0.8513, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.41106535084311135, | |
| "grad_norm": 2.766804831311677, | |
| "learning_rate": 7.331002713424012e-06, | |
| "loss": 0.818, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.41184241199782423, | |
| "grad_norm": 3.759592880394652, | |
| "learning_rate": 7.3189957879483235e-06, | |
| "loss": 0.8724, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.4126194731525371, | |
| "grad_norm": 3.069207342018398, | |
| "learning_rate": 7.3069718003791276e-06, | |
| "loss": 0.8836, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.41339653430725, | |
| "grad_norm": 3.3679689130107433, | |
| "learning_rate": 7.29493083918334e-06, | |
| "loss": 0.8408, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.41417359546196286, | |
| "grad_norm": 3.1614295846456244, | |
| "learning_rate": 7.282872992952757e-06, | |
| "loss": 0.796, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.41495065661667574, | |
| "grad_norm": 2.3615270875399905, | |
| "learning_rate": 7.270798350403407e-06, | |
| "loss": 0.7622, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.4157277177713886, | |
| "grad_norm": 4.796953025378249, | |
| "learning_rate": 7.2587070003749015e-06, | |
| "loss": 0.8264, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.4165047789261015, | |
| "grad_norm": 3.137452467564146, | |
| "learning_rate": 7.246599031829775e-06, | |
| "loss": 0.7943, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.41728184008081437, | |
| "grad_norm": 3.0340412586302064, | |
| "learning_rate": 7.234474533852834e-06, | |
| "loss": 0.8368, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.41805890123552725, | |
| "grad_norm": 3.5888770778936627, | |
| "learning_rate": 7.222333595650502e-06, | |
| "loss": 0.8416, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.4188359623902401, | |
| "grad_norm": 1.602353309028904, | |
| "learning_rate": 7.210176306550161e-06, | |
| "loss": 0.8347, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.419613023544953, | |
| "grad_norm": 4.051100900717811, | |
| "learning_rate": 7.198002755999495e-06, | |
| "loss": 0.8079, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.4203900846996659, | |
| "grad_norm": 2.6685524323790215, | |
| "learning_rate": 7.185813033565832e-06, | |
| "loss": 0.8434, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.42116714585437875, | |
| "grad_norm": 2.729322434976506, | |
| "learning_rate": 7.1736072289354875e-06, | |
| "loss": 0.8578, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.42194420700909163, | |
| "grad_norm": 2.906073044503289, | |
| "learning_rate": 7.161385431913098e-06, | |
| "loss": 0.7804, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.4227212681638045, | |
| "grad_norm": 2.290496693909145, | |
| "learning_rate": 7.149147732420971e-06, | |
| "loss": 0.8248, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.4234983293185174, | |
| "grad_norm": 5.010159443056758, | |
| "learning_rate": 7.1368942204984094e-06, | |
| "loss": 0.8057, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.42427539047323026, | |
| "grad_norm": 2.536646021262698, | |
| "learning_rate": 7.124624986301062e-06, | |
| "loss": 0.8439, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.42505245162794314, | |
| "grad_norm": 2.9421994943957364, | |
| "learning_rate": 7.112340120100255e-06, | |
| "loss": 0.8744, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.425829512782656, | |
| "grad_norm": 3.3641741595063888, | |
| "learning_rate": 7.100039712282323e-06, | |
| "loss": 0.8211, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.4266065739373689, | |
| "grad_norm": 4.096933321696819, | |
| "learning_rate": 7.0877238533479535e-06, | |
| "loss": 0.838, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.42738363509208177, | |
| "grad_norm": 3.4094346710709678, | |
| "learning_rate": 7.075392633911513e-06, | |
| "loss": 0.8409, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.42816069624679465, | |
| "grad_norm": 1.993447683519007, | |
| "learning_rate": 7.063046144700383e-06, | |
| "loss": 0.8555, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.4289377574015075, | |
| "grad_norm": 2.5909937579577256, | |
| "learning_rate": 7.050684476554299e-06, | |
| "loss": 0.822, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.4297148185562204, | |
| "grad_norm": 3.3258757291630716, | |
| "learning_rate": 7.038307720424668e-06, | |
| "loss": 0.8538, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.4304918797109332, | |
| "grad_norm": 3.6044299100524535, | |
| "learning_rate": 7.025915967373911e-06, | |
| "loss": 0.7909, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4312689408656461, | |
| "grad_norm": 2.945760411127075, | |
| "learning_rate": 7.013509308574788e-06, | |
| "loss": 0.7084, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.432046002020359, | |
| "grad_norm": 3.9943856557515405, | |
| "learning_rate": 7.001087835309734e-06, | |
| "loss": 0.8192, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.43282306317507185, | |
| "grad_norm": 3.9363696932078094, | |
| "learning_rate": 6.988651638970175e-06, | |
| "loss": 0.7937, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.43360012432978473, | |
| "grad_norm": 2.7961832443632697, | |
| "learning_rate": 6.976200811055867e-06, | |
| "loss": 0.8409, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.4343771854844976, | |
| "grad_norm": 3.573733698773883, | |
| "learning_rate": 6.963735443174213e-06, | |
| "loss": 0.8, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.4351542466392105, | |
| "grad_norm": 1.861238869282892, | |
| "learning_rate": 6.9512556270395996e-06, | |
| "loss": 0.8202, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.43593130779392336, | |
| "grad_norm": 3.435004374927387, | |
| "learning_rate": 6.938761454472718e-06, | |
| "loss": 0.7907, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.43670836894863624, | |
| "grad_norm": 2.169031421644934, | |
| "learning_rate": 6.926253017399882e-06, | |
| "loss": 0.7455, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.4374854301033491, | |
| "grad_norm": 2.639119266804599, | |
| "learning_rate": 6.913730407852359e-06, | |
| "loss": 0.7798, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.438262491258062, | |
| "grad_norm": 3.0923108923433653, | |
| "learning_rate": 6.9011937179656956e-06, | |
| "loss": 0.86, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.43903955241277487, | |
| "grad_norm": 3.4778690753111974, | |
| "learning_rate": 6.888643039979025e-06, | |
| "loss": 0.8565, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.43981661356748775, | |
| "grad_norm": 2.019550042044677, | |
| "learning_rate": 6.8760784662344085e-06, | |
| "loss": 0.8222, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.4405936747222006, | |
| "grad_norm": 2.531115492821316, | |
| "learning_rate": 6.863500089176141e-06, | |
| "loss": 0.7994, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.4413707358769135, | |
| "grad_norm": 3.623980012450744, | |
| "learning_rate": 6.850908001350076e-06, | |
| "loss": 0.8085, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.4421477970316264, | |
| "grad_norm": 2.874269072854778, | |
| "learning_rate": 6.838302295402944e-06, | |
| "loss": 0.8206, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.44292485818633925, | |
| "grad_norm": 3.3046693857663767, | |
| "learning_rate": 6.825683064081673e-06, | |
| "loss": 0.7733, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.44370191934105213, | |
| "grad_norm": 2.820815832528071, | |
| "learning_rate": 6.813050400232705e-06, | |
| "loss": 0.7684, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.444478980495765, | |
| "grad_norm": 3.2657952823703513, | |
| "learning_rate": 6.800404396801309e-06, | |
| "loss": 0.8003, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.4452560416504779, | |
| "grad_norm": 3.316944889654959, | |
| "learning_rate": 6.787745146830903e-06, | |
| "loss": 0.8037, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.44603310280519076, | |
| "grad_norm": 3.850090302427542, | |
| "learning_rate": 6.775072743462368e-06, | |
| "loss": 0.7318, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.44681016395990364, | |
| "grad_norm": 2.488942618483238, | |
| "learning_rate": 6.762387279933355e-06, | |
| "loss": 0.7842, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.4475872251146165, | |
| "grad_norm": 3.9986923312061537, | |
| "learning_rate": 6.749688849577616e-06, | |
| "loss": 0.7452, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.4483642862693294, | |
| "grad_norm": 3.174677745330878, | |
| "learning_rate": 6.736977545824299e-06, | |
| "loss": 0.7755, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.44914134742404227, | |
| "grad_norm": 3.008290639491103, | |
| "learning_rate": 6.72425346219727e-06, | |
| "loss": 0.7483, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.44991840857875515, | |
| "grad_norm": 3.7842544499599335, | |
| "learning_rate": 6.711516692314426e-06, | |
| "loss": 0.8714, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.450695469733468, | |
| "grad_norm": 3.595279361244756, | |
| "learning_rate": 6.698767329887001e-06, | |
| "loss": 0.8087, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.4514725308881809, | |
| "grad_norm": 3.2985766841264974, | |
| "learning_rate": 6.686005468718879e-06, | |
| "loss": 0.7593, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.4522495920428938, | |
| "grad_norm": 3.3364617948252855, | |
| "learning_rate": 6.673231202705906e-06, | |
| "loss": 0.744, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.45302665319760665, | |
| "grad_norm": 1.6739208971136896, | |
| "learning_rate": 6.660444625835194e-06, | |
| "loss": 0.7233, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.45380371435231953, | |
| "grad_norm": 2.908524261261958, | |
| "learning_rate": 6.647645832184437e-06, | |
| "loss": 0.7726, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.4545807755070324, | |
| "grad_norm": 3.741049911001574, | |
| "learning_rate": 6.634834915921211e-06, | |
| "loss": 0.7414, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.4553578366617453, | |
| "grad_norm": 3.525582515759396, | |
| "learning_rate": 6.6220119713022855e-06, | |
| "loss": 0.7431, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.45613489781645816, | |
| "grad_norm": 3.6441156387339446, | |
| "learning_rate": 6.609177092672927e-06, | |
| "loss": 0.8191, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.45691195897117104, | |
| "grad_norm": 3.235190279824699, | |
| "learning_rate": 6.596330374466212e-06, | |
| "loss": 0.7609, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.4576890201258839, | |
| "grad_norm": 2.6003682513249555, | |
| "learning_rate": 6.5834719112023215e-06, | |
| "loss": 0.7252, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.4584660812805968, | |
| "grad_norm": 4.03595764942659, | |
| "learning_rate": 6.570601797487854e-06, | |
| "loss": 0.8437, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.45924314243530967, | |
| "grad_norm": 2.7068297821785943, | |
| "learning_rate": 6.557720128015127e-06, | |
| "loss": 0.8236, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.46002020359002255, | |
| "grad_norm": 3.4599815225643495, | |
| "learning_rate": 6.544826997561479e-06, | |
| "loss": 0.7797, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.4607972647447354, | |
| "grad_norm": 3.773628994151356, | |
| "learning_rate": 6.531922500988572e-06, | |
| "loss": 0.751, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.4615743258994483, | |
| "grad_norm": 2.2173873623143563, | |
| "learning_rate": 6.519006733241697e-06, | |
| "loss": 0.7701, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.4623513870541612, | |
| "grad_norm": 3.033174067089371, | |
| "learning_rate": 6.506079789349074e-06, | |
| "loss": 0.7682, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.46312844820887406, | |
| "grad_norm": 4.1166433622525584, | |
| "learning_rate": 6.493141764421145e-06, | |
| "loss": 0.8537, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.46390550936358693, | |
| "grad_norm": 3.131603304402972, | |
| "learning_rate": 6.48019275364989e-06, | |
| "loss": 0.7729, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.4646825705182998, | |
| "grad_norm": 3.0925113977774674, | |
| "learning_rate": 6.46723285230811e-06, | |
| "loss": 0.7959, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.4654596316730127, | |
| "grad_norm": 4.214785149959189, | |
| "learning_rate": 6.454262155748741e-06, | |
| "loss": 0.771, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.46623669282772556, | |
| "grad_norm": 4.231644528802966, | |
| "learning_rate": 6.4412807594041396e-06, | |
| "loss": 0.8038, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.46701375398243844, | |
| "grad_norm": 3.077252834668561, | |
| "learning_rate": 6.428288758785387e-06, | |
| "loss": 0.7784, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.4677908151371513, | |
| "grad_norm": 3.710905060380187, | |
| "learning_rate": 6.415286249481591e-06, | |
| "loss": 0.7705, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.4685678762918642, | |
| "grad_norm": 4.489857568139187, | |
| "learning_rate": 6.402273327159169e-06, | |
| "loss": 0.7182, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.46934493744657707, | |
| "grad_norm": 3.852955528938296, | |
| "learning_rate": 6.389250087561162e-06, | |
| "loss": 0.7736, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.47012199860128995, | |
| "grad_norm": 3.9025918987862878, | |
| "learning_rate": 6.376216626506513e-06, | |
| "loss": 0.7431, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.4708990597560028, | |
| "grad_norm": 3.4097364478378203, | |
| "learning_rate": 6.363173039889373e-06, | |
| "loss": 0.7973, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.47167612091071565, | |
| "grad_norm": 4.33473272302523, | |
| "learning_rate": 6.350119423678391e-06, | |
| "loss": 0.7898, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.4724531820654285, | |
| "grad_norm": 3.679757021095654, | |
| "learning_rate": 6.3370558739160096e-06, | |
| "loss": 0.7576, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.4732302432201414, | |
| "grad_norm": 3.9057618817922033, | |
| "learning_rate": 6.32398248671776e-06, | |
| "loss": 0.7725, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.4740073043748543, | |
| "grad_norm": 3.403797504220692, | |
| "learning_rate": 6.310899358271549e-06, | |
| "loss": 0.8273, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.47478436552956715, | |
| "grad_norm": 2.2498527490634936, | |
| "learning_rate": 6.2978065848369594e-06, | |
| "loss": 0.7365, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.47556142668428003, | |
| "grad_norm": 3.5041131745023777, | |
| "learning_rate": 6.284704262744532e-06, | |
| "loss": 0.7739, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.4763384878389929, | |
| "grad_norm": 3.236195246500179, | |
| "learning_rate": 6.271592488395064e-06, | |
| "loss": 0.769, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.4771155489937058, | |
| "grad_norm": 4.227426671695652, | |
| "learning_rate": 6.2584713582589015e-06, | |
| "loss": 0.801, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.47789261014841866, | |
| "grad_norm": 2.395986835968045, | |
| "learning_rate": 6.2453409688752244e-06, | |
| "loss": 0.7343, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.47866967130313154, | |
| "grad_norm": 3.050933140103267, | |
| "learning_rate": 6.232201416851332e-06, | |
| "loss": 0.7774, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.4794467324578444, | |
| "grad_norm": 3.680174317755052, | |
| "learning_rate": 6.219052798861948e-06, | |
| "loss": 0.8151, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.4802237936125573, | |
| "grad_norm": 3.282669805242103, | |
| "learning_rate": 6.205895211648489e-06, | |
| "loss": 0.7851, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.48100085476727017, | |
| "grad_norm": 3.0746449279394454, | |
| "learning_rate": 6.192728752018373e-06, | |
| "loss": 0.8465, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.48177791592198305, | |
| "grad_norm": 3.6239050452367345, | |
| "learning_rate": 6.179553516844291e-06, | |
| "loss": 0.7675, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.4825549770766959, | |
| "grad_norm": 2.4293135613154706, | |
| "learning_rate": 6.1663696030635e-06, | |
| "loss": 0.7459, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.4833320382314088, | |
| "grad_norm": 2.4717149655776716, | |
| "learning_rate": 6.153177107677112e-06, | |
| "loss": 0.7385, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.4841090993861217, | |
| "grad_norm": 3.7011954863420424, | |
| "learning_rate": 6.139976127749381e-06, | |
| "loss": 0.7594, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.48488616054083455, | |
| "grad_norm": 3.580923341493924, | |
| "learning_rate": 6.126766760406982e-06, | |
| "loss": 0.7504, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.48566322169554743, | |
| "grad_norm": 3.7474824398696054, | |
| "learning_rate": 6.1135491028383e-06, | |
| "loss": 0.8189, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.4864402828502603, | |
| "grad_norm": 4.008525494927905, | |
| "learning_rate": 6.100323252292721e-06, | |
| "loss": 0.8037, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.4872173440049732, | |
| "grad_norm": 4.533137670554457, | |
| "learning_rate": 6.087089306079907e-06, | |
| "loss": 0.7396, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.48799440515968606, | |
| "grad_norm": 3.577325942559521, | |
| "learning_rate": 6.073847361569085e-06, | |
| "loss": 0.7712, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.48877146631439894, | |
| "grad_norm": 3.4785892916574226, | |
| "learning_rate": 6.06059751618833e-06, | |
| "loss": 0.7744, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.4895485274691118, | |
| "grad_norm": 2.726294641729152, | |
| "learning_rate": 6.047339867423849e-06, | |
| "loss": 0.739, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.4903255886238247, | |
| "grad_norm": 3.2923367667657244, | |
| "learning_rate": 6.034074512819259e-06, | |
| "loss": 0.7921, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.49110264977853757, | |
| "grad_norm": 2.5138919730315163, | |
| "learning_rate": 6.020801549974879e-06, | |
| "loss": 0.7627, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.49187971093325045, | |
| "grad_norm": 3.0639205838133923, | |
| "learning_rate": 6.007521076546999e-06, | |
| "loss": 0.6908, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.4926567720879633, | |
| "grad_norm": 5.28489991162866, | |
| "learning_rate": 5.994233190247174e-06, | |
| "loss": 0.6984, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.4934338332426762, | |
| "grad_norm": 3.1930218466849665, | |
| "learning_rate": 5.9809379888414975e-06, | |
| "loss": 0.7312, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.4942108943973891, | |
| "grad_norm": 2.140853783592497, | |
| "learning_rate": 5.967635570149881e-06, | |
| "loss": 0.739, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.49498795555210195, | |
| "grad_norm": 2.6520877753384706, | |
| "learning_rate": 5.9543260320453445e-06, | |
| "loss": 0.7115, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.49576501670681483, | |
| "grad_norm": 3.5362571286933693, | |
| "learning_rate": 5.941009472453283e-06, | |
| "loss": 0.7313, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.4965420778615277, | |
| "grad_norm": 3.1479357916202173, | |
| "learning_rate": 5.927685989350755e-06, | |
| "loss": 0.7689, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.4973191390162406, | |
| "grad_norm": 4.239286662147043, | |
| "learning_rate": 5.914355680765757e-06, | |
| "loss": 0.7209, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.49809620017095346, | |
| "grad_norm": 4.168222516693175, | |
| "learning_rate": 5.901018644776509e-06, | |
| "loss": 0.7151, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.49887326132566634, | |
| "grad_norm": 2.857843662958384, | |
| "learning_rate": 5.8876749795107214e-06, | |
| "loss": 0.768, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.4996503224803792, | |
| "grad_norm": 3.52360411131157, | |
| "learning_rate": 5.874324783144885e-06, | |
| "loss": 0.8139, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5004273836350921, | |
| "grad_norm": 3.657211308302993, | |
| "learning_rate": 5.860968153903542e-06, | |
| "loss": 0.6869, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.501204444789805, | |
| "grad_norm": 2.331407753002653, | |
| "learning_rate": 5.847605190058563e-06, | |
| "loss": 0.747, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.5019815059445178, | |
| "grad_norm": 3.7182364487724713, | |
| "learning_rate": 5.8342359899284286e-06, | |
| "loss": 0.7425, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5027585670992307, | |
| "grad_norm": 3.5617096002819926, | |
| "learning_rate": 5.8208606518775e-06, | |
| "loss": 0.7474, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5035356282539436, | |
| "grad_norm": 3.1283143308974477, | |
| "learning_rate": 5.807479274315302e-06, | |
| "loss": 0.7354, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.5043126894086565, | |
| "grad_norm": 3.183649544594623, | |
| "learning_rate": 5.79409195569579e-06, | |
| "loss": 0.7693, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.5050897505633694, | |
| "grad_norm": 4.183143639793591, | |
| "learning_rate": 5.780698794516636e-06, | |
| "loss": 0.7159, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.5058668117180822, | |
| "grad_norm": 3.3530863093489613, | |
| "learning_rate": 5.767299889318496e-06, | |
| "loss": 0.7258, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.5066438728727951, | |
| "grad_norm": 3.4594325919428703, | |
| "learning_rate": 5.75389533868429e-06, | |
| "loss": 0.831, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.507420934027508, | |
| "grad_norm": 2.9431596981070642, | |
| "learning_rate": 5.7404852412384725e-06, | |
| "loss": 0.6962, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.5081979951822209, | |
| "grad_norm": 3.0367905793947894, | |
| "learning_rate": 5.72706969564631e-06, | |
| "loss": 0.7612, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.5089750563369337, | |
| "grad_norm": 2.429198874828814, | |
| "learning_rate": 5.713648800613154e-06, | |
| "loss": 0.7464, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.5097521174916466, | |
| "grad_norm": 3.4346659673155964, | |
| "learning_rate": 5.700222654883712e-06, | |
| "loss": 0.784, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.5105291786463595, | |
| "grad_norm": 3.412520275752024, | |
| "learning_rate": 5.686791357241329e-06, | |
| "loss": 0.7418, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.5113062398010724, | |
| "grad_norm": 3.5500533489754957, | |
| "learning_rate": 5.673355006507251e-06, | |
| "loss": 0.7931, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.5120833009557852, | |
| "grad_norm": 3.3785219578924073, | |
| "learning_rate": 5.659913701539903e-06, | |
| "loss": 0.7255, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.5128603621104981, | |
| "grad_norm": 2.8478099507815493, | |
| "learning_rate": 5.646467541234162e-06, | |
| "loss": 0.6869, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.513637423265211, | |
| "grad_norm": 4.116946216809252, | |
| "learning_rate": 5.633016624520627e-06, | |
| "loss": 0.723, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.5144144844199239, | |
| "grad_norm": 4.278208268527751, | |
| "learning_rate": 5.619561050364897e-06, | |
| "loss": 0.7021, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.5151915455746368, | |
| "grad_norm": 3.9380435048254068, | |
| "learning_rate": 5.606100917766829e-06, | |
| "loss": 0.7289, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.5159686067293496, | |
| "grad_norm": 3.035312643544745, | |
| "learning_rate": 5.592636325759829e-06, | |
| "loss": 0.6616, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.5167456678840625, | |
| "grad_norm": 4.67293135855067, | |
| "learning_rate": 5.579167373410108e-06, | |
| "loss": 0.6983, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.5175227290387754, | |
| "grad_norm": 4.655170532587341, | |
| "learning_rate": 5.565694159815955e-06, | |
| "loss": 0.7799, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.5182997901934883, | |
| "grad_norm": 3.3764468867138193, | |
| "learning_rate": 5.552216784107022e-06, | |
| "loss": 0.7443, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.5190768513482011, | |
| "grad_norm": 3.441315238146844, | |
| "learning_rate": 5.538735345443573e-06, | |
| "loss": 0.7195, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.519853912502914, | |
| "grad_norm": 4.575454800944016, | |
| "learning_rate": 5.525249943015771e-06, | |
| "loss": 0.7499, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.5206309736576269, | |
| "grad_norm": 5.206336978319692, | |
| "learning_rate": 5.511760676042941e-06, | |
| "loss": 0.7462, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.5214080348123398, | |
| "grad_norm": 2.782422183265534, | |
| "learning_rate": 5.498267643772842e-06, | |
| "loss": 0.6735, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.5221850959670526, | |
| "grad_norm": 4.799976665563157, | |
| "learning_rate": 5.484770945480935e-06, | |
| "loss": 0.7432, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.5229621571217655, | |
| "grad_norm": 3.68056618328099, | |
| "learning_rate": 5.471270680469656e-06, | |
| "loss": 0.7086, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.5237392182764784, | |
| "grad_norm": 4.337600776833273, | |
| "learning_rate": 5.457766948067682e-06, | |
| "loss": 0.6972, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.5245162794311913, | |
| "grad_norm": 2.9170786823925754, | |
| "learning_rate": 5.4442598476292e-06, | |
| "loss": 0.697, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.5252933405859042, | |
| "grad_norm": 3.389813065457727, | |
| "learning_rate": 5.430749478533182e-06, | |
| "loss": 0.6823, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.526070401740617, | |
| "grad_norm": 4.405810375053449, | |
| "learning_rate": 5.417235940182646e-06, | |
| "loss": 0.6954, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.5268474628953299, | |
| "grad_norm": 3.745948791175591, | |
| "learning_rate": 5.403719332003925e-06, | |
| "loss": 0.7129, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5276245240500428, | |
| "grad_norm": 3.5759861354998095, | |
| "learning_rate": 5.390199753445945e-06, | |
| "loss": 0.7457, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.5284015852047557, | |
| "grad_norm": 3.133292740862389, | |
| "learning_rate": 5.376677303979481e-06, | |
| "loss": 0.716, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5291786463594685, | |
| "grad_norm": 3.6994792177101536, | |
| "learning_rate": 5.3631520830964335e-06, | |
| "loss": 0.7075, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.5299557075141814, | |
| "grad_norm": 4.2709254391755875, | |
| "learning_rate": 5.349624190309095e-06, | |
| "loss": 0.6646, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.5307327686688943, | |
| "grad_norm": 4.757235420288998, | |
| "learning_rate": 5.3360937251494145e-06, | |
| "loss": 0.7197, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.5315098298236072, | |
| "grad_norm": 3.970395562121448, | |
| "learning_rate": 5.322560787168266e-06, | |
| "loss": 0.7113, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.53228689097832, | |
| "grad_norm": 3.4076129510381636, | |
| "learning_rate": 5.30902547593472e-06, | |
| "loss": 0.7051, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.5330639521330328, | |
| "grad_norm": 4.69605182138137, | |
| "learning_rate": 5.29548789103531e-06, | |
| "loss": 0.7044, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5338410132877457, | |
| "grad_norm": 3.804895971708535, | |
| "learning_rate": 5.281948132073293e-06, | |
| "loss": 0.7119, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.5346180744424586, | |
| "grad_norm": 3.6916149040278596, | |
| "learning_rate": 5.2684062986679245e-06, | |
| "loss": 0.7208, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5353951355971714, | |
| "grad_norm": 3.0845852262650775, | |
| "learning_rate": 5.254862490453723e-06, | |
| "loss": 0.6855, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.5361721967518843, | |
| "grad_norm": 4.685912874705627, | |
| "learning_rate": 5.241316807079735e-06, | |
| "loss": 0.7176, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5369492579065972, | |
| "grad_norm": 2.9240144110586157, | |
| "learning_rate": 5.227769348208808e-06, | |
| "loss": 0.7158, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.5377263190613101, | |
| "grad_norm": 3.258492056259544, | |
| "learning_rate": 5.214220213516849e-06, | |
| "loss": 0.6492, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.538503380216023, | |
| "grad_norm": 4.273950085839226, | |
| "learning_rate": 5.200669502692092e-06, | |
| "loss": 0.6784, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.5392804413707358, | |
| "grad_norm": 2.6079076529513503, | |
| "learning_rate": 5.187117315434374e-06, | |
| "loss": 0.6969, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.5400575025254487, | |
| "grad_norm": 3.4737447133789847, | |
| "learning_rate": 5.173563751454393e-06, | |
| "loss": 0.7804, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.5408345636801616, | |
| "grad_norm": 4.786817720128349, | |
| "learning_rate": 5.160008910472971e-06, | |
| "loss": 0.6805, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5416116248348745, | |
| "grad_norm": 3.7701770083150197, | |
| "learning_rate": 5.146452892220334e-06, | |
| "loss": 0.7214, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.5423886859895873, | |
| "grad_norm": 3.7554811031983344, | |
| "learning_rate": 5.132895796435363e-06, | |
| "loss": 0.6417, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5431657471443002, | |
| "grad_norm": 3.5547381426364097, | |
| "learning_rate": 5.119337722864871e-06, | |
| "loss": 0.6636, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.5439428082990131, | |
| "grad_norm": 5.011611632534712, | |
| "learning_rate": 5.1057787712628645e-06, | |
| "loss": 0.6869, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.544719869453726, | |
| "grad_norm": 3.833252076719035, | |
| "learning_rate": 5.092219041389809e-06, | |
| "loss": 0.698, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.5454969306084388, | |
| "grad_norm": 3.94968001273636, | |
| "learning_rate": 5.0786586330118936e-06, | |
| "loss": 0.6499, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.5462739917631517, | |
| "grad_norm": 4.652418519560147, | |
| "learning_rate": 5.065097645900305e-06, | |
| "loss": 0.7365, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.5470510529178646, | |
| "grad_norm": 3.4688260249453333, | |
| "learning_rate": 5.051536179830485e-06, | |
| "loss": 0.7244, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5478281140725775, | |
| "grad_norm": 3.507980085656876, | |
| "learning_rate": 5.0379743345814e-06, | |
| "loss": 0.6463, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.5486051752272904, | |
| "grad_norm": 4.08415517826481, | |
| "learning_rate": 5.024412209934806e-06, | |
| "loss": 0.7134, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5493822363820032, | |
| "grad_norm": 3.1430434027718848, | |
| "learning_rate": 5.010849905674513e-06, | |
| "loss": 0.6646, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.5501592975367161, | |
| "grad_norm": 1.7398353080625177, | |
| "learning_rate": 4.997287521585657e-06, | |
| "loss": 0.6604, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.550936358691429, | |
| "grad_norm": 3.6616218145390356, | |
| "learning_rate": 4.983725157453956e-06, | |
| "loss": 0.6713, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.5517134198461419, | |
| "grad_norm": 3.811153246818418, | |
| "learning_rate": 4.9701629130649834e-06, | |
| "loss": 0.7095, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.5524904810008547, | |
| "grad_norm": 4.929016419712588, | |
| "learning_rate": 4.956600888203433e-06, | |
| "loss": 0.6714, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.5532675421555676, | |
| "grad_norm": 3.4541756616239927, | |
| "learning_rate": 4.943039182652383e-06, | |
| "loss": 0.7235, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.5540446033102805, | |
| "grad_norm": 4.095722371398238, | |
| "learning_rate": 4.929477896192561e-06, | |
| "loss": 0.8093, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.5548216644649934, | |
| "grad_norm": 4.870666395156222, | |
| "learning_rate": 4.915917128601611e-06, | |
| "loss": 0.7031, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.5555987256197062, | |
| "grad_norm": 3.448418758510041, | |
| "learning_rate": 4.902356979653361e-06, | |
| "loss": 0.7084, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.5563757867744191, | |
| "grad_norm": 3.829159584215915, | |
| "learning_rate": 4.8887975491170845e-06, | |
| "loss": 0.7181, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.557152847929132, | |
| "grad_norm": 3.555777208653401, | |
| "learning_rate": 4.875238936756774e-06, | |
| "loss": 0.6763, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.5579299090838449, | |
| "grad_norm": 2.5493937496001187, | |
| "learning_rate": 4.861681242330397e-06, | |
| "loss": 0.6756, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.5587069702385578, | |
| "grad_norm": 3.3198532718689813, | |
| "learning_rate": 4.84812456558917e-06, | |
| "loss": 0.6644, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.5594840313932706, | |
| "grad_norm": 3.829290955616477, | |
| "learning_rate": 4.834569006276823e-06, | |
| "loss": 0.6786, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.5602610925479835, | |
| "grad_norm": 2.592783541640363, | |
| "learning_rate": 4.821014664128859e-06, | |
| "loss": 0.7156, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.5610381537026964, | |
| "grad_norm": 4.188978510013467, | |
| "learning_rate": 4.807461638871835e-06, | |
| "loss": 0.7262, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.5618152148574093, | |
| "grad_norm": 3.069522579226053, | |
| "learning_rate": 4.79391003022261e-06, | |
| "loss": 0.6989, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.5625922760121221, | |
| "grad_norm": 4.039799899118001, | |
| "learning_rate": 4.780359937887625e-06, | |
| "loss": 0.6682, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.563369337166835, | |
| "grad_norm": 4.6623197649536126, | |
| "learning_rate": 4.766811461562163e-06, | |
| "loss": 0.6464, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.5641463983215479, | |
| "grad_norm": 5.438968217638661, | |
| "learning_rate": 4.753264700929619e-06, | |
| "loss": 0.6507, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.5649234594762608, | |
| "grad_norm": 4.0222533809812, | |
| "learning_rate": 4.739719755660761e-06, | |
| "loss": 0.7014, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.5657005206309736, | |
| "grad_norm": 4.058570524163514, | |
| "learning_rate": 4.726176725413004e-06, | |
| "loss": 0.693, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.5664775817856865, | |
| "grad_norm": 3.3787013409423445, | |
| "learning_rate": 4.712635709829672e-06, | |
| "loss": 0.6591, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.5672546429403994, | |
| "grad_norm": 3.3640659595948708, | |
| "learning_rate": 4.699096808539264e-06, | |
| "loss": 0.7431, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.5680317040951123, | |
| "grad_norm": 3.1238662551833616, | |
| "learning_rate": 4.685560121154729e-06, | |
| "loss": 0.6474, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.5688087652498252, | |
| "grad_norm": 2.452949406434516, | |
| "learning_rate": 4.672025747272721e-06, | |
| "loss": 0.6816, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.569585826404538, | |
| "grad_norm": 3.127308776747053, | |
| "learning_rate": 4.658493786472874e-06, | |
| "loss": 0.6741, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.5703628875592509, | |
| "grad_norm": 3.9891903397041455, | |
| "learning_rate": 4.644964338317069e-06, | |
| "loss": 0.7111, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.5711399487139638, | |
| "grad_norm": 3.495751965003335, | |
| "learning_rate": 4.631437502348697e-06, | |
| "loss": 0.6552, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.5719170098686767, | |
| "grad_norm": 3.436449484433345, | |
| "learning_rate": 4.617913378091935e-06, | |
| "loss": 0.6893, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.5726940710233895, | |
| "grad_norm": 3.0865849237950784, | |
| "learning_rate": 4.604392065051003e-06, | |
| "loss": 0.7376, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.5734711321781024, | |
| "grad_norm": 4.474788471571803, | |
| "learning_rate": 4.590873662709441e-06, | |
| "loss": 0.6914, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.5742481933328153, | |
| "grad_norm": 2.91533419260106, | |
| "learning_rate": 4.577358270529371e-06, | |
| "loss": 0.6414, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.5750252544875282, | |
| "grad_norm": 4.0797704361429785, | |
| "learning_rate": 4.5638459879507685e-06, | |
| "loss": 0.6661, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.575802315642241, | |
| "grad_norm": 4.709772893333078, | |
| "learning_rate": 4.550336914390734e-06, | |
| "loss": 0.6594, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.5765793767969539, | |
| "grad_norm": 4.564968479413114, | |
| "learning_rate": 4.536831149242752e-06, | |
| "loss": 0.6672, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.5773564379516668, | |
| "grad_norm": 4.056479158493849, | |
| "learning_rate": 4.5233287918759645e-06, | |
| "loss": 0.708, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.5781334991063797, | |
| "grad_norm": 3.645071188138108, | |
| "learning_rate": 4.509829941634447e-06, | |
| "loss": 0.686, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.5789105602610926, | |
| "grad_norm": 3.7318479118380044, | |
| "learning_rate": 4.496334697836466e-06, | |
| "loss": 0.6866, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.5796876214158054, | |
| "grad_norm": 3.6748150242674384, | |
| "learning_rate": 4.482843159773753e-06, | |
| "loss": 0.701, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.5804646825705183, | |
| "grad_norm": 3.532495775566941, | |
| "learning_rate": 4.46935542671078e-06, | |
| "loss": 0.6266, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.5812417437252312, | |
| "grad_norm": 3.917282093097207, | |
| "learning_rate": 4.455871597884016e-06, | |
| "loss": 0.6965, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.5820188048799441, | |
| "grad_norm": 3.541326700374132, | |
| "learning_rate": 4.4423917725012125e-06, | |
| "loss": 0.6256, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.5827958660346569, | |
| "grad_norm": 2.8073311337818088, | |
| "learning_rate": 4.428916049740657e-06, | |
| "loss": 0.5885, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.5835729271893698, | |
| "grad_norm": 3.374101386732686, | |
| "learning_rate": 4.41544452875046e-06, | |
| "loss": 0.6549, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.5843499883440827, | |
| "grad_norm": 4.325578617573067, | |
| "learning_rate": 4.401977308647811e-06, | |
| "loss": 0.6566, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.5851270494987956, | |
| "grad_norm": 4.915536833619769, | |
| "learning_rate": 4.38851448851826e-06, | |
| "loss": 0.6687, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.5859041106535084, | |
| "grad_norm": 3.6537787425693544, | |
| "learning_rate": 4.3750561674149815e-06, | |
| "loss": 0.6292, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.5866811718082213, | |
| "grad_norm": 2.9777148243481335, | |
| "learning_rate": 4.3616024443580475e-06, | |
| "loss": 0.6541, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.5874582329629342, | |
| "grad_norm": 3.5260018889623455, | |
| "learning_rate": 4.348153418333703e-06, | |
| "loss": 0.667, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 3.7174490457010654, | |
| "learning_rate": 4.334709188293631e-06, | |
| "loss": 0.6419, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.58901235527236, | |
| "grad_norm": 3.4684662206499355, | |
| "learning_rate": 4.321269853154231e-06, | |
| "loss": 0.65, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.5897894164270728, | |
| "grad_norm": 3.1882054970304083, | |
| "learning_rate": 4.307835511795883e-06, | |
| "loss": 0.622, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.5905664775817857, | |
| "grad_norm": 4.381319562804776, | |
| "learning_rate": 4.294406263062235e-06, | |
| "loss": 0.6422, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.5913435387364986, | |
| "grad_norm": 3.724730362444138, | |
| "learning_rate": 4.280982205759453e-06, | |
| "loss": 0.664, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.5921205998912115, | |
| "grad_norm": 3.2942646676430027, | |
| "learning_rate": 4.267563438655517e-06, | |
| "loss": 0.6834, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.5928976610459243, | |
| "grad_norm": 3.9059709080382445, | |
| "learning_rate": 4.254150060479479e-06, | |
| "loss": 0.6773, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.5936747222006372, | |
| "grad_norm": 3.2926775490538867, | |
| "learning_rate": 4.240742169920744e-06, | |
| "loss": 0.6612, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.5944517833553501, | |
| "grad_norm": 3.721480675397905, | |
| "learning_rate": 4.22733986562834e-06, | |
| "loss": 0.5946, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.595228844510063, | |
| "grad_norm": 3.6657313410284282, | |
| "learning_rate": 4.213943246210195e-06, | |
| "loss": 0.6839, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.5960059056647758, | |
| "grad_norm": 3.555216109953286, | |
| "learning_rate": 4.200552410232411e-06, | |
| "loss": 0.6839, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.5967829668194887, | |
| "grad_norm": 4.24437071856819, | |
| "learning_rate": 4.187167456218536e-06, | |
| "loss": 0.7096, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.5975600279742016, | |
| "grad_norm": 3.760444842640791, | |
| "learning_rate": 4.173788482648841e-06, | |
| "loss": 0.6495, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.5983370891289145, | |
| "grad_norm": 3.2749111360276086, | |
| "learning_rate": 4.1604155879595985e-06, | |
| "loss": 0.6266, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.5991141502836274, | |
| "grad_norm": 4.05061726263054, | |
| "learning_rate": 4.147048870542358e-06, | |
| "loss": 0.6682, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.5998912114383402, | |
| "grad_norm": 4.177296915658458, | |
| "learning_rate": 4.133688428743209e-06, | |
| "loss": 0.6504, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.6006682725930531, | |
| "grad_norm": 3.4374499956078997, | |
| "learning_rate": 4.120334360862078e-06, | |
| "loss": 0.6068, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.601445333747766, | |
| "grad_norm": 3.7771571359160374, | |
| "learning_rate": 4.106986765151992e-06, | |
| "loss": 0.6811, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.6022223949024789, | |
| "grad_norm": 2.755405096701383, | |
| "learning_rate": 4.093645739818357e-06, | |
| "loss": 0.6374, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.6029994560571917, | |
| "grad_norm": 4.718012688255332, | |
| "learning_rate": 4.080311383018239e-06, | |
| "loss": 0.7078, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.6037765172119046, | |
| "grad_norm": 2.894912540809299, | |
| "learning_rate": 4.06698379285964e-06, | |
| "loss": 0.6759, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.6045535783666175, | |
| "grad_norm": 3.025336800067562, | |
| "learning_rate": 4.0536630674007734e-06, | |
| "loss": 0.6109, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.6053306395213304, | |
| "grad_norm": 3.2614510795042126, | |
| "learning_rate": 4.040349304649351e-06, | |
| "loss": 0.685, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.6061077006760432, | |
| "grad_norm": 2.800252117497351, | |
| "learning_rate": 4.027042602561853e-06, | |
| "loss": 0.6498, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.6068847618307561, | |
| "grad_norm": 3.8460226274586122, | |
| "learning_rate": 4.013743059042808e-06, | |
| "loss": 0.6977, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.607661822985469, | |
| "grad_norm": 3.771896387641876, | |
| "learning_rate": 4.0004507719440795e-06, | |
| "loss": 0.6635, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.6084388841401819, | |
| "grad_norm": 3.1786304501140092, | |
| "learning_rate": 3.987165839064141e-06, | |
| "loss": 0.6758, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.6092159452948948, | |
| "grad_norm": 5.015425132509244, | |
| "learning_rate": 3.973888358147353e-06, | |
| "loss": 0.623, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.6099930064496076, | |
| "grad_norm": 4.27847425835873, | |
| "learning_rate": 3.9606184268832525e-06, | |
| "loss": 0.6758, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.6107700676043205, | |
| "grad_norm": 3.3936214832633507, | |
| "learning_rate": 3.947356142905827e-06, | |
| "loss": 0.6132, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.6115471287590334, | |
| "grad_norm": 2.5020153230654896, | |
| "learning_rate": 3.934101603792802e-06, | |
| "loss": 0.6084, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.6123241899137463, | |
| "grad_norm": 3.0348186320695936, | |
| "learning_rate": 3.920854907064912e-06, | |
| "loss": 0.6277, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.6131012510684591, | |
| "grad_norm": 4.926182627828219, | |
| "learning_rate": 3.907616150185205e-06, | |
| "loss": 0.6746, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.613878312223172, | |
| "grad_norm": 4.0423507052637735, | |
| "learning_rate": 3.894385430558297e-06, | |
| "loss": 0.6112, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.6146553733778849, | |
| "grad_norm": 3.549727749823181, | |
| "learning_rate": 3.881162845529678e-06, | |
| "loss": 0.6219, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.6154324345325977, | |
| "grad_norm": 4.713227361162499, | |
| "learning_rate": 3.867948492384983e-06, | |
| "loss": 0.6693, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.6162094956873105, | |
| "grad_norm": 3.471848373352376, | |
| "learning_rate": 3.854742468349283e-06, | |
| "loss": 0.6833, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.6169865568420234, | |
| "grad_norm": 7.217595191023394, | |
| "learning_rate": 3.841544870586369e-06, | |
| "loss": 0.6947, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.6177636179967363, | |
| "grad_norm": 2.9040989631629976, | |
| "learning_rate": 3.828355796198029e-06, | |
| "loss": 0.6342, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.6185406791514492, | |
| "grad_norm": 3.7080878359935268, | |
| "learning_rate": 3.815175342223349e-06, | |
| "loss": 0.6267, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.619317740306162, | |
| "grad_norm": 4.731993499154974, | |
| "learning_rate": 3.80200360563798e-06, | |
| "loss": 0.6319, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.6200948014608749, | |
| "grad_norm": 3.2422107203395267, | |
| "learning_rate": 3.7888406833534447e-06, | |
| "loss": 0.6219, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.6208718626155878, | |
| "grad_norm": 2.7384103955014565, | |
| "learning_rate": 3.7756866722164055e-06, | |
| "loss": 0.6304, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.6216489237703007, | |
| "grad_norm": 4.934854236839532, | |
| "learning_rate": 3.7625416690079674e-06, | |
| "loss": 0.5913, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6224259849250136, | |
| "grad_norm": 5.278185394532136, | |
| "learning_rate": 3.749405770442954e-06, | |
| "loss": 0.6062, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.6232030460797264, | |
| "grad_norm": 3.745775463675437, | |
| "learning_rate": 3.7362790731692045e-06, | |
| "loss": 0.5785, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.6239801072344393, | |
| "grad_norm": 3.0793776700444893, | |
| "learning_rate": 3.7231616737668587e-06, | |
| "loss": 0.6212, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.6247571683891522, | |
| "grad_norm": 4.616140309647705, | |
| "learning_rate": 3.710053668747644e-06, | |
| "loss": 0.6978, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.6255342295438651, | |
| "grad_norm": 2.266055763696263, | |
| "learning_rate": 3.696955154554174e-06, | |
| "loss": 0.6677, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.6263112906985779, | |
| "grad_norm": 3.167710349649831, | |
| "learning_rate": 3.6838662275592285e-06, | |
| "loss": 0.5961, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.6270883518532908, | |
| "grad_norm": 3.6679021169417583, | |
| "learning_rate": 3.670786984065049e-06, | |
| "loss": 0.5932, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.6278654130080037, | |
| "grad_norm": 4.807394417840595, | |
| "learning_rate": 3.657717520302635e-06, | |
| "loss": 0.6507, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.6286424741627166, | |
| "grad_norm": 2.8567195928058697, | |
| "learning_rate": 3.6446579324310283e-06, | |
| "loss": 0.5622, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.6294195353174294, | |
| "grad_norm": 4.87655399348002, | |
| "learning_rate": 3.6316083165366066e-06, | |
| "loss": 0.6807, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.6301965964721423, | |
| "grad_norm": 3.7014748147970886, | |
| "learning_rate": 3.61856876863238e-06, | |
| "loss": 0.6127, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.6309736576268552, | |
| "grad_norm": 3.9766985471750482, | |
| "learning_rate": 3.6055393846572863e-06, | |
| "loss": 0.6355, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.6317507187815681, | |
| "grad_norm": 5.176163354598203, | |
| "learning_rate": 3.592520260475474e-06, | |
| "loss": 0.5764, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.632527779936281, | |
| "grad_norm": 3.3915897413256273, | |
| "learning_rate": 3.579511491875614e-06, | |
| "loss": 0.5824, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.6333048410909938, | |
| "grad_norm": 2.968301217496569, | |
| "learning_rate": 3.5665131745701796e-06, | |
| "loss": 0.6927, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.6340819022457067, | |
| "grad_norm": 3.4049937558114367, | |
| "learning_rate": 3.5535254041947487e-06, | |
| "loss": 0.6589, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.6348589634004196, | |
| "grad_norm": 3.0490199659476223, | |
| "learning_rate": 3.5405482763073006e-06, | |
| "loss": 0.6264, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.6356360245551325, | |
| "grad_norm": 4.610543482084557, | |
| "learning_rate": 3.5275818863875176e-06, | |
| "loss": 0.6298, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.6364130857098453, | |
| "grad_norm": 3.792284286942197, | |
| "learning_rate": 3.5146263298360676e-06, | |
| "loss": 0.6409, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.6371901468645582, | |
| "grad_norm": 4.791463361046891, | |
| "learning_rate": 3.501681701973917e-06, | |
| "loss": 0.5988, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.6379672080192711, | |
| "grad_norm": 2.946227557833364, | |
| "learning_rate": 3.488748098041623e-06, | |
| "loss": 0.56, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.638744269173984, | |
| "grad_norm": 3.9143118513649013, | |
| "learning_rate": 3.4758256131986333e-06, | |
| "loss": 0.6102, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.6395213303286968, | |
| "grad_norm": 7.013871477575305, | |
| "learning_rate": 3.4629143425225893e-06, | |
| "loss": 0.6887, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.6402983914834097, | |
| "grad_norm": 3.771798826744058, | |
| "learning_rate": 3.4500143810086194e-06, | |
| "loss": 0.6373, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.6410754526381226, | |
| "grad_norm": 3.132474576222066, | |
| "learning_rate": 3.437125823568646e-06, | |
| "loss": 0.6452, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.6418525137928355, | |
| "grad_norm": 4.0341361359246, | |
| "learning_rate": 3.4242487650306867e-06, | |
| "loss": 0.65, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.6426295749475484, | |
| "grad_norm": 3.489817034481266, | |
| "learning_rate": 3.4113833001381575e-06, | |
| "loss": 0.6041, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.6434066361022612, | |
| "grad_norm": 4.207948013742414, | |
| "learning_rate": 3.398529523549169e-06, | |
| "loss": 0.6047, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.6441836972569741, | |
| "grad_norm": 3.300977059658827, | |
| "learning_rate": 3.3856875298358365e-06, | |
| "loss": 0.6619, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.644960758411687, | |
| "grad_norm": 3.8241041070180413, | |
| "learning_rate": 3.3728574134835846e-06, | |
| "loss": 0.6198, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.6457378195663999, | |
| "grad_norm": 3.875014176616493, | |
| "learning_rate": 3.360039268890446e-06, | |
| "loss": 0.6003, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.6465148807211127, | |
| "grad_norm": 3.2752573740495556, | |
| "learning_rate": 3.347233190366375e-06, | |
| "loss": 0.6101, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.6472919418758256, | |
| "grad_norm": 3.8745882003993177, | |
| "learning_rate": 3.3344392721325458e-06, | |
| "loss": 0.6248, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.6480690030305385, | |
| "grad_norm": 2.942894246587158, | |
| "learning_rate": 3.3216576083206637e-06, | |
| "loss": 0.6087, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.6488460641852514, | |
| "grad_norm": 2.990495379975504, | |
| "learning_rate": 3.308888292972273e-06, | |
| "loss": 0.5888, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.6496231253399642, | |
| "grad_norm": 3.376642101090337, | |
| "learning_rate": 3.2961314200380616e-06, | |
| "loss": 0.637, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.6504001864946771, | |
| "grad_norm": 3.4092448553804156, | |
| "learning_rate": 3.2833870833771753e-06, | |
| "loss": 0.6105, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.65117724764939, | |
| "grad_norm": 5.292717322884515, | |
| "learning_rate": 3.270655376756521e-06, | |
| "loss": 0.579, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.6519543088041029, | |
| "grad_norm": 3.7225346348995982, | |
| "learning_rate": 3.25793639385008e-06, | |
| "loss": 0.6072, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.6527313699588158, | |
| "grad_norm": 3.656912994279593, | |
| "learning_rate": 3.2452302282382185e-06, | |
| "loss": 0.5656, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.6535084311135286, | |
| "grad_norm": 5.191851471827204, | |
| "learning_rate": 3.232536973407e-06, | |
| "loss": 0.6353, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.6542854922682415, | |
| "grad_norm": 4.5342622406097135, | |
| "learning_rate": 3.2198567227474954e-06, | |
| "loss": 0.6239, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.6550625534229544, | |
| "grad_norm": 3.2997906214128507, | |
| "learning_rate": 3.207189569555096e-06, | |
| "loss": 0.6493, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.6558396145776673, | |
| "grad_norm": 3.7417655823104092, | |
| "learning_rate": 3.194535607028832e-06, | |
| "loss": 0.5765, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.6566166757323801, | |
| "grad_norm": 4.1174225350073685, | |
| "learning_rate": 3.1818949282706764e-06, | |
| "loss": 0.584, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.657393736887093, | |
| "grad_norm": 5.288074659352862, | |
| "learning_rate": 3.1692676262848732e-06, | |
| "loss": 0.5846, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.6581707980418059, | |
| "grad_norm": 6.8794935144127285, | |
| "learning_rate": 3.1566537939772433e-06, | |
| "loss": 0.6164, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.6589478591965188, | |
| "grad_norm": 3.369610724208555, | |
| "learning_rate": 3.1440535241545035e-06, | |
| "loss": 0.5667, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.6597249203512316, | |
| "grad_norm": 2.700055960128087, | |
| "learning_rate": 3.131466909523582e-06, | |
| "loss": 0.5729, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.6605019815059445, | |
| "grad_norm": 4.481552377327523, | |
| "learning_rate": 3.118894042690945e-06, | |
| "loss": 0.5639, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.6612790426606574, | |
| "grad_norm": 5.130216388568981, | |
| "learning_rate": 3.1063350161619025e-06, | |
| "loss": 0.5904, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.6620561038153703, | |
| "grad_norm": 4.00502225199317, | |
| "learning_rate": 3.093789922339936e-06, | |
| "loss": 0.5998, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.6628331649700832, | |
| "grad_norm": 3.774461462354705, | |
| "learning_rate": 3.081258853526018e-06, | |
| "loss": 0.5886, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.663610226124796, | |
| "grad_norm": 2.821168583180078, | |
| "learning_rate": 3.0687419019179285e-06, | |
| "loss": 0.6011, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.6643872872795089, | |
| "grad_norm": 4.63573425963788, | |
| "learning_rate": 3.0562391596095833e-06, | |
| "loss": 0.61, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.6651643484342218, | |
| "grad_norm": 4.151701829585363, | |
| "learning_rate": 3.0437507185903516e-06, | |
| "loss": 0.6334, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.6659414095889347, | |
| "grad_norm": 3.1823244853803097, | |
| "learning_rate": 3.0312766707443784e-06, | |
| "loss": 0.6492, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.6667184707436475, | |
| "grad_norm": 3.494168616800063, | |
| "learning_rate": 3.0188171078499117e-06, | |
| "loss": 0.6293, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.6674955318983604, | |
| "grad_norm": 3.007455561802234, | |
| "learning_rate": 3.0063721215786274e-06, | |
| "loss": 0.6125, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.6682725930530733, | |
| "grad_norm": 4.328591303423522, | |
| "learning_rate": 2.99394180349495e-06, | |
| "loss": 0.6152, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.6690496542077862, | |
| "grad_norm": 3.0920402812840413, | |
| "learning_rate": 2.981526245055387e-06, | |
| "loss": 0.5768, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.669826715362499, | |
| "grad_norm": 2.9353592413440155, | |
| "learning_rate": 2.9691255376078464e-06, | |
| "loss": 0.542, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.6706037765172119, | |
| "grad_norm": 3.882400088723547, | |
| "learning_rate": 2.9567397723909725e-06, | |
| "loss": 0.519, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.6713808376719248, | |
| "grad_norm": 4.783097703300002, | |
| "learning_rate": 2.944369040533471e-06, | |
| "loss": 0.6396, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.6721578988266377, | |
| "grad_norm": 4.770262430972376, | |
| "learning_rate": 2.9320134330534367e-06, | |
| "loss": 0.6385, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.6729349599813506, | |
| "grad_norm": 3.1574059447890486, | |
| "learning_rate": 2.919673040857693e-06, | |
| "loss": 0.5935, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.6737120211360634, | |
| "grad_norm": 3.945392779400959, | |
| "learning_rate": 2.9073479547411087e-06, | |
| "loss": 0.6041, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.6744890822907763, | |
| "grad_norm": 3.834570241650989, | |
| "learning_rate": 2.89503826538594e-06, | |
| "loss": 0.5603, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.6752661434454892, | |
| "grad_norm": 3.322325574324924, | |
| "learning_rate": 2.882744063361165e-06, | |
| "loss": 0.5839, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.6760432046002021, | |
| "grad_norm": 5.400737978025128, | |
| "learning_rate": 2.870465439121807e-06, | |
| "loss": 0.6, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.6768202657549149, | |
| "grad_norm": 3.7907802256324614, | |
| "learning_rate": 2.8582024830082796e-06, | |
| "loss": 0.6255, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.6775973269096278, | |
| "grad_norm": 3.912677923882123, | |
| "learning_rate": 2.845955285245715e-06, | |
| "loss": 0.5545, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.6783743880643407, | |
| "grad_norm": 4.941243247209147, | |
| "learning_rate": 2.833723935943301e-06, | |
| "loss": 0.5684, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.6791514492190536, | |
| "grad_norm": 3.289971837418658, | |
| "learning_rate": 2.821508525093627e-06, | |
| "loss": 0.6519, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.6799285103737664, | |
| "grad_norm": 3.939920814084507, | |
| "learning_rate": 2.8093091425720097e-06, | |
| "loss": 0.6229, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.6807055715284793, | |
| "grad_norm": 4.336532929599707, | |
| "learning_rate": 2.797125878135837e-06, | |
| "loss": 0.5641, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.6814826326831922, | |
| "grad_norm": 3.322566385669406, | |
| "learning_rate": 2.784958821423907e-06, | |
| "loss": 0.6232, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.6822596938379051, | |
| "grad_norm": 4.200430984375038, | |
| "learning_rate": 2.7728080619557702e-06, | |
| "loss": 0.5977, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.683036754992618, | |
| "grad_norm": 3.740176445426232, | |
| "learning_rate": 2.760673689131068e-06, | |
| "loss": 0.6185, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.6838138161473308, | |
| "grad_norm": 2.1066076609366613, | |
| "learning_rate": 2.7485557922288776e-06, | |
| "loss": 0.6274, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.6845908773020437, | |
| "grad_norm": 2.8053182283923213, | |
| "learning_rate": 2.736454460407055e-06, | |
| "loss": 0.6181, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.6853679384567566, | |
| "grad_norm": 3.437087088984394, | |
| "learning_rate": 2.724369782701578e-06, | |
| "loss": 0.621, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.6861449996114695, | |
| "grad_norm": 3.0623391960294595, | |
| "learning_rate": 2.7123018480258876e-06, | |
| "loss": 0.5441, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.6869220607661823, | |
| "grad_norm": 4.447855889156802, | |
| "learning_rate": 2.7002507451702394e-06, | |
| "loss": 0.5498, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.6876991219208952, | |
| "grad_norm": 3.328238936470799, | |
| "learning_rate": 2.688216562801052e-06, | |
| "loss": 0.5992, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.6884761830756081, | |
| "grad_norm": 4.421506555636393, | |
| "learning_rate": 2.6761993894602444e-06, | |
| "loss": 0.5945, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.689253244230321, | |
| "grad_norm": 5.322591815355897, | |
| "learning_rate": 2.664199313564598e-06, | |
| "loss": 0.5958, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.6900303053850338, | |
| "grad_norm": 3.7611828384663393, | |
| "learning_rate": 2.652216423405093e-06, | |
| "loss": 0.5645, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.6908073665397467, | |
| "grad_norm": 3.3085304945194176, | |
| "learning_rate": 2.6402508071462685e-06, | |
| "loss": 0.5821, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.6915844276944596, | |
| "grad_norm": 4.5103793305482105, | |
| "learning_rate": 2.6283025528255685e-06, | |
| "loss": 0.6111, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.6923614888491725, | |
| "grad_norm": 3.2568624242920623, | |
| "learning_rate": 2.6163717483526953e-06, | |
| "loss": 0.5546, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.6931385500038854, | |
| "grad_norm": 2.973519357151336, | |
| "learning_rate": 2.6044584815089667e-06, | |
| "loss": 0.5685, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.6939156111585982, | |
| "grad_norm": 3.5837020468987166, | |
| "learning_rate": 2.592562839946664e-06, | |
| "loss": 0.5456, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.6946926723133111, | |
| "grad_norm": 4.064184411405787, | |
| "learning_rate": 2.5806849111883913e-06, | |
| "loss": 0.559, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.695469733468024, | |
| "grad_norm": 3.3437426814478406, | |
| "learning_rate": 2.56882478262643e-06, | |
| "loss": 0.5538, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.6962467946227369, | |
| "grad_norm": 3.107677218552789, | |
| "learning_rate": 2.556982541522094e-06, | |
| "loss": 0.5383, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.6970238557774496, | |
| "grad_norm": 2.882272796253547, | |
| "learning_rate": 2.5451582750050896e-06, | |
| "loss": 0.5698, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.6978009169321625, | |
| "grad_norm": 3.2190081599711164, | |
| "learning_rate": 2.5333520700728793e-06, | |
| "loss": 0.5581, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.6985779780868754, | |
| "grad_norm": 4.12751667992376, | |
| "learning_rate": 2.521564013590031e-06, | |
| "loss": 0.5334, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.6993550392415883, | |
| "grad_norm": 4.145588694570731, | |
| "learning_rate": 2.509794192287588e-06, | |
| "loss": 0.561, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.7001321003963011, | |
| "grad_norm": 3.155212860949128, | |
| "learning_rate": 2.498042692762426e-06, | |
| "loss": 0.5418, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.700909161551014, | |
| "grad_norm": 3.2632869764204897, | |
| "learning_rate": 2.4863096014766193e-06, | |
| "loss": 0.5411, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.7016862227057269, | |
| "grad_norm": 4.001715026222935, | |
| "learning_rate": 2.474595004756799e-06, | |
| "loss": 0.5589, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.7024632838604398, | |
| "grad_norm": 3.3415316677677325, | |
| "learning_rate": 2.4628989887935266e-06, | |
| "loss": 0.537, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.7032403450151526, | |
| "grad_norm": 5.797689446433965, | |
| "learning_rate": 2.4512216396406552e-06, | |
| "loss": 0.6243, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.7040174061698655, | |
| "grad_norm": 4.284101589916973, | |
| "learning_rate": 2.4395630432146926e-06, | |
| "loss": 0.5817, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.7047944673245784, | |
| "grad_norm": 3.211724547014886, | |
| "learning_rate": 2.427923285294174e-06, | |
| "loss": 0.5788, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.7055715284792913, | |
| "grad_norm": 3.2055910232947085, | |
| "learning_rate": 2.4163024515190293e-06, | |
| "loss": 0.5311, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.7063485896340042, | |
| "grad_norm": 4.255051995836248, | |
| "learning_rate": 2.4047006273899527e-06, | |
| "loss": 0.5713, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.707125650788717, | |
| "grad_norm": 4.597394692328588, | |
| "learning_rate": 2.393117898267779e-06, | |
| "loss": 0.6031, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.7079027119434299, | |
| "grad_norm": 3.2150862347569933, | |
| "learning_rate": 2.3815543493728454e-06, | |
| "loss": 0.5594, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.7086797730981428, | |
| "grad_norm": 4.683878110698539, | |
| "learning_rate": 2.370010065784372e-06, | |
| "loss": 0.5461, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.7094568342528557, | |
| "grad_norm": 4.033438486304492, | |
| "learning_rate": 2.358485132439831e-06, | |
| "loss": 0.5815, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 0.7102338954075685, | |
| "grad_norm": 3.3703523652063168, | |
| "learning_rate": 2.3469796341343315e-06, | |
| "loss": 0.5247, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.7110109565622814, | |
| "grad_norm": 4.325956291425198, | |
| "learning_rate": 2.33549365551998e-06, | |
| "loss": 0.5387, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.7117880177169943, | |
| "grad_norm": 2.490947555344077, | |
| "learning_rate": 2.3240272811052738e-06, | |
| "loss": 0.5776, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.7125650788717072, | |
| "grad_norm": 4.949535189967038, | |
| "learning_rate": 2.3125805952544666e-06, | |
| "loss": 0.5842, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 0.71334214002642, | |
| "grad_norm": 3.670543908233672, | |
| "learning_rate": 2.301153682186954e-06, | |
| "loss": 0.53, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.7141192011811329, | |
| "grad_norm": 4.866130796619525, | |
| "learning_rate": 2.289746625976653e-06, | |
| "loss": 0.5681, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 0.7148962623358458, | |
| "grad_norm": 3.4112599844471467, | |
| "learning_rate": 2.2783595105513832e-06, | |
| "loss": 0.5575, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.7156733234905587, | |
| "grad_norm": 3.844471466545408, | |
| "learning_rate": 2.266992419692247e-06, | |
| "loss": 0.5716, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.7164503846452716, | |
| "grad_norm": 3.3046961399811474, | |
| "learning_rate": 2.2556454370330195e-06, | |
| "loss": 0.5431, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.7172274457999844, | |
| "grad_norm": 2.960816022759597, | |
| "learning_rate": 2.2443186460595277e-06, | |
| "loss": 0.5502, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 0.7180045069546973, | |
| "grad_norm": 3.7931643481456794, | |
| "learning_rate": 2.2330121301090362e-06, | |
| "loss": 0.5844, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.7187815681094102, | |
| "grad_norm": 3.4283490865176853, | |
| "learning_rate": 2.221725972369635e-06, | |
| "loss": 0.5568, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.7195586292641231, | |
| "grad_norm": 4.3583902590026895, | |
| "learning_rate": 2.210460255879629e-06, | |
| "loss": 0.5173, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.7203356904188359, | |
| "grad_norm": 3.653581931257441, | |
| "learning_rate": 2.1992150635269233e-06, | |
| "loss": 0.5229, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.7211127515735488, | |
| "grad_norm": 4.770502864647989, | |
| "learning_rate": 2.187990478048423e-06, | |
| "loss": 0.5761, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.7218898127282617, | |
| "grad_norm": 3.878473847618142, | |
| "learning_rate": 2.1767865820294093e-06, | |
| "loss": 0.4937, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 0.7226668738829746, | |
| "grad_norm": 3.9771101901252157, | |
| "learning_rate": 2.165603457902945e-06, | |
| "loss": 0.5237, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.7234439350376874, | |
| "grad_norm": 3.533717896030411, | |
| "learning_rate": 2.1544411879492597e-06, | |
| "loss": 0.5743, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 0.7242209961924003, | |
| "grad_norm": 3.65695725762207, | |
| "learning_rate": 2.143299854295149e-06, | |
| "loss": 0.5824, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.7249980573471132, | |
| "grad_norm": 2.492214523438049, | |
| "learning_rate": 2.13217953891337e-06, | |
| "loss": 0.5274, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.7257751185018261, | |
| "grad_norm": 3.386138297909339, | |
| "learning_rate": 2.121080323622038e-06, | |
| "loss": 0.5612, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.726552179656539, | |
| "grad_norm": 3.9436014142777096, | |
| "learning_rate": 2.1100022900840208e-06, | |
| "loss": 0.5317, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.7273292408112518, | |
| "grad_norm": 4.412376927983859, | |
| "learning_rate": 2.0989455198063415e-06, | |
| "loss": 0.574, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.7281063019659647, | |
| "grad_norm": 2.3279248382650737, | |
| "learning_rate": 2.0879100941395787e-06, | |
| "loss": 0.5289, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 0.7288833631206776, | |
| "grad_norm": 4.637433311164565, | |
| "learning_rate": 2.076896094277265e-06, | |
| "loss": 0.5622, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.7296604242753905, | |
| "grad_norm": 4.904954853760184, | |
| "learning_rate": 2.065903601255297e-06, | |
| "loss": 0.5176, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.7304374854301033, | |
| "grad_norm": 3.729037710128586, | |
| "learning_rate": 2.0549326959513287e-06, | |
| "loss": 0.5315, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.7312145465848162, | |
| "grad_norm": 3.5966860873794966, | |
| "learning_rate": 2.0439834590841833e-06, | |
| "loss": 0.5177, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 0.7319916077395291, | |
| "grad_norm": 4.464459321144577, | |
| "learning_rate": 2.0330559712132614e-06, | |
| "loss": 0.5484, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.732768668894242, | |
| "grad_norm": 4.589314499941277, | |
| "learning_rate": 2.022150312737939e-06, | |
| "loss": 0.5467, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 0.7335457300489548, | |
| "grad_norm": 4.017841935745773, | |
| "learning_rate": 2.0112665638969842e-06, | |
| "loss": 0.5266, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.7343227912036677, | |
| "grad_norm": 3.0931816369991703, | |
| "learning_rate": 2.0004048047679624e-06, | |
| "loss": 0.5767, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.7350998523583806, | |
| "grad_norm": 4.495169108132031, | |
| "learning_rate": 1.9895651152666538e-06, | |
| "loss": 0.5613, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.7358769135130935, | |
| "grad_norm": 4.1470825704755, | |
| "learning_rate": 1.978747575146455e-06, | |
| "loss": 0.5111, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 0.7366539746678064, | |
| "grad_norm": 4.197560473624663, | |
| "learning_rate": 1.967952263997801e-06, | |
| "loss": 0.5538, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.7374310358225192, | |
| "grad_norm": 3.7319528048077246, | |
| "learning_rate": 1.9571792612475747e-06, | |
| "loss": 0.5741, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 0.7382080969772321, | |
| "grad_norm": 5.01956999231008, | |
| "learning_rate": 1.9464286461585223e-06, | |
| "loss": 0.5357, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.738985158131945, | |
| "grad_norm": 3.7344522235830264, | |
| "learning_rate": 1.9357004978286777e-06, | |
| "loss": 0.5369, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.7397622192866579, | |
| "grad_norm": 5.534900941588667, | |
| "learning_rate": 1.924994895190772e-06, | |
| "loss": 0.547, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.7405392804413707, | |
| "grad_norm": 3.544511900994509, | |
| "learning_rate": 1.9143119170116534e-06, | |
| "loss": 0.5365, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 0.7413163415960836, | |
| "grad_norm": 3.617025368147638, | |
| "learning_rate": 1.9036516418917128e-06, | |
| "loss": 0.576, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.7420934027507965, | |
| "grad_norm": 2.717825183803928, | |
| "learning_rate": 1.8930141482643005e-06, | |
| "loss": 0.5528, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.7428704639055094, | |
| "grad_norm": 3.8576185713414732, | |
| "learning_rate": 1.88239951439515e-06, | |
| "loss": 0.5505, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.7436475250602222, | |
| "grad_norm": 5.360570148700179, | |
| "learning_rate": 1.8718078183818094e-06, | |
| "loss": 0.547, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.7444245862149351, | |
| "grad_norm": 3.9702986251974126, | |
| "learning_rate": 1.8612391381530548e-06, | |
| "loss": 0.5361, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.745201647369648, | |
| "grad_norm": 4.210077667591901, | |
| "learning_rate": 1.8506935514683244e-06, | |
| "loss": 0.5558, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 0.7459787085243609, | |
| "grad_norm": 4.27553292233449, | |
| "learning_rate": 1.8401711359171438e-06, | |
| "loss": 0.5406, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.7467557696790738, | |
| "grad_norm": 5.023769063952561, | |
| "learning_rate": 1.82967196891856e-06, | |
| "loss": 0.5345, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 0.7475328308337866, | |
| "grad_norm": 3.7148918067051353, | |
| "learning_rate": 1.819196127720565e-06, | |
| "loss": 0.5417, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.7483098919884995, | |
| "grad_norm": 4.636272948323283, | |
| "learning_rate": 1.808743689399528e-06, | |
| "loss": 0.5792, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.7490869531432124, | |
| "grad_norm": 3.103713105912325, | |
| "learning_rate": 1.798314730859637e-06, | |
| "loss": 0.5527, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.7498640142979253, | |
| "grad_norm": 3.204765078923141, | |
| "learning_rate": 1.787909328832323e-06, | |
| "loss": 0.5491, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.7506410754526381, | |
| "grad_norm": 4.894522393499138, | |
| "learning_rate": 1.7775275598756974e-06, | |
| "loss": 0.5553, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.751418136607351, | |
| "grad_norm": 3.428628239034369, | |
| "learning_rate": 1.7671695003739935e-06, | |
| "loss": 0.5143, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 0.7521951977620639, | |
| "grad_norm": 4.535044446134579, | |
| "learning_rate": 1.7568352265369987e-06, | |
| "loss": 0.5291, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.7529722589167768, | |
| "grad_norm": 4.546057980769502, | |
| "learning_rate": 1.7465248143995011e-06, | |
| "loss": 0.5271, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.7537493200714896, | |
| "grad_norm": 3.6725535134363785, | |
| "learning_rate": 1.7362383398207189e-06, | |
| "loss": 0.5665, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.7545263812262025, | |
| "grad_norm": 3.3515951674477793, | |
| "learning_rate": 1.725975878483757e-06, | |
| "loss": 0.5282, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 0.7553034423809154, | |
| "grad_norm": 4.187132180488078, | |
| "learning_rate": 1.7157375058950349e-06, | |
| "loss": 0.5572, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.7560805035356283, | |
| "grad_norm": 3.013413844455128, | |
| "learning_rate": 1.705523297383741e-06, | |
| "loss": 0.5502, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 0.7568575646903412, | |
| "grad_norm": 4.034990404281864, | |
| "learning_rate": 1.6953333281012745e-06, | |
| "loss": 0.5557, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.757634625845054, | |
| "grad_norm": 4.2869070311052475, | |
| "learning_rate": 1.6851676730206978e-06, | |
| "loss": 0.5067, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.7584116869997669, | |
| "grad_norm": 2.55851587794808, | |
| "learning_rate": 1.6750264069361755e-06, | |
| "loss": 0.521, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.7591887481544798, | |
| "grad_norm": 3.860783467248806, | |
| "learning_rate": 1.664909604462432e-06, | |
| "loss": 0.5162, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 0.7599658093091927, | |
| "grad_norm": 2.964535685167722, | |
| "learning_rate": 1.6548173400341988e-06, | |
| "loss": 0.4662, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.7607428704639055, | |
| "grad_norm": 4.5148211810505, | |
| "learning_rate": 1.6447496879056667e-06, | |
| "loss": 0.5326, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 0.7615199316186184, | |
| "grad_norm": 2.9731810276505595, | |
| "learning_rate": 1.6347067221499441e-06, | |
| "loss": 0.5221, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.7622969927733313, | |
| "grad_norm": 4.225015592243322, | |
| "learning_rate": 1.6246885166585081e-06, | |
| "loss": 0.5404, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.7630740539280442, | |
| "grad_norm": 4.195775975703309, | |
| "learning_rate": 1.6146951451406583e-06, | |
| "loss": 0.4837, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.763851115082757, | |
| "grad_norm": 2.77408092127348, | |
| "learning_rate": 1.604726681122979e-06, | |
| "loss": 0.4849, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 0.7646281762374699, | |
| "grad_norm": 4.215861830136612, | |
| "learning_rate": 1.5947831979487966e-06, | |
| "loss": 0.5925, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.7654052373921828, | |
| "grad_norm": 3.1030479659610393, | |
| "learning_rate": 1.5848647687776397e-06, | |
| "loss": 0.5019, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.7661822985468957, | |
| "grad_norm": 3.926045471634979, | |
| "learning_rate": 1.574971466584701e-06, | |
| "loss": 0.5124, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.7669593597016086, | |
| "grad_norm": 4.015070211236076, | |
| "learning_rate": 1.5651033641603041e-06, | |
| "loss": 0.5314, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.7677364208563214, | |
| "grad_norm": 3.649601860518483, | |
| "learning_rate": 1.555260534109359e-06, | |
| "loss": 0.5089, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.7685134820110343, | |
| "grad_norm": 3.604893647217938, | |
| "learning_rate": 1.5454430488508359e-06, | |
| "loss": 0.5472, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 0.7692905431657472, | |
| "grad_norm": 3.3095117069291624, | |
| "learning_rate": 1.5356509806172315e-06, | |
| "loss": 0.5168, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.7700676043204601, | |
| "grad_norm": 3.8970071625899445, | |
| "learning_rate": 1.525884401454033e-06, | |
| "loss": 0.5485, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 0.7708446654751729, | |
| "grad_norm": 2.80658001169654, | |
| "learning_rate": 1.5161433832191902e-06, | |
| "loss": 0.5044, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.7716217266298858, | |
| "grad_norm": 3.1868297865512214, | |
| "learning_rate": 1.5064279975825923e-06, | |
| "loss": 0.4934, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.7723987877845987, | |
| "grad_norm": 3.0425811492999366, | |
| "learning_rate": 1.4967383160255316e-06, | |
| "loss": 0.5183, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.7731758489393116, | |
| "grad_norm": 4.54933754793044, | |
| "learning_rate": 1.4870744098401819e-06, | |
| "loss": 0.5306, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.7739529100940244, | |
| "grad_norm": 3.931701576666515, | |
| "learning_rate": 1.4774363501290755e-06, | |
| "loss": 0.5415, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.7747299712487373, | |
| "grad_norm": 3.282020379585411, | |
| "learning_rate": 1.4678242078045756e-06, | |
| "loss": 0.5421, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 0.7755070324034502, | |
| "grad_norm": 3.2735246508623366, | |
| "learning_rate": 1.4582380535883622e-06, | |
| "loss": 0.5452, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.7762840935581631, | |
| "grad_norm": 3.2961538894269067, | |
| "learning_rate": 1.4486779580109012e-06, | |
| "loss": 0.5254, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.777061154712876, | |
| "grad_norm": 4.499334024075413, | |
| "learning_rate": 1.4391439914109367e-06, | |
| "loss": 0.4899, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.777061154712876, | |
| "eval_loss": 0.5171714425086975, | |
| "eval_runtime": 472.4039, | |
| "eval_samples_per_second": 22.94, | |
| "eval_steps_per_second": 2.868, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.7778382158675888, | |
| "grad_norm": 4.012283871593952, | |
| "learning_rate": 1.429636223934963e-06, | |
| "loss": 0.4927, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 0.7786152770223017, | |
| "grad_norm": 3.483797094263642, | |
| "learning_rate": 1.4201547255367165e-06, | |
| "loss": 0.5085, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.7793923381770145, | |
| "grad_norm": 4.75329332254169, | |
| "learning_rate": 1.4106995659766547e-06, | |
| "loss": 0.5058, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 0.7801693993317274, | |
| "grad_norm": 3.68815778033119, | |
| "learning_rate": 1.4012708148214522e-06, | |
| "loss": 0.5265, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.7809464604864402, | |
| "grad_norm": 3.4635761925286306, | |
| "learning_rate": 1.3918685414434763e-06, | |
| "loss": 0.4623, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.7817235216411531, | |
| "grad_norm": 4.024245798823526, | |
| "learning_rate": 1.3824928150202866e-06, | |
| "loss": 0.4865, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.782500582795866, | |
| "grad_norm": 3.876558527294442, | |
| "learning_rate": 1.3731437045341218e-06, | |
| "loss": 0.5297, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 0.7832776439505789, | |
| "grad_norm": 4.13041441043086, | |
| "learning_rate": 1.363821278771391e-06, | |
| "loss": 0.5588, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.7840547051052917, | |
| "grad_norm": 4.828512693632229, | |
| "learning_rate": 1.3545256063221745e-06, | |
| "loss": 0.5241, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 0.7848317662600046, | |
| "grad_norm": 3.330489049598463, | |
| "learning_rate": 1.3452567555797085e-06, | |
| "loss": 0.5351, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.7856088274147175, | |
| "grad_norm": 3.577340154782965, | |
| "learning_rate": 1.3360147947398927e-06, | |
| "loss": 0.4874, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 0.7863858885694304, | |
| "grad_norm": 4.201117799816586, | |
| "learning_rate": 1.3267997918007792e-06, | |
| "loss": 0.5148, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.7871629497241432, | |
| "grad_norm": 2.5965256135200643, | |
| "learning_rate": 1.3176118145620775e-06, | |
| "loss": 0.4988, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 0.7879400108788561, | |
| "grad_norm": 2.397365078889302, | |
| "learning_rate": 1.3084509306246562e-06, | |
| "loss": 0.4687, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.788717072033569, | |
| "grad_norm": 5.5016070521496, | |
| "learning_rate": 1.29931720739004e-06, | |
| "loss": 0.518, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.7894941331882819, | |
| "grad_norm": 4.9408112199928444, | |
| "learning_rate": 1.2902107120599249e-06, | |
| "loss": 0.5312, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.7902711943429948, | |
| "grad_norm": 3.557763106103323, | |
| "learning_rate": 1.2811315116356698e-06, | |
| "loss": 0.5196, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 0.7910482554977076, | |
| "grad_norm": 4.192138798834655, | |
| "learning_rate": 1.2720796729178115e-06, | |
| "loss": 0.527, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.7918253166524205, | |
| "grad_norm": 3.586108157059095, | |
| "learning_rate": 1.2630552625055763e-06, | |
| "loss": 0.5347, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 0.7926023778071334, | |
| "grad_norm": 3.9368756234903195, | |
| "learning_rate": 1.2540583467963817e-06, | |
| "loss": 0.4811, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.7933794389618463, | |
| "grad_norm": 4.518574036325759, | |
| "learning_rate": 1.245088991985352e-06, | |
| "loss": 0.5086, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 0.7941565001165591, | |
| "grad_norm": 3.850061816242949, | |
| "learning_rate": 1.2361472640648347e-06, | |
| "loss": 0.4862, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.794933561271272, | |
| "grad_norm": 3.5644700141713064, | |
| "learning_rate": 1.227233228823908e-06, | |
| "loss": 0.5303, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 0.7957106224259849, | |
| "grad_norm": 2.1351987055036985, | |
| "learning_rate": 1.2183469518479018e-06, | |
| "loss": 0.5179, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.7964876835806978, | |
| "grad_norm": 5.25048528063306, | |
| "learning_rate": 1.2094884985179117e-06, | |
| "loss": 0.5318, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.7972647447354106, | |
| "grad_norm": 2.1615227439546745, | |
| "learning_rate": 1.200657934010323e-06, | |
| "loss": 0.4547, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.7980418058901235, | |
| "grad_norm": 2.6751655695167154, | |
| "learning_rate": 1.1918553232963237e-06, | |
| "loss": 0.5134, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 0.7988188670448364, | |
| "grad_norm": 4.159654861888376, | |
| "learning_rate": 1.1830807311414355e-06, | |
| "loss": 0.524, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.7995959281995493, | |
| "grad_norm": 3.6944240100922214, | |
| "learning_rate": 1.1743342221050314e-06, | |
| "loss": 0.5175, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 0.8003729893542622, | |
| "grad_norm": 4.133885672495875, | |
| "learning_rate": 1.1656158605398599e-06, | |
| "loss": 0.4854, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.801150050508975, | |
| "grad_norm": 4.0354219471053305, | |
| "learning_rate": 1.1569257105915743e-06, | |
| "loss": 0.5293, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 0.8019271116636879, | |
| "grad_norm": 4.987229671719538, | |
| "learning_rate": 1.1482638361982595e-06, | |
| "loss": 0.5067, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.8027041728184008, | |
| "grad_norm": 4.060534061900532, | |
| "learning_rate": 1.1396303010899623e-06, | |
| "loss": 0.5031, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 0.8034812339731137, | |
| "grad_norm": 3.8027639891295615, | |
| "learning_rate": 1.131025168788225e-06, | |
| "loss": 0.5339, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.8042582951278265, | |
| "grad_norm": 4.5696870186179215, | |
| "learning_rate": 1.122448502605611e-06, | |
| "loss": 0.5187, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.8050353562825394, | |
| "grad_norm": 3.4544068898990257, | |
| "learning_rate": 1.1139003656452451e-06, | |
| "loss": 0.5012, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.8058124174372523, | |
| "grad_norm": 4.024795478219517, | |
| "learning_rate": 1.1053808208003463e-06, | |
| "loss": 0.5039, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 0.8065894785919652, | |
| "grad_norm": 3.451121303154774, | |
| "learning_rate": 1.0968899307537688e-06, | |
| "loss": 0.5096, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.807366539746678, | |
| "grad_norm": 3.5430435341751374, | |
| "learning_rate": 1.088427757977535e-06, | |
| "loss": 0.4995, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 0.8081436009013909, | |
| "grad_norm": 3.3568799457193315, | |
| "learning_rate": 1.0799943647323823e-06, | |
| "loss": 0.4896, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.8089206620561038, | |
| "grad_norm": 2.7324998256576265, | |
| "learning_rate": 1.071589813067298e-06, | |
| "loss": 0.4757, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 0.8096977232108167, | |
| "grad_norm": 3.114681260826415, | |
| "learning_rate": 1.0632141648190685e-06, | |
| "loss": 0.5033, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.8104747843655296, | |
| "grad_norm": 3.7347524196800856, | |
| "learning_rate": 1.054867481611822e-06, | |
| "loss": 0.4849, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 0.8112518455202424, | |
| "grad_norm": 2.4431545580868423, | |
| "learning_rate": 1.046549824856574e-06, | |
| "loss": 0.4344, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.8120289066749553, | |
| "grad_norm": 3.370757705323888, | |
| "learning_rate": 1.038261255750781e-06, | |
| "loss": 0.4419, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.8128059678296682, | |
| "grad_norm": 4.176509993840626, | |
| "learning_rate": 1.0300018352778817e-06, | |
| "loss": 0.4905, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.8135830289843811, | |
| "grad_norm": 4.2860515845724505, | |
| "learning_rate": 1.0217716242068525e-06, | |
| "loss": 0.4989, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 0.8143600901390939, | |
| "grad_norm": 2.914685646542763, | |
| "learning_rate": 1.0135706830917663e-06, | |
| "loss": 0.4527, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.8151371512938068, | |
| "grad_norm": 4.781204814322438, | |
| "learning_rate": 1.0053990722713347e-06, | |
| "loss": 0.5185, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 0.8159142124485197, | |
| "grad_norm": 4.336551191079965, | |
| "learning_rate": 9.97256851868474e-07, | |
| "loss": 0.5453, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.8166912736032326, | |
| "grad_norm": 3.980153258528895, | |
| "learning_rate": 9.891440817898569e-07, | |
| "loss": 0.4476, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 0.8174683347579454, | |
| "grad_norm": 4.803099851628047, | |
| "learning_rate": 9.810608217254785e-07, | |
| "loss": 0.4535, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.8182453959126583, | |
| "grad_norm": 5.434746877487003, | |
| "learning_rate": 9.730071311482104e-07, | |
| "loss": 0.5266, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 0.8190224570673712, | |
| "grad_norm": 4.132134349770947, | |
| "learning_rate": 9.649830693133649e-07, | |
| "loss": 0.4794, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.8197995182220841, | |
| "grad_norm": 3.8042895258614657, | |
| "learning_rate": 9.569886952582613e-07, | |
| "loss": 0.4857, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.820576579376797, | |
| "grad_norm": 4.505324473871432, | |
| "learning_rate": 9.49024067801787e-07, | |
| "loss": 0.4773, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.8213536405315098, | |
| "grad_norm": 4.085373275991255, | |
| "learning_rate": 9.410892455439724e-07, | |
| "loss": 0.5123, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 0.8221307016862227, | |
| "grad_norm": 2.8077333631243047, | |
| "learning_rate": 9.331842868655538e-07, | |
| "loss": 0.4766, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.8229077628409356, | |
| "grad_norm": 4.995807097173484, | |
| "learning_rate": 9.253092499275435e-07, | |
| "loss": 0.5059, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 0.8236848239956485, | |
| "grad_norm": 3.0312698428527085, | |
| "learning_rate": 9.174641926708028e-07, | |
| "loss": 0.5072, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.8244618851503613, | |
| "grad_norm": 3.6228940116700166, | |
| "learning_rate": 9.096491728156187e-07, | |
| "loss": 0.5157, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 0.8252389463050742, | |
| "grad_norm": 4.4841778480785885, | |
| "learning_rate": 9.018642478612755e-07, | |
| "loss": 0.5325, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.8260160074597871, | |
| "grad_norm": 3.7081609263257596, | |
| "learning_rate": 8.941094750856349e-07, | |
| "loss": 0.5225, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 0.8267930686145, | |
| "grad_norm": 2.9403067849013493, | |
| "learning_rate": 8.863849115447121e-07, | |
| "loss": 0.4859, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.8275701297692128, | |
| "grad_norm": 3.9121829857836925, | |
| "learning_rate": 8.786906140722551e-07, | |
| "loss": 0.4704, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.8283471909239257, | |
| "grad_norm": 3.7718616897098234, | |
| "learning_rate": 8.710266392793293e-07, | |
| "loss": 0.5054, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.8291242520786386, | |
| "grad_norm": 3.108303958961309, | |
| "learning_rate": 8.633930435539023e-07, | |
| "loss": 0.5006, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 0.8299013132333515, | |
| "grad_norm": 2.5549313563071725, | |
| "learning_rate": 8.557898830604239e-07, | |
| "loss": 0.4795, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.8306783743880644, | |
| "grad_norm": 3.459144570766454, | |
| "learning_rate": 8.48217213739414e-07, | |
| "loss": 0.5052, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 0.8314554355427772, | |
| "grad_norm": 3.8583077857999992, | |
| "learning_rate": 8.406750913070582e-07, | |
| "loss": 0.5121, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.8322324966974901, | |
| "grad_norm": 3.963740775603707, | |
| "learning_rate": 8.33163571254787e-07, | |
| "loss": 0.4949, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 0.833009557852203, | |
| "grad_norm": 4.576071555267779, | |
| "learning_rate": 8.256827088488756e-07, | |
| "loss": 0.488, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.8337866190069159, | |
| "grad_norm": 4.018939367025651, | |
| "learning_rate": 8.182325591300333e-07, | |
| "loss": 0.4584, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 0.8345636801616287, | |
| "grad_norm": 5.537702555635495, | |
| "learning_rate": 8.10813176912999e-07, | |
| "loss": 0.5078, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.8353407413163416, | |
| "grad_norm": 4.521346564196193, | |
| "learning_rate": 8.03424616786142e-07, | |
| "loss": 0.5017, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.8361178024710545, | |
| "grad_norm": 4.426790844413774, | |
| "learning_rate": 7.960669331110521e-07, | |
| "loss": 0.4832, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.8368948636257674, | |
| "grad_norm": 4.986892159186973, | |
| "learning_rate": 7.887401800221495e-07, | |
| "loss": 0.5278, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 0.8376719247804802, | |
| "grad_norm": 3.034636301392233, | |
| "learning_rate": 7.814444114262786e-07, | |
| "loss": 0.4996, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.8384489859351931, | |
| "grad_norm": 2.63148766912681, | |
| "learning_rate": 7.741796810023139e-07, | |
| "loss": 0.4839, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 0.839226047089906, | |
| "grad_norm": 4.33674902614418, | |
| "learning_rate": 7.669460422007657e-07, | |
| "loss": 0.439, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.8400031082446189, | |
| "grad_norm": 4.048856363638596, | |
| "learning_rate": 7.597435482433896e-07, | |
| "loss": 0.4783, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 0.8407801693993318, | |
| "grad_norm": 3.925372203600619, | |
| "learning_rate": 7.525722521227885e-07, | |
| "loss": 0.5017, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.8415572305540446, | |
| "grad_norm": 2.3654265887367054, | |
| "learning_rate": 7.45432206602027e-07, | |
| "loss": 0.5123, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 0.8423342917087575, | |
| "grad_norm": 3.754610906804235, | |
| "learning_rate": 7.383234642142422e-07, | |
| "loss": 0.4907, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.8431113528634704, | |
| "grad_norm": 4.1554282145692625, | |
| "learning_rate": 7.312460772622565e-07, | |
| "loss": 0.5107, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.8438884140181833, | |
| "grad_norm": 3.319418655291393, | |
| "learning_rate": 7.242000978181963e-07, | |
| "loss": 0.5048, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.8446654751728961, | |
| "grad_norm": 4.374110046424012, | |
| "learning_rate": 7.171855777231058e-07, | |
| "loss": 0.4617, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 0.845442536327609, | |
| "grad_norm": 4.441680587693151, | |
| "learning_rate": 7.102025685865622e-07, | |
| "loss": 0.4959, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.8462195974823219, | |
| "grad_norm": 2.8350312541634803, | |
| "learning_rate": 7.032511217863031e-07, | |
| "loss": 0.4677, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 0.8469966586370348, | |
| "grad_norm": 3.982485022264907, | |
| "learning_rate": 6.963312884678441e-07, | |
| "loss": 0.4954, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.8477737197917476, | |
| "grad_norm": 4.590377956407083, | |
| "learning_rate": 6.894431195441037e-07, | |
| "loss": 0.5297, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 0.8485507809464605, | |
| "grad_norm": 2.408789067882966, | |
| "learning_rate": 6.825866656950264e-07, | |
| "loss": 0.445, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.8493278421011734, | |
| "grad_norm": 4.694687311202965, | |
| "learning_rate": 6.757619773672169e-07, | |
| "loss": 0.493, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 0.8501049032558863, | |
| "grad_norm": 4.491758478617379, | |
| "learning_rate": 6.689691047735597e-07, | |
| "loss": 0.5153, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.8508819644105992, | |
| "grad_norm": 3.959513693411194, | |
| "learning_rate": 6.62208097892853e-07, | |
| "loss": 0.4797, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.851659025565312, | |
| "grad_norm": 5.339647237399662, | |
| "learning_rate": 6.554790064694471e-07, | |
| "loss": 0.4897, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.8524360867200249, | |
| "grad_norm": 4.541122198536199, | |
| "learning_rate": 6.487818800128692e-07, | |
| "loss": 0.4698, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 0.8532131478747378, | |
| "grad_norm": 4.7468681798060395, | |
| "learning_rate": 6.421167677974622e-07, | |
| "loss": 0.5016, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.8539902090294507, | |
| "grad_norm": 4.381332344102587, | |
| "learning_rate": 6.354837188620278e-07, | |
| "loss": 0.51, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 0.8547672701841635, | |
| "grad_norm": 4.1592821906223705, | |
| "learning_rate": 6.288827820094562e-07, | |
| "loss": 0.4875, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.8555443313388764, | |
| "grad_norm": 5.029800475729443, | |
| "learning_rate": 6.223140058063737e-07, | |
| "loss": 0.4549, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 0.8563213924935893, | |
| "grad_norm": 3.254886843193101, | |
| "learning_rate": 6.157774385827847e-07, | |
| "loss": 0.4314, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.8570984536483022, | |
| "grad_norm": 3.434364877703452, | |
| "learning_rate": 6.092731284317111e-07, | |
| "loss": 0.4654, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 0.857875514803015, | |
| "grad_norm": 4.488825872633713, | |
| "learning_rate": 6.028011232088471e-07, | |
| "loss": 0.482, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.8586525759577279, | |
| "grad_norm": 3.0602137297514638, | |
| "learning_rate": 5.963614705321996e-07, | |
| "loss": 0.4618, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.8594296371124408, | |
| "grad_norm": 4.827196277112413, | |
| "learning_rate": 5.899542177817413e-07, | |
| "loss": 0.4525, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.8602066982671537, | |
| "grad_norm": 4.39228489153871, | |
| "learning_rate": 5.835794120990607e-07, | |
| "loss": 0.5458, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 0.8609837594218664, | |
| "grad_norm": 4.013851924684146, | |
| "learning_rate": 5.772371003870147e-07, | |
| "loss": 0.521, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.8617608205765793, | |
| "grad_norm": 4.599909020480007, | |
| "learning_rate": 5.709273293093865e-07, | |
| "loss": 0.4641, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 0.8625378817312922, | |
| "grad_norm": 3.522635100581711, | |
| "learning_rate": 5.646501452905406e-07, | |
| "loss": 0.4613, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.8633149428860051, | |
| "grad_norm": 4.170720600102606, | |
| "learning_rate": 5.584055945150807e-07, | |
| "loss": 0.4533, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 0.864092004040718, | |
| "grad_norm": 5.0485560375944365, | |
| "learning_rate": 5.521937229275087e-07, | |
| "loss": 0.4584, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.8648690651954308, | |
| "grad_norm": 4.0298286961319105, | |
| "learning_rate": 5.460145762318903e-07, | |
| "loss": 0.5072, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 0.8656461263501437, | |
| "grad_norm": 3.963316318056793, | |
| "learning_rate": 5.398681998915145e-07, | |
| "loss": 0.454, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.8664231875048566, | |
| "grad_norm": 2.2989684529089076, | |
| "learning_rate": 5.337546391285647e-07, | |
| "loss": 0.4753, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.8672002486595695, | |
| "grad_norm": 4.488811638369375, | |
| "learning_rate": 5.276739389237778e-07, | |
| "loss": 0.452, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.8679773098142823, | |
| "grad_norm": 4.7387272438267605, | |
| "learning_rate": 5.216261440161236e-07, | |
| "loss": 0.4891, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 0.8687543709689952, | |
| "grad_norm": 5.278573940043423, | |
| "learning_rate": 5.156112989024653e-07, | |
| "loss": 0.477, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.8695314321237081, | |
| "grad_norm": 3.9270271390134828, | |
| "learning_rate": 5.096294478372382e-07, | |
| "loss": 0.465, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 0.870308493278421, | |
| "grad_norm": 3.2023556593268427, | |
| "learning_rate": 5.036806348321238e-07, | |
| "loss": 0.4654, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.8710855544331338, | |
| "grad_norm": 4.204967484017854, | |
| "learning_rate": 4.977649036557225e-07, | |
| "loss": 0.4933, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 0.8718626155878467, | |
| "grad_norm": 3.8562465627781743, | |
| "learning_rate": 4.918822978332377e-07, | |
| "loss": 0.4487, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.8726396767425596, | |
| "grad_norm": 2.7494815741242484, | |
| "learning_rate": 4.860328606461485e-07, | |
| "loss": 0.4637, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 0.8734167378972725, | |
| "grad_norm": 2.1088033052796895, | |
| "learning_rate": 4.802166351318965e-07, | |
| "loss": 0.4899, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.8741937990519854, | |
| "grad_norm": 5.113207022204942, | |
| "learning_rate": 4.7443366408356673e-07, | |
| "loss": 0.5035, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.8749708602066982, | |
| "grad_norm": 3.993509884814402, | |
| "learning_rate": 4.6868399004957266e-07, | |
| "loss": 0.4983, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.8757479213614111, | |
| "grad_norm": 6.019062769443196, | |
| "learning_rate": 4.6296765533334345e-07, | |
| "loss": 0.5127, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 0.876524982516124, | |
| "grad_norm": 3.42141410170646, | |
| "learning_rate": 4.57284701993016e-07, | |
| "loss": 0.4686, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.8773020436708369, | |
| "grad_norm": 4.401665485132851, | |
| "learning_rate": 4.5163517184111885e-07, | |
| "loss": 0.4423, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 0.8780791048255497, | |
| "grad_norm": 1.7965008908739462, | |
| "learning_rate": 4.460191064442704e-07, | |
| "loss": 0.5013, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.8788561659802626, | |
| "grad_norm": 4.038506349330642, | |
| "learning_rate": 4.4043654712287e-07, | |
| "loss": 0.4681, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 0.8796332271349755, | |
| "grad_norm": 2.6713825342303084, | |
| "learning_rate": 4.348875349507953e-07, | |
| "loss": 0.4723, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.8804102882896884, | |
| "grad_norm": 2.5242881927131493, | |
| "learning_rate": 4.293721107551002e-07, | |
| "loss": 0.4948, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 0.8811873494444012, | |
| "grad_norm": 3.089605520005084, | |
| "learning_rate": 4.23890315115712e-07, | |
| "loss": 0.4837, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.8819644105991141, | |
| "grad_norm": 4.640356219725602, | |
| "learning_rate": 4.184421883651374e-07, | |
| "loss": 0.4594, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.882741471753827, | |
| "grad_norm": 4.452516441213523, | |
| "learning_rate": 4.1302777058816136e-07, | |
| "loss": 0.5087, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.8835185329085399, | |
| "grad_norm": 3.2814252714146903, | |
| "learning_rate": 4.076471016215533e-07, | |
| "loss": 0.4585, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 0.8842955940632528, | |
| "grad_norm": 4.17360304036643, | |
| "learning_rate": 4.023002210537763e-07, | |
| "loss": 0.4808, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.8850726552179656, | |
| "grad_norm": 3.4710617417209897, | |
| "learning_rate": 3.9698716822469175e-07, | |
| "loss": 0.4764, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 0.8858497163726785, | |
| "grad_norm": 4.94630365171049, | |
| "learning_rate": 3.917079822252756e-07, | |
| "loss": 0.4676, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.8866267775273914, | |
| "grad_norm": 3.9963020658849295, | |
| "learning_rate": 3.864627018973244e-07, | |
| "loss": 0.4594, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 0.8874038386821043, | |
| "grad_norm": 4.149575936577817, | |
| "learning_rate": 3.8125136583317404e-07, | |
| "loss": 0.4408, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.8881808998368171, | |
| "grad_norm": 4.0908393768408535, | |
| "learning_rate": 3.760740123754125e-07, | |
| "loss": 0.4906, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 0.88895796099153, | |
| "grad_norm": 3.2442681217314413, | |
| "learning_rate": 3.709306796166029e-07, | |
| "loss": 0.4602, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.8897350221462429, | |
| "grad_norm": 3.2062024108356786, | |
| "learning_rate": 3.658214053989967e-07, | |
| "loss": 0.4291, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.8905120833009558, | |
| "grad_norm": 3.070354137183584, | |
| "learning_rate": 3.6074622731426036e-07, | |
| "loss": 0.4704, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.8912891444556686, | |
| "grad_norm": 3.7959986708913136, | |
| "learning_rate": 3.557051827031954e-07, | |
| "loss": 0.4694, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 0.8920662056103815, | |
| "grad_norm": 4.3724752517742145, | |
| "learning_rate": 3.506983086554666e-07, | |
| "loss": 0.4679, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.8928432667650944, | |
| "grad_norm": 4.7403654025736035, | |
| "learning_rate": 3.4572564200932634e-07, | |
| "loss": 0.5283, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 0.8936203279198073, | |
| "grad_norm": 4.243101118629279, | |
| "learning_rate": 3.4078721935134397e-07, | |
| "loss": 0.5125, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.8943973890745202, | |
| "grad_norm": 4.475859170580614, | |
| "learning_rate": 3.3588307701614144e-07, | |
| "loss": 0.4869, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 0.895174450229233, | |
| "grad_norm": 4.052974333086782, | |
| "learning_rate": 3.310132510861169e-07, | |
| "loss": 0.497, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.8959515113839459, | |
| "grad_norm": 3.373865018498319, | |
| "learning_rate": 3.2617777739118894e-07, | |
| "loss": 0.4441, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 0.8967285725386588, | |
| "grad_norm": 3.276175321494806, | |
| "learning_rate": 3.213766915085248e-07, | |
| "loss": 0.4451, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.8975056336933717, | |
| "grad_norm": 3.908380664561767, | |
| "learning_rate": 3.1661002876228473e-07, | |
| "loss": 0.4243, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.8982826948480845, | |
| "grad_norm": 2.6868106053772003, | |
| "learning_rate": 3.118778242233572e-07, | |
| "loss": 0.4427, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.8990597560027974, | |
| "grad_norm": 3.3557801815767285, | |
| "learning_rate": 3.0718011270910455e-07, | |
| "loss": 0.4702, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 0.8998368171575103, | |
| "grad_norm": 3.473766818324853, | |
| "learning_rate": 3.02516928783107e-07, | |
| "loss": 0.4744, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.9006138783122232, | |
| "grad_norm": 3.8754395433857503, | |
| "learning_rate": 2.978883067549032e-07, | |
| "loss": 0.4519, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 0.901390939466936, | |
| "grad_norm": 4.145319857126792, | |
| "learning_rate": 2.9329428067974454e-07, | |
| "loss": 0.4612, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.9021680006216489, | |
| "grad_norm": 3.4197421104899424, | |
| "learning_rate": 2.8873488435833983e-07, | |
| "loss": 0.46, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 0.9029450617763618, | |
| "grad_norm": 5.689929153660378, | |
| "learning_rate": 2.8421015133660856e-07, | |
| "loss": 0.4345, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.9037221229310747, | |
| "grad_norm": 2.292957288599791, | |
| "learning_rate": 2.797201149054335e-07, | |
| "loss": 0.4454, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 0.9044991840857876, | |
| "grad_norm": 4.486223577334596, | |
| "learning_rate": 2.752648081004183e-07, | |
| "loss": 0.4593, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.9052762452405004, | |
| "grad_norm": 3.8405561325920745, | |
| "learning_rate": 2.7084426370163954e-07, | |
| "loss": 0.4888, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.9060533063952133, | |
| "grad_norm": 3.406878245329023, | |
| "learning_rate": 2.6645851423340806e-07, | |
| "loss": 0.4558, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.9068303675499262, | |
| "grad_norm": 4.950678382840644, | |
| "learning_rate": 2.621075919640309e-07, | |
| "loss": 0.4762, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 0.9076074287046391, | |
| "grad_norm": 3.322238216032584, | |
| "learning_rate": 2.577915289055727e-07, | |
| "loss": 0.4759, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.9083844898593519, | |
| "grad_norm": 3.3945486166885006, | |
| "learning_rate": 2.535103568136205e-07, | |
| "loss": 0.4955, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 0.9091615510140648, | |
| "grad_norm": 3.8694072275201945, | |
| "learning_rate": 2.492641071870489e-07, | |
| "loss": 0.5166, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.9099386121687777, | |
| "grad_norm": 4.7651096314002865, | |
| "learning_rate": 2.450528112677886e-07, | |
| "loss": 0.4971, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 0.9107156733234906, | |
| "grad_norm": 4.469927022538459, | |
| "learning_rate": 2.408765000406005e-07, | |
| "loss": 0.4796, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.9114927344782034, | |
| "grad_norm": 4.519223313466715, | |
| "learning_rate": 2.367352042328408e-07, | |
| "loss": 0.4685, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 0.9122697956329163, | |
| "grad_norm": 3.963061942219626, | |
| "learning_rate": 2.3262895431424015e-07, | |
| "loss": 0.4851, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.9130468567876292, | |
| "grad_norm": 2.4524133862796313, | |
| "learning_rate": 2.2855778049667653e-07, | |
| "loss": 0.4534, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.9138239179423421, | |
| "grad_norm": 2.834722369254088, | |
| "learning_rate": 2.2452171273395716e-07, | |
| "loss": 0.4548, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.914600979097055, | |
| "grad_norm": 3.662017876045297, | |
| "learning_rate": 2.2052078072159143e-07, | |
| "loss": 0.4596, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 0.9153780402517678, | |
| "grad_norm": 4.021945589966396, | |
| "learning_rate": 2.1655501389657941e-07, | |
| "loss": 0.4744, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.9161551014064807, | |
| "grad_norm": 3.251036017263966, | |
| "learning_rate": 2.126244414371903e-07, | |
| "loss": 0.4575, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 0.9169321625611936, | |
| "grad_norm": 3.351594261133528, | |
| "learning_rate": 2.087290922627494e-07, | |
| "loss": 0.4722, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.9177092237159065, | |
| "grad_norm": 2.9100443321260645, | |
| "learning_rate": 2.0486899503342595e-07, | |
| "loss": 0.4781, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 0.9184862848706193, | |
| "grad_norm": 5.769177396129288, | |
| "learning_rate": 2.010441781500233e-07, | |
| "loss": 0.4561, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.9192633460253322, | |
| "grad_norm": 3.6257554055271703, | |
| "learning_rate": 1.9725466975376585e-07, | |
| "loss": 0.4628, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 0.9200404071800451, | |
| "grad_norm": 5.698219899736846, | |
| "learning_rate": 1.9350049772609568e-07, | |
| "loss": 0.4849, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.920817468334758, | |
| "grad_norm": 5.10283696189389, | |
| "learning_rate": 1.8978168968846632e-07, | |
| "loss": 0.4584, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.9215945294894708, | |
| "grad_norm": 2.4057166233933107, | |
| "learning_rate": 1.8609827300213877e-07, | |
| "loss": 0.4575, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.9223715906441837, | |
| "grad_norm": 4.039902041938024, | |
| "learning_rate": 1.8245027476798295e-07, | |
| "loss": 0.4237, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 0.9231486517988966, | |
| "grad_norm": 5.0104310640190155, | |
| "learning_rate": 1.7883772182627378e-07, | |
| "loss": 0.4609, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.9239257129536095, | |
| "grad_norm": 3.235199066685605, | |
| "learning_rate": 1.7526064075649718e-07, | |
| "loss": 0.4725, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 0.9247027741083224, | |
| "grad_norm": 5.7000179030429, | |
| "learning_rate": 1.7171905787715436e-07, | |
| "loss": 0.4844, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.9254798352630352, | |
| "grad_norm": 4.833515226751012, | |
| "learning_rate": 1.6821299924556557e-07, | |
| "loss": 0.4711, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 0.9262568964177481, | |
| "grad_norm": 4.541973195325704, | |
| "learning_rate": 1.647424906576811e-07, | |
| "loss": 0.4536, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.927033957572461, | |
| "grad_norm": 3.1471929054096464, | |
| "learning_rate": 1.613075576478923e-07, | |
| "loss": 0.461, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 0.9278110187271739, | |
| "grad_norm": 5.155810640275875, | |
| "learning_rate": 1.5790822548883921e-07, | |
| "loss": 0.4619, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.9285880798818867, | |
| "grad_norm": 4.815168413187984, | |
| "learning_rate": 1.545445191912287e-07, | |
| "loss": 0.4811, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.9293651410365996, | |
| "grad_norm": 4.039603939657306, | |
| "learning_rate": 1.5121646350364784e-07, | |
| "loss": 0.4677, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.9301422021913125, | |
| "grad_norm": 3.0484480106622565, | |
| "learning_rate": 1.4792408291238514e-07, | |
| "loss": 0.4621, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 0.9309192633460254, | |
| "grad_norm": 3.30445623378334, | |
| "learning_rate": 1.4466740164124582e-07, | |
| "loss": 0.423, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.9316963245007382, | |
| "grad_norm": 5.507483370884143, | |
| "learning_rate": 1.4144644365137906e-07, | |
| "loss": 0.4395, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 0.9324733856554511, | |
| "grad_norm": 4.472623280485502, | |
| "learning_rate": 1.382612326410959e-07, | |
| "loss": 0.4407, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.933250446810164, | |
| "grad_norm": 4.43958885227866, | |
| "learning_rate": 1.3511179204570014e-07, | |
| "loss": 0.4594, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 0.9340275079648769, | |
| "grad_norm": 4.219831856666021, | |
| "learning_rate": 1.3199814503731144e-07, | |
| "loss": 0.4935, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.9348045691195898, | |
| "grad_norm": 3.6973725388649887, | |
| "learning_rate": 1.289203145246981e-07, | |
| "loss": 0.4163, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 0.9355816302743026, | |
| "grad_norm": 3.7631001641207087, | |
| "learning_rate": 1.258783231531069e-07, | |
| "loss": 0.4795, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.9363586914290155, | |
| "grad_norm": 4.91235572426644, | |
| "learning_rate": 1.2287219330409716e-07, | |
| "loss": 0.4763, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.9371357525837284, | |
| "grad_norm": 5.162011591962256, | |
| "learning_rate": 1.1990194709537496e-07, | |
| "loss": 0.4663, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.9379128137384413, | |
| "grad_norm": 4.847494906904684, | |
| "learning_rate": 1.1696760638063243e-07, | |
| "loss": 0.4638, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 0.9386898748931541, | |
| "grad_norm": 3.4104319145126203, | |
| "learning_rate": 1.1406919274938477e-07, | |
| "loss": 0.5046, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.939466936047867, | |
| "grad_norm": 3.5111768971357793, | |
| "learning_rate": 1.112067275268125e-07, | |
| "loss": 0.4713, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.9402439972025799, | |
| "grad_norm": 2.618733293064988, | |
| "learning_rate": 1.083802317736049e-07, | |
| "loss": 0.4698, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.9410210583572928, | |
| "grad_norm": 3.9733552104692333, | |
| "learning_rate": 1.0558972628580522e-07, | |
| "loss": 0.5037, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 0.9417981195120056, | |
| "grad_norm": 3.876212810601272, | |
| "learning_rate": 1.0283523159465514e-07, | |
| "loss": 0.4538, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.9425751806667185, | |
| "grad_norm": 3.2929997252443193, | |
| "learning_rate": 1.0011676796644776e-07, | |
| "loss": 0.4606, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 0.9433522418214313, | |
| "grad_norm": 4.672469430214036, | |
| "learning_rate": 9.743435540237433e-08, | |
| "loss": 0.4695, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.9441293029761442, | |
| "grad_norm": 3.462179429994501, | |
| "learning_rate": 9.478801363838052e-08, | |
| "loss": 0.448, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.944906364130857, | |
| "grad_norm": 4.83408662775341, | |
| "learning_rate": 9.217776214501984e-08, | |
| "loss": 0.484, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.9456834252855699, | |
| "grad_norm": 4.438140209760804, | |
| "learning_rate": 8.960362012730983e-08, | |
| "loss": 0.4603, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 0.9464604864402828, | |
| "grad_norm": 3.4973998750156543, | |
| "learning_rate": 8.706560652459062e-08, | |
| "loss": 0.4249, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.9472375475949957, | |
| "grad_norm": 4.089297315615882, | |
| "learning_rate": 8.456374001038769e-08, | |
| "loss": 0.4491, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 0.9480146087497086, | |
| "grad_norm": 4.084120402865338, | |
| "learning_rate": 8.209803899227209e-08, | |
| "loss": 0.4535, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.9487916699044214, | |
| "grad_norm": 3.941907831396277, | |
| "learning_rate": 7.966852161172711e-08, | |
| "loss": 0.4496, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 0.9495687310591343, | |
| "grad_norm": 2.833823469782505, | |
| "learning_rate": 7.727520574401127e-08, | |
| "loss": 0.4243, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.9503457922138472, | |
| "grad_norm": 4.551822537590359, | |
| "learning_rate": 7.49181089980322e-08, | |
| "loss": 0.4582, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 0.9511228533685601, | |
| "grad_norm": 3.043933176817138, | |
| "learning_rate": 7.259724871621188e-08, | |
| "loss": 0.5034, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.9518999145232729, | |
| "grad_norm": 3.4621240444267665, | |
| "learning_rate": 7.031264197436161e-08, | |
| "loss": 0.4268, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.9526769756779858, | |
| "grad_norm": 4.056375247941382, | |
| "learning_rate": 6.806430558155719e-08, | |
| "loss": 0.4745, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.9534540368326987, | |
| "grad_norm": 4.535857419133766, | |
| "learning_rate": 6.585225608001178e-08, | |
| "loss": 0.4308, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 0.9542310979874116, | |
| "grad_norm": 2.4310495050933816, | |
| "learning_rate": 6.367650974495875e-08, | |
| "loss": 0.4222, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.9550081591421244, | |
| "grad_norm": 1.8480746534853145, | |
| "learning_rate": 6.153708258452851e-08, | |
| "loss": 0.4637, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 0.9557852202968373, | |
| "grad_norm": 4.469852603004664, | |
| "learning_rate": 5.943399033963182e-08, | |
| "loss": 0.4771, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.9565622814515502, | |
| "grad_norm": 3.2674434265539745, | |
| "learning_rate": 5.7367248483845005e-08, | |
| "loss": 0.4866, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 0.9573393426062631, | |
| "grad_norm": 2.005250278061698, | |
| "learning_rate": 5.533687222329332e-08, | |
| "loss": 0.4144, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.958116403760976, | |
| "grad_norm": 2.6745479068375824, | |
| "learning_rate": 5.3342876496542126e-08, | |
| "loss": 0.4685, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 0.9588934649156888, | |
| "grad_norm": 2.9539394159745815, | |
| "learning_rate": 5.138527597448595e-08, | |
| "loss": 0.4639, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.9596705260704017, | |
| "grad_norm": 4.017786152412138, | |
| "learning_rate": 4.946408506023958e-08, | |
| "loss": 0.442, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.9604475872251146, | |
| "grad_norm": 3.850870480799147, | |
| "learning_rate": 4.757931788903325e-08, | |
| "loss": 0.4304, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.9612246483798275, | |
| "grad_norm": 3.0544561131913586, | |
| "learning_rate": 4.573098832810818e-08, | |
| "loss": 0.4478, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 0.9620017095345403, | |
| "grad_norm": 5.80094396671801, | |
| "learning_rate": 4.391910997661397e-08, | |
| "loss": 0.4821, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.9627787706892532, | |
| "grad_norm": 3.5119994742694773, | |
| "learning_rate": 4.214369616550973e-08, | |
| "loss": 0.4362, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 0.9635558318439661, | |
| "grad_norm": 4.914214488501594, | |
| "learning_rate": 4.040475995746529e-08, | |
| "loss": 0.4375, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.964332892998679, | |
| "grad_norm": 3.0958335114663322, | |
| "learning_rate": 3.8702314146766284e-08, | |
| "loss": 0.4565, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 0.9651099541533918, | |
| "grad_norm": 3.156460394460856, | |
| "learning_rate": 3.7036371259216994e-08, | |
| "loss": 0.4625, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.9658870153081047, | |
| "grad_norm": 3.411808395407994, | |
| "learning_rate": 3.540694355205099e-08, | |
| "loss": 0.4403, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 0.9666640764628176, | |
| "grad_norm": 3.823904951701004, | |
| "learning_rate": 3.381404301384117e-08, | |
| "loss": 0.4446, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.9674411376175305, | |
| "grad_norm": 3.762167967184466, | |
| "learning_rate": 3.225768136440821e-08, | |
| "loss": 0.4588, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.9682181987722434, | |
| "grad_norm": 3.0475232787033835, | |
| "learning_rate": 3.0737870054739496e-08, | |
| "loss": 0.4643, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.9689952599269562, | |
| "grad_norm": 3.768638648169802, | |
| "learning_rate": 2.925462026689918e-08, | |
| "loss": 0.4438, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 0.9697723210816691, | |
| "grad_norm": 3.1658559982961942, | |
| "learning_rate": 2.7807942913950504e-08, | |
| "loss": 0.4872, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.970549382236382, | |
| "grad_norm": 4.351267552340424, | |
| "learning_rate": 2.6397848639874156e-08, | |
| "loss": 0.4828, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 0.9713264433910949, | |
| "grad_norm": 4.485145700676859, | |
| "learning_rate": 2.502434781948726e-08, | |
| "loss": 0.4754, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.9721035045458077, | |
| "grad_norm": 3.772459688285439, | |
| "learning_rate": 2.3687450558370627e-08, | |
| "loss": 0.4425, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 0.9728805657005206, | |
| "grad_norm": 2.3671995254376474, | |
| "learning_rate": 2.2387166692794392e-08, | |
| "loss": 0.4698, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 0.9736576268552335, | |
| "grad_norm": 4.4933944619724, | |
| "learning_rate": 2.1123505789642507e-08, | |
| "loss": 0.4746, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 0.9744346880099464, | |
| "grad_norm": 4.072967347229249, | |
| "learning_rate": 1.989647714634446e-08, | |
| "loss": 0.4646, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.9752117491646592, | |
| "grad_norm": 3.8176450930369965, | |
| "learning_rate": 1.8706089790807014e-08, | |
| "loss": 0.4885, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.9759888103193721, | |
| "grad_norm": 5.406894035256226, | |
| "learning_rate": 1.7552352481347013e-08, | |
| "loss": 0.4495, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 0.976765871474085, | |
| "grad_norm": 2.91507715459867, | |
| "learning_rate": 1.6435273706627564e-08, | |
| "loss": 0.4498, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 0.9775429326287979, | |
| "grad_norm": 3.455836019853387, | |
| "learning_rate": 1.5354861685595855e-08, | |
| "loss": 0.4679, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 0.9783199937835108, | |
| "grad_norm": 3.065621924437169, | |
| "learning_rate": 1.4311124367420992e-08, | |
| "loss": 0.424, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 0.9790970549382236, | |
| "grad_norm": 5.408364243129198, | |
| "learning_rate": 1.3304069431437362e-08, | |
| "loss": 0.4582, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.9798741160929365, | |
| "grad_norm": 3.9623851369922485, | |
| "learning_rate": 1.2333704287087467e-08, | |
| "loss": 0.4733, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 0.9806511772476494, | |
| "grad_norm": 3.6951264488478976, | |
| "learning_rate": 1.1400036073866416e-08, | |
| "loss": 0.46, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 0.9814282384023623, | |
| "grad_norm": 2.8637927854551233, | |
| "learning_rate": 1.0503071661271957e-08, | |
| "loss": 0.4449, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 0.9822052995570751, | |
| "grad_norm": 3.2568596741604523, | |
| "learning_rate": 9.642817648750636e-09, | |
| "loss": 0.4644, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 0.982982360711788, | |
| "grad_norm": 4.000380462168666, | |
| "learning_rate": 8.819280365652827e-09, | |
| "loss": 0.4525, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.9837594218665009, | |
| "grad_norm": 4.048475764438385, | |
| "learning_rate": 8.032465871182227e-09, | |
| "loss": 0.4586, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.9845364830212138, | |
| "grad_norm": 3.2880203159325307, | |
| "learning_rate": 7.282379954354768e-09, | |
| "loss": 0.4334, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 0.9853135441759266, | |
| "grad_norm": 4.0643620339312605, | |
| "learning_rate": 6.569028133954214e-09, | |
| "loss": 0.4458, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 0.9860906053306395, | |
| "grad_norm": 4.549795834627539, | |
| "learning_rate": 5.892415658491634e-09, | |
| "loss": 0.4554, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 0.9868676664853524, | |
| "grad_norm": 2.533413360663321, | |
| "learning_rate": 5.252547506167105e-09, | |
| "loss": 0.4535, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.9876447276400653, | |
| "grad_norm": 3.539581600293753, | |
| "learning_rate": 4.649428384833065e-09, | |
| "loss": 0.4591, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 0.9884217887947782, | |
| "grad_norm": 3.392398736378723, | |
| "learning_rate": 4.083062731960463e-09, | |
| "loss": 0.4609, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.989198849949491, | |
| "grad_norm": 3.8523386314806305, | |
| "learning_rate": 3.5534547146043318e-09, | |
| "loss": 0.4601, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 0.9899759111042039, | |
| "grad_norm": 4.270954545588355, | |
| "learning_rate": 3.060608229373818e-09, | |
| "loss": 0.4578, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 0.9907529722589168, | |
| "grad_norm": 3.428519580605601, | |
| "learning_rate": 2.6045269024049802e-09, | |
| "loss": 0.4564, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.9915300334136297, | |
| "grad_norm": 3.2452332555408683, | |
| "learning_rate": 2.1852140893319218e-09, | |
| "loss": 0.4291, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 0.9923070945683425, | |
| "grad_norm": 2.788077697667321, | |
| "learning_rate": 1.8026728752634781e-09, | |
| "loss": 0.4726, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 0.9930841557230554, | |
| "grad_norm": 4.551637132581418, | |
| "learning_rate": 1.4569060747610109e-09, | |
| "loss": 0.4655, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.9938612168777683, | |
| "grad_norm": 3.8202797170955614, | |
| "learning_rate": 1.1479162318150939e-09, | |
| "loss": 0.4136, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 0.9946382780324812, | |
| "grad_norm": 4.028638686891394, | |
| "learning_rate": 8.757056198294145e-10, | |
| "loss": 0.4866, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.995415339187194, | |
| "grad_norm": 3.652876594672518, | |
| "learning_rate": 6.402762416035657e-10, | |
| "loss": 0.4361, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 0.9961924003419069, | |
| "grad_norm": 3.900658925525932, | |
| "learning_rate": 4.4162982931750255e-10, | |
| "loss": 0.4366, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 0.9969694614966198, | |
| "grad_norm": 4.0222498355179, | |
| "learning_rate": 2.7976784451877457e-10, | |
| "loss": 0.5075, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 0.9977465226513327, | |
| "grad_norm": 3.2583568617059995, | |
| "learning_rate": 1.5469147811308926e-10, | |
| "loss": 0.438, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.9985235838060456, | |
| "grad_norm": 3.7370678626951936, | |
| "learning_rate": 6.640165035431967e-11, | |
| "loss": 0.4643, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.9993006449607584, | |
| "grad_norm": 4.275291482479352, | |
| "learning_rate": 1.4899010837288174e-11, | |
| "loss": 0.4825, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 12869, | |
| "total_flos": 626183508787200.0, | |
| "train_loss": 0.7687553066651168, | |
| "train_runtime": 40960.0959, | |
| "train_samples_per_second": 5.027, | |
| "train_steps_per_second": 0.314 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 12869, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 20000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 626183508787200.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |