{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.001098705829648,
  "global_step": 400000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 0.00014993743133088564, "loss": 9.1175, "step": 500 },
    { "epoch": 0.0, "learning_rate": 0.0001498748626617713, "loss": 8.3544, "step": 1000 },
    { "epoch": 0.0, "learning_rate": 0.00014981229399265691, "loss": 8.0502, "step": 1500 },
    { "epoch": 0.01, "learning_rate": 0.00014974972532354257, "loss": 7.8515, "step": 2000 },
    { "epoch": 0.01, "learning_rate": 0.00014968715665442822, "loss": 7.681, "step": 2500 },
    { "epoch": 0.01, "learning_rate": 0.00014962458798531387, "loss": 7.5517, "step": 3000 },
    { "epoch": 0.01, "learning_rate": 0.00014956201931619952, "loss": 7.4159, "step": 3500 },
    { "epoch": 0.01, "learning_rate": 0.00014949945064708517, "loss": 7.2876, "step": 4000 },
    { "epoch": 0.01, "learning_rate": 0.00014943688197797082, "loss": 7.1397, "step": 4500 },
    { "epoch": 0.01, "learning_rate": 0.00014937431330885645, "loss": 6.9971, "step": 5000 },
    { "epoch": 0.01, "learning_rate": 0.0001493117446397421, "loss": 6.8653, "step": 5500 },
    { "epoch": 0.02, "learning_rate": 0.00014924917597062775, "loss": 6.7289, "step": 6000 },
    { "epoch": 0.02, "learning_rate": 0.0001491866073015134, "loss": 6.6215, "step": 6500 },
    { "epoch": 0.02, "learning_rate": 0.00014912403863239903, "loss": 6.5087, "step": 7000 },
    { "epoch": 0.02, "learning_rate": 0.00014906146996328468, "loss": 6.4134, "step": 7500 },
    { "epoch": 0.02, "learning_rate": 0.00014899890129417033, "loss": 6.3202, "step": 8000 },
    { "epoch": 0.02, "learning_rate": 0.00014893633262505598, "loss": 6.2245, "step": 8500 },
    { "epoch": 0.02, "learning_rate": 0.00014887376395594163, "loss": 6.1525, "step": 9000 },
    { "epoch": 0.02, "learning_rate": 0.00014881119528682729, "loss": 6.0713, "step": 9500 },
    { "epoch": 0.03, "learning_rate": 0.00014874862661771294, "loss": 5.9881, "step": 10000 },
    { "epoch": 0.03, "learning_rate": 0.00014868605794859856, "loss": 5.9234, "step": 10500 },
    { "epoch": 0.03, "learning_rate": 0.00014862348927948421, "loss": 5.8453, "step": 11000 },
    { "epoch": 0.03, "learning_rate": 0.00014856092061036987, "loss": 5.7843, "step": 11500 },
    { "epoch": 0.03, "learning_rate": 0.00014849835194125552, "loss": 5.7166, "step": 12000 },
    { "epoch": 0.03, "learning_rate": 0.00014843578327214117, "loss": 5.6656, "step": 12500 },
    { "epoch": 0.03, "learning_rate": 0.0001483732146030268, "loss": 5.6084, "step": 13000 },
    { "epoch": 0.03, "learning_rate": 0.00014831064593391247, "loss": 5.5446, "step": 13500 },
    { "epoch": 0.04, "learning_rate": 0.0001482480772647981, "loss": 5.5103, "step": 14000 },
    { "epoch": 0.04, "learning_rate": 0.00014818550859568375, "loss": 5.4623, "step": 14500 },
    { "epoch": 0.04, "learning_rate": 0.0001481229399265694, "loss": 5.4276, "step": 15000 },
    { "epoch": 0.04, "learning_rate": 0.00014806037125745505, "loss": 5.379, "step": 15500 },
    { "epoch": 0.04, "learning_rate": 0.0001479978025883407, "loss": 5.3413, "step": 16000 },
    { "epoch": 0.04, "learning_rate": 0.00014793523391922633, "loss": 5.3178, "step": 16500 },
    { "epoch": 0.04, "learning_rate": 0.00014787266525011198, "loss": 5.2804, "step": 17000 },
    { "epoch": 0.04, "learning_rate": 0.00014781009658099763, "loss": 5.2634, "step": 17500 },
    { "epoch": 0.05, "learning_rate": 0.00014774752791188328, "loss": 5.2208, "step": 18000 },
    { "epoch": 0.05, "learning_rate": 0.0001476849592427689, "loss": 5.1902, "step": 18500 },
    { "epoch": 0.05, "learning_rate": 0.00014762239057365459, "loss": 5.1797, "step": 19000 },
    { "epoch": 0.05, "learning_rate": 0.00014755982190454024, "loss": 5.1425, "step": 19500 },
    { "epoch": 0.05, "learning_rate": 0.00014749725323542586, "loss": 5.1115, "step": 20000 },
    { "epoch": 0.05, "learning_rate": 0.0001474346845663115, "loss": 5.0887, "step": 20500 },
    { "epoch": 0.05, "learning_rate": 0.00014737211589719716, "loss": 5.0949, "step": 21000 },
    { "epoch": 0.05, "learning_rate": 0.00014730954722808282, "loss": 5.0657, "step": 21500 },
    { "epoch": 0.06, "learning_rate": 0.00014724697855896844, "loss": 5.0448, "step": 22000 },
    { "epoch": 0.06, "learning_rate": 0.0001471844098898541, "loss": 5.0233, "step": 22500 },
    { "epoch": 0.06, "learning_rate": 0.00014712184122073974, "loss": 4.9993, "step": 23000 },
    { "epoch": 0.06, "learning_rate": 0.0001470592725516254, "loss": 4.9959, "step": 23500 },
    { "epoch": 0.06, "learning_rate": 0.00014699670388251105, "loss": 4.9717, "step": 24000 },
    { "epoch": 0.06, "learning_rate": 0.0001469341352133967, "loss": 4.9487, "step": 24500 },
    { "epoch": 0.06, "learning_rate": 0.00014687156654428235, "loss": 4.944, "step": 25000 },
    { "epoch": 0.06, "learning_rate": 0.00014680899787516798, "loss": 4.926, "step": 25500 },
    { "epoch": 0.07, "learning_rate": 0.00014674642920605363, "loss": 4.9112, "step": 26000 },
    { "epoch": 0.07, "learning_rate": 0.00014668386053693928, "loss": 4.9049, "step": 26500 },
    { "epoch": 0.07, "learning_rate": 0.00014662129186782493, "loss": 4.8885, "step": 27000 },
    { "epoch": 0.07, "learning_rate": 0.00014655872319871058, "loss": 4.871, "step": 27500 },
    { "epoch": 0.07, "learning_rate": 0.0001464961545295962, "loss": 4.8603, "step": 28000 },
    { "epoch": 0.07, "learning_rate": 0.00014643358586048186, "loss": 4.8336, "step": 28500 },
    { "epoch": 0.07, "learning_rate": 0.0001463710171913675, "loss": 4.8393, "step": 29000 },
    { "epoch": 0.07, "learning_rate": 0.00014630844852225316, "loss": 4.8184, "step": 29500 },
    { "epoch": 0.08, "learning_rate": 0.0001462458798531388, "loss": 4.8204, "step": 30000 },
    { "epoch": 0.08, "learning_rate": 0.00014618331118402446, "loss": 4.8008, "step": 30500 },
    { "epoch": 0.08, "learning_rate": 0.00014612074251491012, "loss": 4.7858, "step": 31000 },
    { "epoch": 0.08, "learning_rate": 0.00014605817384579574, "loss": 4.7695, "step": 31500 },
    { "epoch": 0.08, "learning_rate": 0.0001459956051766814, "loss": 4.7705, "step": 32000 },
    { "epoch": 0.08, "learning_rate": 0.00014593303650756704, "loss": 4.7617, "step": 32500 },
    { "epoch": 0.08, "learning_rate": 0.0001458704678384527, "loss": 4.7498, "step": 33000 },
    { "epoch": 0.08, "learning_rate": 0.00014580789916933832, "loss": 4.7427, "step": 33500 },
    { "epoch": 0.09, "learning_rate": 0.00014574533050022397, "loss": 4.7235, "step": 34000 },
    { "epoch": 0.09, "learning_rate": 0.00014568276183110962, "loss": 4.7262, "step": 34500 },
    { "epoch": 0.09, "learning_rate": 0.00014562019316199527, "loss": 4.7172, "step": 35000 },
    { "epoch": 0.09, "learning_rate": 0.00014555762449288093, "loss": 4.7051, "step": 35500 },
    { "epoch": 0.09, "learning_rate": 0.00014549505582376658, "loss": 4.6796, "step": 36000 },
    { "epoch": 0.09, "learning_rate": 0.00014543248715465223, "loss": 4.665, "step": 36500 },
    { "epoch": 0.09, "learning_rate": 0.00014536991848553788, "loss": 4.6904, "step": 37000 },
    { "epoch": 0.09, "learning_rate": 0.0001453073498164235, "loss": 4.6701, "step": 37500 },
    { "epoch": 0.1, "learning_rate": 0.00014524478114730916, "loss": 4.6592, "step": 38000 },
    { "epoch": 0.1, "learning_rate": 0.0001451822124781948, "loss": 4.6409, "step": 38500 },
    { "epoch": 0.1, "learning_rate": 0.00014511964380908046, "loss": 4.6627, "step": 39000 },
    { "epoch": 0.1, "learning_rate": 0.00014505707513996608, "loss": 4.6439, "step": 39500 },
    { "epoch": 0.1, "learning_rate": 0.00014499450647085174, "loss": 4.6214, "step": 40000 },
    { "epoch": 0.1, "learning_rate": 0.00014493193780173742, "loss": 4.6419, "step": 40500 },
    { "epoch": 0.1, "learning_rate": 0.00014486936913262304, "loss": 4.6312, "step": 41000 },
    { "epoch": 0.1, "learning_rate": 0.0001448068004635087, "loss": 4.6204, "step": 41500 },
    { "epoch": 0.11, "learning_rate": 0.00014474423179439434, "loss": 4.6197, "step": 42000 },
    { "epoch": 0.11, "learning_rate": 0.00014468166312528, "loss": 4.6011, "step": 42500 },
    { "epoch": 0.11, "learning_rate": 0.00014461909445616562, "loss": 4.5853, "step": 43000 },
    { "epoch": 0.11, "learning_rate": 0.00014455652578705127, "loss": 4.5793, "step": 43500 },
    { "epoch": 0.11, "learning_rate": 0.00014449395711793692, "loss": 4.576, "step": 44000 },
    { "epoch": 0.11, "learning_rate": 0.00014443138844882257, "loss": 4.5698, "step": 44500 },
    { "epoch": 0.11, "learning_rate": 0.0001443688197797082, "loss": 4.5658, "step": 45000 },
    { "epoch": 0.11, "learning_rate": 0.00014430625111059385, "loss": 4.5533, "step": 45500 },
    { "epoch": 0.12, "learning_rate": 0.00014424368244147953, "loss": 4.551, "step": 46000 },
    { "epoch": 0.12, "learning_rate": 0.00014418111377236515, "loss": 4.5576, "step": 46500 },
    { "epoch": 0.12, "learning_rate": 0.0001441185451032508, "loss": 4.5412, "step": 47000 },
    { "epoch": 0.12, "learning_rate": 0.00014405597643413646, "loss": 4.5437, "step": 47500 },
    { "epoch": 0.12, "learning_rate": 0.0001439934077650221, "loss": 4.5266, "step": 48000 },
    { "epoch": 0.12, "learning_rate": 0.00014393083909590776, "loss": 4.5125, "step": 48500 },
    { "epoch": 0.12, "learning_rate": 0.00014386827042679338, "loss": 4.5353, "step": 49000 },
    { "epoch": 0.12, "learning_rate": 0.00014380570175767904, "loss": 4.5159, "step": 49500 },
    { "epoch": 0.13, "learning_rate": 0.0001437431330885647, "loss": 4.5024, "step": 50000 },
    { "epoch": 0.13, "learning_rate": 0.00014368056441945034, "loss": 4.4975, "step": 50500 },
    { "epoch": 0.13, "learning_rate": 0.00014361799575033596, "loss": 4.5025, "step": 51000 },
    { "epoch": 0.13, "learning_rate": 0.00014355542708122164, "loss": 4.5117, "step": 51500 },
    { "epoch": 0.13, "learning_rate": 0.0001434928584121073, "loss": 4.5049, "step": 52000 },
    { "epoch": 0.13, "learning_rate": 0.00014343028974299292, "loss": 4.5069, "step": 52500 },
    { "epoch": 0.13, "learning_rate": 0.00014336772107387857, "loss": 4.4901, "step": 53000 },
    { "epoch": 0.13, "learning_rate": 0.00014330515240476422, "loss": 4.4853, "step": 53500 },
    { "epoch": 0.14, "learning_rate": 0.00014324258373564987, "loss": 4.4722, "step": 54000 },
    { "epoch": 0.14, "learning_rate": 0.0001431800150665355, "loss": 4.4653, "step": 54500 },
    { "epoch": 0.14, "learning_rate": 0.00014311744639742115, "loss": 4.4651, "step": 55000 },
    { "epoch": 0.14, "learning_rate": 0.0001430548777283068, "loss": 4.4379, "step": 55500 },
    { "epoch": 0.14, "learning_rate": 0.00014299230905919245, "loss": 4.4491, "step": 56000 },
    { "epoch": 0.14, "learning_rate": 0.00014292974039007808, "loss": 4.4594, "step": 56500 },
    { "epoch": 0.14, "learning_rate": 0.00014286717172096376, "loss": 4.4491, "step": 57000 },
    { "epoch": 0.14, "learning_rate": 0.0001428046030518494, "loss": 4.4344, "step": 57500 },
    { "epoch": 0.15, "learning_rate": 0.00014274203438273503, "loss": 4.4358, "step": 58000 },
    { "epoch": 0.15, "learning_rate": 0.00014267946571362068, "loss": 4.4493, "step": 58500 },
    { "epoch": 0.15, "learning_rate": 0.00014261689704450633, "loss": 4.4361, "step": 59000 },
    { "epoch": 0.15, "learning_rate": 0.000142554328375392, "loss": 4.4308, "step": 59500 },
    { "epoch": 0.15, "learning_rate": 0.00014249175970627764, "loss": 4.4219, "step": 60000 },
    { "epoch": 0.15, "learning_rate": 0.00014242919103716326, "loss": 4.4086, "step": 60500 },
    { "epoch": 0.15, "learning_rate": 0.00014236662236804891, "loss": 4.4285, "step": 61000 },
    { "epoch": 0.15, "learning_rate": 0.00014230405369893457, "loss": 4.4069, "step": 61500 },
    { "epoch": 0.16, "learning_rate": 0.00014224148502982022, "loss": 4.4121, "step": 62000 },
    { "epoch": 0.16, "learning_rate": 0.00014217891636070587, "loss": 4.421, "step": 62500 },
    { "epoch": 0.16, "learning_rate": 0.00014211634769159152, "loss": 4.3855, "step": 63000 },
    { "epoch": 0.16, "learning_rate": 0.00014205377902247717, "loss": 4.397, "step": 63500 },
    { "epoch": 0.16, "learning_rate": 0.0001419912103533628, "loss": 4.3656, "step": 64000 },
    { "epoch": 0.16, "learning_rate": 0.00014192864168424845, "loss": 4.3959, "step": 64500 },
    { "epoch": 0.16, "learning_rate": 0.0001418660730151341, "loss": 4.3878, "step": 65000 },
    { "epoch": 0.16, "learning_rate": 0.00014180350434601975, "loss": 4.3796, "step": 65500 },
    { "epoch": 0.17, "learning_rate": 0.00014174093567690538, "loss": 4.3698, "step": 66000 },
    { "epoch": 0.17, "learning_rate": 0.00014167836700779103, "loss": 4.3745, "step": 66500 },
    { "epoch": 0.17, "learning_rate": 0.00014161579833867668, "loss": 4.3698, "step": 67000 },
    { "epoch": 0.17, "learning_rate": 0.00014155322966956233, "loss": 4.3795, "step": 67500 },
    { "epoch": 0.17, "learning_rate": 0.00014149066100044798, "loss": 4.3687, "step": 68000 },
    { "epoch": 0.17, "learning_rate": 0.00014142809233133363, "loss": 4.3577, "step": 68500 },
    { "epoch": 0.17, "learning_rate": 0.00014136552366221929, "loss": 4.3724, "step": 69000 },
    { "epoch": 0.17, "learning_rate": 0.0001413029549931049, "loss": 4.3588, "step": 69500 },
    { "epoch": 0.18, "learning_rate": 0.00014124038632399056, "loss": 4.3452, "step": 70000 },
    { "epoch": 0.18, "learning_rate": 0.0001411778176548762, "loss": 4.3588, "step": 70500 },
    { "epoch": 0.18, "learning_rate": 0.00014111524898576187, "loss": 4.3468, "step": 71000 },
    { "epoch": 0.18, "learning_rate": 0.00014105268031664752, "loss": 4.3428, "step": 71500 },
    { "epoch": 0.18, "learning_rate": 0.00014099011164753314, "loss": 4.3454, "step": 72000 },
    { "epoch": 0.18, "learning_rate": 0.0001409275429784188, "loss": 4.3414, "step": 72500 },
    { "epoch": 0.18, "learning_rate": 0.00014086497430930444, "loss": 4.3335, "step": 73000 },
    { "epoch": 0.18, "learning_rate": 0.0001408024056401901, "loss": 4.3403, "step": 73500 },
    { "epoch": 0.19, "learning_rate": 0.00014073983697107575, "loss": 4.3481, "step": 74000 },
    { "epoch": 0.19, "learning_rate": 0.0001406772683019614, "loss": 4.3331, "step": 74500 },
    { "epoch": 0.19, "learning_rate": 0.00014061469963284705, "loss": 4.3437, "step": 75000 },
    { "epoch": 0.19, "learning_rate": 0.00014055213096373268, "loss": 4.3193, "step": 75500 },
    { "epoch": 0.19, "learning_rate": 0.00014048956229461833, "loss": 4.2995, "step": 76000 },
    { "epoch": 0.19, "learning_rate": 0.00014042699362550398, "loss": 4.3271, "step": 76500 },
    { "epoch": 0.19, "learning_rate": 0.00014036442495638963, "loss": 4.3122, "step": 77000 },
    { "epoch": 0.19, "learning_rate": 0.00014030185628727525, "loss": 4.3048, "step": 77500 },
    { "epoch": 0.2, "learning_rate": 0.0001402392876181609, "loss": 4.3183, "step": 78000 },
    { "epoch": 0.2, "learning_rate": 0.00014017671894904659, "loss": 4.3096, "step": 78500 },
    { "epoch": 0.2, "learning_rate": 0.0001401141502799322, "loss": 4.2995, "step": 79000 },
    { "epoch": 0.2, "learning_rate": 0.00014005158161081786, "loss": 4.3082, "step": 79500 },
    { "epoch": 0.2, "learning_rate": 0.0001399890129417035, "loss": 4.297, "step": 80000 },
    { "epoch": 0.2, "learning_rate": 0.00013992644427258916, "loss": 4.2907, "step": 80500 },
    { "epoch": 0.2, "learning_rate": 0.0001398638756034748, "loss": 4.2937, "step": 81000 },
    { "epoch": 0.2, "learning_rate": 0.00013980130693436044, "loss": 4.289, "step": 81500 },
    { "epoch": 0.21, "learning_rate": 0.0001397387382652461, "loss": 4.2851, "step": 82000 },
    { "epoch": 0.21, "learning_rate": 0.00013967616959613174, "loss": 4.298, "step": 82500 },
    { "epoch": 0.21, "learning_rate": 0.0001396136009270174, "loss": 4.286, "step": 83000 },
    { "epoch": 0.21, "learning_rate": 0.00013955103225790302, "loss": 4.2706, "step": 83500 },
    { "epoch": 0.21, "learning_rate": 0.0001394884635887887, "loss": 4.2687, "step": 84000 },
    { "epoch": 0.21, "learning_rate": 0.00013942589491967432, "loss": 4.2638, "step": 84500 },
    { "epoch": 0.21, "learning_rate": 0.00013936332625055997, "loss": 4.2636, "step": 85000 },
    { "epoch": 0.21, "learning_rate": 0.00013930075758144563, "loss": 4.2626, "step": 85500 },
    { "epoch": 0.22, "learning_rate": 0.00013923818891233128, "loss": 4.2701, "step": 86000 },
    { "epoch": 0.22, "learning_rate": 0.00013917562024321693, "loss": 4.2779, "step": 86500 },
    { "epoch": 0.22, "learning_rate": 0.00013911305157410255, "loss": 4.2702, "step": 87000 },
    { "epoch": 0.22, "learning_rate": 0.0001390504829049882, "loss": 4.2689, "step": 87500 },
    { "epoch": 0.22, "learning_rate": 0.00013898791423587386, "loss": 4.2625, "step": 88000 },
    { "epoch": 0.22, "learning_rate": 0.0001389253455667595, "loss": 4.2523, "step": 88500 },
    { "epoch": 0.22, "learning_rate": 0.00013886277689764516, "loss": 4.2561, "step": 89000 },
    { "epoch": 0.22, "learning_rate": 0.0001388002082285308, "loss": 4.2425, "step": 89500 },
    { "epoch": 0.23, "learning_rate": 0.00013873763955941646, "loss": 4.253, "step": 90000 },
    { "epoch": 0.23, "learning_rate": 0.0001386750708903021, "loss": 4.2466, "step": 90500 },
    { "epoch": 0.23, "learning_rate": 0.00013861250222118774, "loss": 4.2531, "step": 91000 },
    { "epoch": 0.23, "learning_rate": 0.0001385499335520734, "loss": 4.2428, "step": 91500 },
    { "epoch": 0.23, "learning_rate": 0.00013848736488295904, "loss": 4.2498, "step": 92000 },
    { "epoch": 0.23, "learning_rate": 0.00013842479621384467, "loss": 4.2349, "step": 92500 },
    { "epoch": 0.23, "learning_rate": 0.00013836222754473032, "loss": 4.2522, "step": 93000 },
    { "epoch": 0.23, "learning_rate": 0.00013829965887561597, "loss": 4.2362, "step": 93500 },
    { "epoch": 0.24, "learning_rate": 0.00013823709020650162, "loss": 4.2368, "step": 94000 },
    { "epoch": 0.24, "learning_rate": 0.00013817452153738727, "loss": 4.2235, "step": 94500 },
    { "epoch": 0.24, "learning_rate": 0.00013811195286827293, "loss": 4.2134, "step": 95000 },
    { "epoch": 0.24, "learning_rate": 0.00013804938419915858, "loss": 4.2103, "step": 95500 },
    { "epoch": 0.24, "learning_rate": 0.0001379868155300442, "loss": 4.2198, "step": 96000 },
    { "epoch": 0.24, "learning_rate": 0.00013792424686092985, "loss": 4.2141, "step": 96500 },
    { "epoch": 0.24, "learning_rate": 0.0001378616781918155, "loss": 4.2192, "step": 97000 },
    { "epoch": 0.24, "learning_rate": 0.00013779910952270116, "loss": 4.2225, "step": 97500 },
    { "epoch": 0.25, "learning_rate": 0.0001377365408535868, "loss": 4.2167, "step": 98000 },
    { "epoch": 0.25, "learning_rate": 0.00013767397218447243, "loss": 4.1967, "step": 98500 },
    { "epoch": 0.25, "learning_rate": 0.00013761140351535808, "loss": 4.2036, "step": 99000 },
    { "epoch": 0.25, "learning_rate": 0.00013754883484624374, "loss": 4.23, "step": 99500 },
    { "epoch": 0.25, "learning_rate": 0.0001374862661771294, "loss": 4.2076, "step": 100000 },
    { "epoch": 0.25, "learning_rate": 0.00013742369750801504, "loss": 4.198, "step": 100500 },
    { "epoch": 0.25, "learning_rate": 0.0001373611288389007, "loss": 4.2227, "step": 101000 },
    { "epoch": 0.25, "learning_rate": 0.00013729856016978634, "loss": 4.1795, "step": 101500 },
    { "epoch": 0.26, "learning_rate": 0.00013723599150067197, "loss": 4.2043, "step": 102000 },
    { "epoch": 0.26, "learning_rate": 0.00013717342283155762, "loss": 4.1902, "step": 102500 },
    { "epoch": 0.26, "learning_rate": 0.00013711085416244327, "loss": 4.1818, "step": 103000 },
    { "epoch": 0.26, "learning_rate": 0.00013704828549332892, "loss": 4.1775, "step": 103500 },
    { "epoch": 0.26, "learning_rate": 0.00013698571682421455, "loss": 4.2037, "step": 104000 },
    { "epoch": 0.26, "learning_rate": 0.0001369231481551002, "loss": 4.1918, "step": 104500 },
    { "epoch": 0.26, "learning_rate": 0.00013686057948598585, "loss": 4.2047, "step": 105000 },
    { "epoch": 0.26, "learning_rate": 0.0001367980108168715, "loss": 4.182, "step": 105500 },
    { "epoch": 0.27, "learning_rate": 0.00013673544214775715, "loss": 4.1929, "step": 106000 },
    { "epoch": 0.27, "learning_rate": 0.0001366728734786428, "loss": 4.2035, "step": 106500 },
    { "epoch": 0.27, "learning_rate": 0.00013661030480952846, "loss": 4.1702, "step": 107000 },
    { "epoch": 0.27, "learning_rate": 0.00013654773614041408, "loss": 4.1796, "step": 107500 },
    { "epoch": 0.27, "learning_rate": 0.00013648516747129973, "loss": 4.1841, "step": 108000 },
    { "epoch": 0.27, "learning_rate": 0.00013642259880218538, "loss": 4.1764, "step": 108500 },
    { "epoch": 0.27, "learning_rate": 0.00013636003013307104, "loss": 4.1943, "step": 109000 },
    { "epoch": 0.27, "learning_rate": 0.0001362974614639567, "loss": 4.162, "step": 109500 },
    { "epoch": 0.28, "learning_rate": 0.0001362348927948423, "loss": 4.1719, "step": 110000 },
    { "epoch": 0.28, "learning_rate": 0.00013617232412572796, "loss": 4.173, "step": 110500 },
    { "epoch": 0.28, "learning_rate": 0.00013610975545661364, "loss": 4.1535, "step": 111000 },
    { "epoch": 0.28, "learning_rate": 0.00013604718678749927, "loss": 4.1674, "step": 111500 },
    { "epoch": 0.28, "learning_rate": 0.00013598461811838492, "loss": 4.181, "step": 112000 },
    { "epoch": 0.28, "learning_rate": 0.00013592204944927057, "loss": 4.1721, "step": 112500 },
    { "epoch": 0.28, "learning_rate": 0.00013585948078015622, "loss": 4.2129, "step": 113000 },
    { "epoch": 0.28, "learning_rate": 0.00013579691211104185, "loss": 4.1508, "step": 113500 },
    { "epoch": 0.29, "learning_rate": 0.0001357343434419275, "loss": 4.1705, "step": 114000 },
    { "epoch": 0.29, "learning_rate": 0.00013567177477281315, "loss": 4.1426, "step": 114500 },
    { "epoch": 0.29, "learning_rate": 0.0001356092061036988, "loss": 4.1467, "step": 115000 },
    { "epoch": 0.29, "learning_rate": 0.00013554663743458442, "loss": 4.1589, "step": 115500 },
    { "epoch": 0.29, "learning_rate": 0.0001354840687654701, "loss": 4.1458, "step": 116000 },
    { "epoch": 0.29, "learning_rate": 0.00013542150009635576, "loss": 4.1691, "step": 116500 },
    { "epoch": 0.29, "learning_rate": 0.00013535893142724138, "loss": 4.1407, "step": 117000 },
    { "epoch": 0.29, "learning_rate": 0.00013529636275812703, "loss": 4.1473, "step": 117500 },
    { "epoch": 0.3, "learning_rate": 0.00013523379408901268, "loss": 4.1471, "step": 118000 },
    { "epoch": 0.3, "learning_rate": 0.00013517122541989833, "loss": 4.149, "step": 118500 },
    { "epoch": 0.3, "learning_rate": 0.00013510865675078396, "loss": 4.1389, "step": 119000 },
    { "epoch": 0.3, "learning_rate": 0.0001350460880816696, "loss": 4.1607, "step": 119500 },
    { "epoch": 0.3, "learning_rate": 0.00013498351941255526, "loss": 4.1431, "step": 120000 },
    { "epoch": 0.3, "learning_rate": 0.00013492095074344091, "loss": 4.136, "step": 120500 },
    { "epoch": 0.3, "learning_rate": 0.00013485838207432657, "loss": 4.142, "step": 121000 },
    { "epoch": 0.3, "learning_rate": 0.00013479581340521222, "loss": 4.1344, "step": 121500 },
    { "epoch": 0.31, "learning_rate": 0.00013473324473609787, "loss": 4.1253, "step": 122000 },
    { "epoch": 0.31, "learning_rate": 0.00013467067606698352, "loss": 4.1441, "step": 122500 },
    { "epoch": 0.31, "learning_rate": 0.00013460810739786914, "loss": 4.1341, "step": 123000 },
    { "epoch": 0.31, "learning_rate": 0.0001345455387287548, "loss": 4.1466, "step": 123500 },
    { "epoch": 0.31, "learning_rate": 0.00013448297005964045, "loss": 4.1522, "step": 124000 },
    { "epoch": 0.31, "learning_rate": 0.0001344204013905261, "loss": 4.1288, "step": 124500 },
    { "epoch": 0.31, "learning_rate": 0.00013435783272141172, "loss": 4.1256, "step": 125000 },
    { "epoch": 0.31, "learning_rate": 0.00013429526405229738, "loss": 4.1421, "step": 125500 },
    { "epoch": 0.32, "learning_rate": 0.00013423269538318303, "loss": 4.1247, "step": 126000 },
    { "epoch": 0.32, "learning_rate": 0.00013417012671406868, "loss": 4.129, "step": 126500 },
    { "epoch": 0.32, "learning_rate": 0.00013410755804495433, "loss": 4.1164, "step": 127000 },
    { "epoch": 0.32, "learning_rate": 0.00013404498937583998, "loss": 4.1279, "step": 127500 },
    { "epoch": 0.32, "learning_rate": 0.00013398242070672563, "loss": 4.1218, "step": 128000 },
    { "epoch": 0.32, "learning_rate": 0.00013391985203761126, "loss": 4.1262, "step": 128500 },
    { "epoch": 0.32, "learning_rate": 0.0001338572833684969, "loss": 4.1298, "step": 129000 },
    { "epoch": 0.32, "learning_rate": 0.00013379471469938256, "loss": 4.1108, "step": 129500 },
    { "epoch": 0.33, "learning_rate": 0.0001337321460302682, "loss": 4.1112, "step": 130000 },
    { "epoch": 0.33, "learning_rate": 0.00013366957736115384, "loss": 4.1169, "step": 130500 },
    { "epoch": 0.33, "learning_rate": 0.0001336070086920395, "loss": 4.1235, "step": 131000 },
    { "epoch": 0.33, "learning_rate": 0.00013354444002292514, "loss": 4.1291, "step": 131500 },
    { "epoch": 0.33, "learning_rate": 0.0001334818713538108, "loss": 4.1244, "step": 132000 },
    { "epoch": 0.33, "learning_rate": 0.00013341930268469644, "loss": 4.1072, "step": 132500 },
    { "epoch": 0.33, "learning_rate": 0.0001333567340155821, "loss": 4.109, "step": 133000 },
    { "epoch": 0.33, "learning_rate": 0.00013329416534646775, "loss": 4.1071, "step": 133500 },
    { "epoch": 0.34, "learning_rate": 0.0001332315966773534, "loss": 4.1076, "step": 134000 },
    { "epoch": 0.34, "learning_rate": 0.00013316902800823902, "loss": 4.1154, "step": 134500 },
    { "epoch": 0.34, "learning_rate": 0.00013310645933912468, "loss": 4.1136, "step": 135000 },
    { "epoch": 0.34, "learning_rate": 0.00013304389067001033, "loss": 4.1053, "step": 135500 },
    { "epoch": 0.34, "learning_rate": 0.00013298132200089598, "loss": 4.1116, "step": 136000 },
    { "epoch": 0.34, "learning_rate": 0.0001329187533317816, "loss": 4.0951, "step": 136500 },
    { "epoch": 0.34, "learning_rate": 0.00013285618466266725, "loss": 4.089, "step": 137000 },
    { "epoch": 0.34, "learning_rate": 0.00013279361599355293, "loss": 4.1044, "step": 137500 },
    { "epoch": 0.35, "learning_rate": 0.00013273104732443856, "loss": 4.1043, "step": 138000 },
    { "epoch": 0.35, "learning_rate": 0.0001326684786553242, "loss": 4.0835, "step": 138500 },
    { "epoch": 0.35, "learning_rate": 0.00013260590998620986, "loss": 4.0865, "step": 139000 },
    { "epoch": 0.35, "learning_rate": 0.0001325433413170955, "loss": 4.0892, "step": 139500 },
    { "epoch": 0.35, "learning_rate": 0.00013248077264798114, "loss": 4.1022, "step": 140000 },
    { "epoch": 0.35, "learning_rate": 0.0001324182039788668, "loss": 4.088, "step": 140500 },
    { "epoch": 0.35, "learning_rate": 0.00013235563530975244, "loss": 4.093, "step": 141000 },
    { "epoch": 0.35, "learning_rate": 0.0001322930666406381, "loss": 4.099, "step": 141500 },
    { "epoch": 0.36, "learning_rate": 0.00013223049797152374, "loss": 4.079, "step": 142000 },
    { "epoch": 0.36, "learning_rate": 0.00013216792930240937, "loss": 4.0908, "step": 142500 },
    { "epoch": 0.36, "learning_rate": 0.00013210536063329505, "loss": 4.1, "step": 143000 },
    { "epoch": 0.36, "learning_rate": 0.00013204279196418067, "loss": 4.0889, "step": 143500 },
    { "epoch": 0.36, "learning_rate": 0.00013198022329506632, "loss": 4.0923, "step": 144000 },
    { "epoch": 0.36, "learning_rate": 0.00013191765462595197, "loss": 4.0708, "step": 144500 },
    { "epoch": 0.36, "learning_rate": 0.00013185508595683763, "loss": 4.0838, "step": 145000 },
    { "epoch": 0.36, "learning_rate": 0.00013179251728772328, "loss": 4.0742, "step": 145500 },
    { "epoch": 0.37, "learning_rate": 0.0001317299486186089, "loss": 4.0786, "step": 146000 },
    { "epoch": 0.37, "learning_rate": 0.00013166737994949455, "loss": 4.0673, "step": 146500 },
    { "epoch": 0.37, "learning_rate": 0.0001316048112803802, "loss": 4.0746, "step": 147000 },
    { "epoch": 0.37, "learning_rate": 0.00013154224261126586, "loss": 4.0839, "step": 147500 },
    { "epoch": 0.37, "learning_rate": 0.00013147967394215148, "loss": 4.0709, "step": 148000 },
    { "epoch": 0.37, "learning_rate": 0.00013141710527303716, "loss": 4.0491, "step": 148500 },
    { "epoch": 0.37, "learning_rate": 0.0001313545366039228, "loss": 4.0832, "step": 149000 },
    { "epoch": 0.37, "learning_rate": 0.00013129196793480844, "loss": 4.064, "step": 149500 },
    { "epoch": 0.38, "learning_rate": 0.0001312293992656941, "loss": 4.075, "step": 150000 },
    { "epoch": 0.38, "learning_rate": 0.00013116683059657974, "loss": 4.0708, "step": 150500 },
    { "epoch": 0.38, "learning_rate": 0.0001311042619274654, "loss": 4.0776, "step": 151000 },
    { "epoch": 0.38, "learning_rate": 0.00013104169325835102, "loss": 4.0503, "step": 151500 },
    { "epoch": 0.38, "learning_rate": 0.00013097912458923667, "loss": 4.059, "step": 152000 },
    { "epoch": 0.38, "learning_rate": 0.00013091655592012232, "loss": 4.0565, "step": 152500 },
    { "epoch": 0.38, "learning_rate": 0.00013085398725100797, "loss": 4.0663, "step": 153000 },
    { "epoch": 0.38, "learning_rate": 0.00013079141858189362, "loss": 4.0696, "step": 153500 },
    { "epoch": 0.39, "learning_rate": 0.00013072884991277927, "loss": 4.0619, "step": 154000 },
    { "epoch": 0.39, "learning_rate": 0.00013066628124366493, "loss": 4.0653, "step": 154500 },
    { "epoch": 0.39, "learning_rate": 0.00013060371257455055, "loss": 4.0456, "step": 155000 },
    { "epoch": 0.39, "learning_rate": 0.0001305411439054362, "loss": 4.076, "step": 155500 },
    { "epoch": 0.39, "learning_rate": 0.00013047857523632185, "loss": 4.0597, "step": 156000 },
    { "epoch": 0.39, "learning_rate": 0.0001304160065672075, "loss": 4.0575, "step": 156500 },
    { "epoch": 0.39, "learning_rate": 0.00013035343789809316, "loss": 4.0713, "step": 157000 },
    { "epoch": 0.39, "learning_rate": 0.00013029086922897878, "loss": 4.0654, "step": 157500 },
    { "epoch": 0.4, "learning_rate": 0.00013022830055986443, "loss": 4.0516, "step": 158000 },
    { "epoch": 0.4, "learning_rate": 0.00013016573189075008, "loss": 4.0511, "step": 158500 },
    { "epoch": 0.4, "learning_rate": 0.00013010316322163574, "loss": 4.0686, "step": 159000 },
    { "epoch": 0.4, "learning_rate": 0.0001300405945525214, "loss": 4.0244, "step": 159500 },
    { "epoch": 0.4, "learning_rate": 0.00012997802588340704, "loss": 4.0521, "step": 160000 },
    { "epoch": 0.4, "learning_rate": 0.0001299154572142927, "loss": 4.0511, "step": 160500 },
    { "epoch": 0.4, "learning_rate": 0.00012985288854517831, "loss": 4.0527, "step": 161000 },
    { "epoch": 0.4, "learning_rate": 0.00012979031987606397, "loss": 4.0404, "step": 161500 },
    { "epoch": 0.41, "learning_rate": 0.00012972775120694962, "loss": 4.0419, "step": 162000 },
    { "epoch": 0.41, "learning_rate": 0.00012966518253783527, "loss": 4.0668, "step": 162500 },
    { "epoch": 0.41, "learning_rate": 0.0001296026138687209, "loss": 4.0317, "step": 163000 },
    { "epoch": 0.41, "learning_rate": 0.00012954004519960655, "loss": 4.0394, "step": 163500 },
    { "epoch": 0.41, "learning_rate": 0.0001294774765304922, "loss": 4.0458, "step": 164000 },
    { "epoch": 0.41, "learning_rate": 0.00012941490786137785, "loss": 4.0349, "step": 164500 },
    { "epoch": 0.41, "learning_rate": 0.0001293523391922635, "loss": 4.2634, "step": 165000 },
    { "epoch": 0.41, "learning_rate": 0.00012928977052314915, "loss": 4.0683, "step": 165500 },
    { "epoch": 0.42, "learning_rate": 0.0001292272018540348, "loss": 4.0383, "step": 166000 },
    { "epoch": 0.42, "learning_rate": 0.00012916463318492043, "loss": 4.0375, "step": 166500 },
    { "epoch": 0.42, "learning_rate": 0.00012910206451580608, "loss": 4.0459, "step": 167000 },
    { "epoch": 0.42, "learning_rate": 0.00012903949584669173, "loss": 4.0305, "step": 167500 },
    { "epoch": 0.42, "learning_rate": 0.00012897692717757738, "loss": 4.0304, "step": 168000 },
    { "epoch": 0.42, "learning_rate": 0.00012891435850846303, "loss": 4.0349, "step": 168500 },
    { "epoch": 0.42, "learning_rate": 0.00012885178983934866, "loss": 4.0283, "step": 169000 },
    { "epoch": 0.42, "learning_rate": 0.0001287892211702343, "loss": 4.0258, "step": 169500 },
    { "epoch": 0.43, "learning_rate": 0.00012872665250111996, "loss": 4.0319, "step": 170000 },
    { "epoch": 0.43, "learning_rate": 0.00012866408383200561, "loss": 4.0239, "step": 170500 },
    { "epoch": 0.43, "learning_rate": 0.00012860151516289127, "loss": 4.0318, "step": 171000 },
    { "epoch": 0.43, "learning_rate": 0.00012853894649377692, "loss": 4.0078, "step": 171500 },
    { "epoch": 0.43, "learning_rate": 0.00012847637782466257, "loss": 4.0309, "step": 172000 },
    { "epoch": 0.43, "learning_rate": 0.0001284138091555482, "loss": 4.0206, "step": 172500 },
    { "epoch": 0.43, "learning_rate": 0.00012835124048643385, "loss": 4.0118, "step": 173000 },
    { "epoch": 0.43, "learning_rate": 0.0001282886718173195, "loss": 4.0408, "step": 173500 },
    { "epoch": 0.44, "learning_rate": 0.00012822610314820515, "loss": 4.0364, "step": 174000 },
    { "epoch": 0.44, "learning_rate": 0.00012816353447909077, "loss": 4.0211, "step": 174500 },
    { "epoch": 0.44, "learning_rate": 0.00012810096580997642, "loss": 4.0168, "step": 175000 },
    { "epoch": 0.44, "learning_rate": 0.0001280383971408621, "loss": 4.0254, "step": 175500 },
    { "epoch": 0.44, "learning_rate": 0.00012797582847174773, "loss": 4.0127, "step": 176000 },
    { "epoch": 0.44, "learning_rate": 0.00012791325980263338, "loss": 3.9996, "step": 176500 },
    { "epoch": 0.44, "learning_rate": 0.00012785069113351903, "loss": 4.0224, "step": 177000 },
    { "epoch": 0.44, "learning_rate": 0.00012778812246440468, "loss": 4.0247, "step": 177500 },
    { "epoch": 0.45, "learning_rate": 0.0001277255537952903, "loss": 4.0129, "step": 178000 },
    { "epoch": 0.45, "learning_rate": 0.00012766298512617596, "loss": 4.0236, "step": 178500 },
    { "epoch": 0.45, "learning_rate": 0.0001276004164570616, "loss": 4.0203, "step": 179000 },
    { "epoch": 0.45, "learning_rate": 0.00012753784778794726, "loss": 4.0008, "step": 179500 },
    { "epoch": 0.45, "learning_rate": 0.0001274752791188329, "loss": 4.0208, "step": 180000 },
    { "epoch": 0.45, "learning_rate": 0.00012741271044971854, "loss": 4.0087, "step": 180500 },
    { "epoch": 0.45, "learning_rate": 0.00012735014178060422, "loss": 4.0199, "step": 181000 },
    { "epoch": 0.45, "learning_rate": 0.00012728757311148984, "loss": 4.0143, "step": 181500 },
    { "epoch": 0.46, "learning_rate": 0.0001272250044423755, "loss": 4.0345, "step": 182000 },
    { "epoch": 0.46, "learning_rate": 0.00012716243577326114, "loss": 4.0067, "step": 182500 },
    { "epoch": 0.46, "learning_rate": 0.0001270998671041468, "loss": 4.0124, "step": 183000 },
    { "epoch": 0.46, "learning_rate": 0.00012703729843503245, "loss": 4.0067, "step": 183500 },
    { "epoch": 0.46, "learning_rate": 0.00012697472976591807, "loss": 3.9995, "step": 184000 },
    { "epoch": 0.46, "learning_rate": 0.00012691216109680372, "loss": 4.007, "step": 184500 },
    { "epoch": 0.46, "learning_rate": 0.00012684959242768938, "loss": 4.0085, "step": 185000 },
    { "epoch": 0.46, "learning_rate": 0.00012678702375857503, "loss": 4.0163, "step": 185500 },
    { "epoch": 0.47, "learning_rate": 0.00012672445508946068, "loss": 3.9997, "step": 186000 },
    { "epoch": 0.47, "learning_rate": 0.00012666188642034633, "loss": 3.9908, "step": 186500 },
    { "epoch": 0.47, "learning_rate": 0.00012659931775123198, "loss": 3.9906, "step": 187000 },
    { "epoch": 0.47, "learning_rate": 0.0001265367490821176, "loss": 4.0107, "step": 187500 },
    { "epoch": 0.47, "learning_rate": 0.00012647418041300326, "loss": 4.0004, "step": 188000 },
    { "epoch": 0.47, "learning_rate": 0.0001264116117438889, "loss": 4.0065, "step": 188500 },
    { "epoch": 0.47, "learning_rate": 0.00012634904307477456, "loss": 4.002, "step": 189000 },
    { "epoch": 0.47, "learning_rate": 0.00012628647440566019, "loss": 4.0029, "step": 189500 },
    { "epoch": 0.48, "learning_rate": 0.00012622390573654584, "loss": 3.9801, "step": 190000 },
    { "epoch": 0.48, "learning_rate": 0.0001261613370674315, "loss": 3.9934, "step": 190500 },
    { "epoch": 0.48, "learning_rate": 0.00012609876839831714, "loss": 4.0027, "step": 191000 },
    { "epoch": 0.48, "learning_rate": 0.0001260361997292028, "loss": 4.0057, "step": 191500 },
    { "epoch": 0.48, "learning_rate": 0.00012597363106008844, "loss": 4.0006, "step": 192000 },
    { "epoch": 0.48, "learning_rate": 0.0001259110623909741, "loss": 3.9971, "step": 192500 },
    { "epoch": 0.48, "learning_rate": 0.00012584849372185972, "loss": 3.989, "step": 193000 },
    { "epoch": 0.48, "learning_rate": 0.00012578592505274537, "loss": 3.9951, "step": 193500 },
    { "epoch": 0.49, "learning_rate": 0.00012572335638363102, "loss": 3.9776, "step": 194000 },
    { "epoch": 0.49, "learning_rate": 0.00012566078771451667, "loss": 3.9995, "step": 194500 },
    { "epoch": 0.49, "learning_rate": 0.00012559821904540233, "loss": 3.9752, "step": 195000 },
    { "epoch": 0.49, "learning_rate": 0.00012553565037628795, "loss": 3.9873, "step": 195500 },
    { "epoch": 0.49, "learning_rate": 0.0001254730817071736, "loss": 3.985, "step": 196000 },
    { "epoch": 0.49, "learning_rate": 0.00012541051303805925, "loss": 3.9924, "step": 196500 },
    { "epoch": 0.49, "learning_rate": 0.0001253479443689449, "loss": 3.9778, "step": 197000 },
    { "epoch": 0.49, "learning_rate": 0.00012528537569983056, "loss": 3.9873, "step": 197500 },
    { "epoch": 0.5, "learning_rate": 0.0001252228070307162, "loss": 3.9739, "step": 198000 },
    { "epoch": 0.5, "learning_rate": 0.00012516023836160186, "loss": 3.9946, "step": 198500 },
    { "epoch": 0.5, "learning_rate": 0.00012509766969248748, "loss": 4.008, "step": 199000 },
    { "epoch": 0.5, "learning_rate": 0.00012503510102337314, "loss": 3.9697, "step": 199500 },
    { "epoch": 0.5, "learning_rate": 0.0001249725323542588, "loss": 3.9788, "step": 200000 },
    { "epoch": 0.5, "learning_rate": 0.00012490996368514444, "loss": 3.9803, "step": 200500 },
    { "epoch": 0.5, "learning_rate": 0.00012484739501603006, "loss": 3.9948, "step": 201000 },
    { "epoch": 0.5, "learning_rate": 0.00012478482634691572, "loss": 3.978, "step": 201500 },
    { "epoch": 0.51, "learning_rate": 0.00012472225767780137, "loss": 3.968, "step": 202000 },
    { "epoch": 0.51, "learning_rate": 0.00012465968900868702, "loss": 3.9703, "step": 202500 },
    { "epoch": 0.51, "learning_rate": 0.00012459712033957267, "loss": 3.979, "step": 203000 },
    { "epoch": 0.51, "learning_rate": 0.00012453455167045832, "loss": 3.9788, "step": 203500 },
    { "epoch": 0.51, "learning_rate": 0.00012447198300134397, "loss": 3.9779, "step": 204000 },
    { "epoch": 0.51, "learning_rate": 0.00012440941433222963, "loss": 3.9707, "step": 204500 },
    { "epoch": 0.51, "learning_rate": 0.00012434684566311525, "loss": 3.958, "step": 205000 },
    { "epoch": 0.51, "learning_rate": 0.0001242842769940009, "loss": 3.9778, "step": 205500 },
    { "epoch": 0.52, "learning_rate": 0.00012422170832488655, "loss": 3.9846, "step": 206000 },
    { "epoch": 0.52, "learning_rate": 0.0001241591396557722, "loss": 3.9743, "step": 206500 },
    { "epoch": 0.52, "learning_rate": 0.00012409657098665783, "loss": 3.9612, "step": 207000 },
    { "epoch": 0.52, "learning_rate": 0.00012403400231754348, "loss": 3.9537, "step": 207500 },
    { "epoch": 0.52, "learning_rate": 0.00012397143364842916, "loss": 3.9754, "step": 208000 },
    { "epoch": 0.52, "learning_rate": 0.00012390886497931478, "loss": 3.9616, "step": 208500 },
    { "epoch": 0.52, "learning_rate": 0.00012384629631020044, "loss": 3.9575, "step": 209000 },
    { "epoch": 0.52, "learning_rate": 0.0001237837276410861, "loss": 3.9535, "step": 209500 },
    { "epoch": 0.53, "learning_rate": 0.00012372115897197174, "loss": 3.9767, "step": 210000 },
    { "epoch": 0.53, "learning_rate": 0.00012365859030285736, "loss": 3.9582, "step": 210500 },
    { "epoch": 0.53, "learning_rate": 0.00012359602163374302, "loss": 3.9737, "step": 211000 },
    { "epoch": 0.53, "learning_rate": 0.00012353345296462867, "loss": 3.9652, "step": 211500 },
    { "epoch": 0.53, "learning_rate": 0.00012347088429551432, "loss": 3.9688, "step": 212000 },
    { "epoch": 0.53, "learning_rate": 0.00012340831562639994, "loss": 3.9344, "step": 212500 },
    { "epoch": 0.53, "learning_rate": 0.00012334574695728562, "loss": 3.9645, "step": 213000 },
    { "epoch": 0.53, "learning_rate": 0.00012328317828817127, "loss": 3.9539, "step": 213500 },
    { "epoch": 0.54, "learning_rate": 0.0001232206096190569, "loss": 3.9613, "step": 214000 },
    { "epoch": 0.54, "learning_rate": 0.00012315804094994255, "loss": 3.9479, "step": 214500 },
    { "epoch": 0.54, "learning_rate": 0.0001230954722808282, "loss": 3.9571, "step": 215000 },
    { "epoch": 0.54, "learning_rate": 0.00012303290361171385, "loss": 3.9622, "step": 215500 },
    { "epoch": 0.54, "learning_rate": 0.0001229703349425995, "loss": 3.952, "step": 216000 },
    { "epoch": 0.54, "learning_rate": 0.00012290776627348513, "loss": 3.9619, "step": 216500 },
    { "epoch": 0.54, "learning_rate": 0.00012284519760437078, "loss": 3.9535, "step": 217000 },
    { "epoch": 0.54, "learning_rate": 0.00012278262893525643, "loss": 3.9575, "step": 217500 },
    { "epoch": 0.55, "learning_rate": 0.00012272006026614208, "loss": 3.9497, "step": 218000 },
    { "epoch": 0.55, "learning_rate": 0.00012265749159702774, "loss": 3.9577, "step": 218500 },
    { "epoch": 0.55, "learning_rate": 0.0001225949229279134, "loss": 3.9517, "step": 219000 },
    { "epoch": 0.55, "learning_rate": 0.00012253235425879904, "loss": 3.9447, "step": 219500 },
    { "epoch": 0.55, "learning_rate": 0.00012246978558968466, "loss": 3.9566, "step": 220000 },
    { "epoch": 0.55, "learning_rate": 0.00012240721692057031, "loss": 3.9493, "step": 220500 },
    { "epoch": 0.55, "learning_rate": 0.00012234464825145597, "loss": 3.9701, "step": 221000 },
    { "epoch": 0.55, "learning_rate": 0.00012228207958234162, "loss": 3.9576, "step": 221500 },
    { "epoch": 0.56, "learning_rate": 0.00012221951091322724, "loss": 3.9513, "step": 222000 },
    { "epoch": 0.56, "learning_rate": 0.0001221569422441129, "loss": 3.9434, "step": 222500 },
    { "epoch": 0.56, "learning_rate": 0.00012209437357499855, "loss": 3.9586, "step": 223000 },
    { "epoch": 0.56, "learning_rate": 0.00012203180490588418, "loss": 3.9503, "step": 223500 },
    { "epoch": 0.56, "learning_rate": 0.00012196923623676985, "loss": 3.9385, "step": 224000 },
    { "epoch": 0.56, "learning_rate": 0.0001219066675676555, "loss": 3.9662, "step": 224500 },
    { "epoch": 0.56, "learning_rate": 0.00012184409889854114, "loss": 3.9432, "step": 225000 },
    { "epoch": 0.56, "learning_rate": 0.00012178153022942679, "loss": 3.9362, "step": 225500 },
    { "epoch": 0.57, "learning_rate": 0.00012171896156031243, "loss": 3.9571, "step": 226000 },
    { "epoch": 0.57, "learning_rate": 0.00012165639289119808, "loss": 3.9502, "step": 226500 },
    { "epoch": 0.57, "learning_rate": 0.00012159382422208372, "loss": 3.9388, "step": 227000 },
    { "epoch": 0.57, "learning_rate": 0.00012153125555296937, "loss": 3.9471, "step": 227500 },
    { "epoch": 0.57, "learning_rate": 0.00012146868688385502, "loss": 3.9667, "step": 228000 },
    { "epoch": 0.57, "learning_rate": 0.00012140611821474066, "loss": 3.9273, "step": 228500 },
    { "epoch": 0.57, "learning_rate": 0.00012134354954562631, "loss": 3.9378, "step": 229000 },
    { "epoch": 0.57, "learning_rate": 0.00012128098087651196, "loss": 3.9617, "step": 229500 },
    { "epoch": 0.58, "learning_rate": 0.00012121841220739761, "loss": 3.9304, "step": 230000 },
    { "epoch": 0.58, "learning_rate": 0.00012115584353828327, "loss": 3.9296, "step": 230500 },
    { "epoch": 0.58, "learning_rate": 0.0001210932748691689, "loss": 3.9526, "step": 231000 },
    { "epoch": 0.58, "learning_rate": 0.00012103070620005456, "loss": 3.9438, "step": 231500 },
    { "epoch": 0.58, "learning_rate": 0.00012096813753094019, "loss": 3.9403, "step": 232000 },
    { "epoch": 0.58, "learning_rate": 0.00012090556886182584, "loss": 3.9372, "step": 232500 },
    { "epoch": 0.58, "learning_rate": 0.00012084300019271148, "loss": 3.9406, "step": 233000 },
    { "epoch": 0.58, "learning_rate": 0.00012078043152359713, "loss": 3.9351, "step": 233500 },
    { "epoch": 0.59, "learning_rate": 0.00012071786285448277, "loss": 3.942, "step": 234000 },
    { "epoch": 0.59, "learning_rate": 0.00012065529418536844, "loss": 3.9334, "step": 234500 },
    { "epoch": 0.59, "learning_rate": 0.00012059272551625409, "loss": 3.9346, "step": 235000 },
    { "epoch": 0.59, "learning_rate": 0.00012053015684713973, "loss": 3.9501, "step": 235500 },
    { "epoch": 0.59, "learning_rate": 0.00012046758817802538, "loss": 3.9416, "step": 236000 },
    { "epoch": 0.59, "learning_rate": 0.00012040501950891102, "loss": 3.9413, "step": 236500 },
    { "epoch": 0.59, "learning_rate": 0.00012034245083979667,
| "loss": 3.9312, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001202798821706823, | |
| "loss": 3.9418, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012021731350156796, | |
| "loss": 3.9364, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012015474483245361, | |
| "loss": 3.9398, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012009217616333925, | |
| "loss": 3.9238, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0001200296074942249, | |
| "loss": 4.5988, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011996703882511055, | |
| "loss": 3.9397, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0001199044701559962, | |
| "loss": 3.9328, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011984190148688184, | |
| "loss": 3.931, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011977933281776749, | |
| "loss": 3.9197, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011971676414865314, | |
| "loss": 3.931, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011965419547953878, | |
| "loss": 3.9401, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011959162681042443, | |
| "loss": 3.9206, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011952905814131007, | |
| "loss": 3.918, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011946648947219572, | |
| "loss": 3.9236, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011940392080308136, | |
| "loss": 3.9311, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011934135213396701, | |
| "loss": 3.9148, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011927878346485268, | |
| "loss": 3.9347, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011921621479573832, | |
| "loss": 3.9306, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011915364612662397, | |
| "loss": 3.9145, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0001190910774575096, | |
| "loss": 3.9325, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011902850878839526, | |
| "loss": 3.9221, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0001189659401192809, | |
| "loss": 3.9248, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011890337145016655, | |
| "loss": 3.9249, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011884080278105219, | |
| "loss": 3.9239, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011877823411193784, | |
| "loss": 3.9186, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011871566544282349, | |
| "loss": 3.9135, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011865309677370913, | |
| "loss": 3.9151, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011859052810459479, | |
| "loss": 3.9165, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011852795943548043, | |
| "loss": 3.9201, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011846539076636608, | |
| "loss": 3.9049, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011840282209725172, | |
| "loss": 3.9179, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011834025342813737, | |
| "loss": 3.9113, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011827768475902302, | |
| "loss": 3.9094, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00011821511608990866, | |
| "loss": 3.9265, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00011815254742079431, | |
| "loss": 3.8923, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00011808997875167995, | |
| "loss": 3.9079, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0001180274100825656, | |
| "loss": 3.9078, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00011796484141345124, | |
| "loss": 3.9074, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0001179022727443369, | |
| "loss": 3.9031, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00011783970407522256, | |
| "loss": 3.9024, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0001177771354061082, | |
| "loss": 3.9096, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011771456673699385, | |
| "loss": 3.9072, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011765199806787948, | |
| "loss": 3.9173, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011758942939876514, | |
| "loss": 3.9193, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011752686072965077, | |
| "loss": 3.9022, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011746429206053643, | |
| "loss": 3.9117, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011740172339142206, | |
| "loss": 3.9061, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011733915472230772, | |
| "loss": 3.914, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00011727658605319338, | |
| "loss": 3.8996, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011721401738407902, | |
| "loss": 3.8946, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011715144871496467, | |
| "loss": 3.9097, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011708888004585031, | |
| "loss": 3.9106, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011702631137673596, | |
| "loss": 3.9022, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011696374270762161, | |
| "loss": 3.9096, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011690117403850725, | |
| "loss": 3.9113, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001168386053693929, | |
| "loss": 3.9066, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00011677603670027854, | |
| "loss": 3.8952, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011671346803116419, | |
| "loss": 3.9009, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011665089936204983, | |
| "loss": 3.8977, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0001165883306929355, | |
| "loss": 3.91, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011652576202382115, | |
| "loss": 3.9027, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011646319335470678, | |
| "loss": 3.921, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011640062468559244, | |
| "loss": 3.8889, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011633805601647807, | |
| "loss": 3.8891, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00011627548734736373, | |
| "loss": 3.91, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011621291867824936, | |
| "loss": 3.8922, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011615035000913501, | |
| "loss": 3.8894, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011608778134002065, | |
| "loss": 3.8887, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001160252126709063, | |
| "loss": 3.9053, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011596264400179194, | |
| "loss": 3.9023, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011590007533267761, | |
| "loss": 3.89, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00011583750666356326, | |
| "loss": 3.9114, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001157749379944489, | |
| "loss": 3.8969, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011571236932533455, | |
| "loss": 3.8974, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011564980065622019, | |
| "loss": 3.8966, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011558723198710584, | |
| "loss": 3.8898, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011552466331799149, | |
| "loss": 3.8769, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011546209464887713, | |
| "loss": 3.8784, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011539952597976278, | |
| "loss": 3.9041, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011533695731064842, | |
| "loss": 3.8765, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00011527438864153407, | |
| "loss": 3.8758, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011521181997241972, | |
| "loss": 3.8971, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011514925130330537, | |
| "loss": 3.8909, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011508668263419102, | |
| "loss": 3.8881, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011502411396507666, | |
| "loss": 3.8766, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011496154529596231, | |
| "loss": 3.8711, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011489897662684795, | |
| "loss": 3.8868, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001148364079577336, | |
| "loss": 3.8791, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011477383928861924, | |
| "loss": 3.8802, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001147112706195049, | |
| "loss": 3.8802, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011464870195039053, | |
| "loss": 3.885, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011458613328127618, | |
| "loss": 3.8846, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011452356461216185, | |
| "loss": 3.8984, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011446099594304749, | |
| "loss": 3.8829, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011439842727393314, | |
| "loss": 3.8881, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011433585860481878, | |
| "loss": 3.8964, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00011427328993570443, | |
| "loss": 3.8881, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011421072126659007, | |
| "loss": 3.8769, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011414815259747572, | |
| "loss": 3.878, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011408558392836137, | |
| "loss": 3.91, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011402301525924701, | |
| "loss": 3.8875, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011396044659013266, | |
| "loss": 3.8843, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011389787792101831, | |
| "loss": 3.8807, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011383530925190396, | |
| "loss": 3.8784, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0001137727405827896, | |
| "loss": 3.8835, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011371017191367525, | |
| "loss": 3.8723, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001136476032445609, | |
| "loss": 3.8745, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011358503457544654, | |
| "loss": 3.896, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011352246590633219, | |
| "loss": 3.8835, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011345989723721783, | |
| "loss": 3.8634, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011339732856810348, | |
| "loss": 3.8514, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011333475989898912, | |
| "loss": 3.867, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00011327219122987477, | |
| "loss": 3.8895, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011320962256076044, | |
| "loss": 3.8819, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011314705389164608, | |
| "loss": 3.8708, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011308448522253173, | |
| "loss": 3.8904, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011302191655341737, | |
| "loss": 3.8782, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011295934788430302, | |
| "loss": 3.8866, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011289677921518865, | |
| "loss": 3.8742, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001128342105460743, | |
| "loss": 3.8854, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00011277164187695994, | |
| "loss": 3.867, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0001127090732078456, | |
| "loss": 3.877, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011264650453873125, | |
| "loss": 3.8697, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011258393586961689, | |
| "loss": 3.8574, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011252136720050255, | |
| "loss": 3.8845, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011245879853138819, | |
| "loss": 3.8626, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011239622986227384, | |
| "loss": 3.8609, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011233366119315949, | |
| "loss": 3.8632, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011227109252404513, | |
| "loss": 3.8588, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011220852385493078, | |
| "loss": 3.8738, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011214595518581642, | |
| "loss": 3.8531, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011208338651670207, | |
| "loss": 3.8755, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011202081784758771, | |
| "loss": 3.8675, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011195824917847336, | |
| "loss": 3.8623, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.000111895680509359, | |
| "loss": 3.8766, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011183311184024466, | |
| "loss": 3.8763, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00011177054317113032, | |
| "loss": 3.8474, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011170797450201595, | |
| "loss": 3.8749, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0001116454058329016, | |
| "loss": 3.8519, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011158283716378724, | |
| "loss": 3.8551, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0001115202684946729, | |
| "loss": 3.8674, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011145769982555853, | |
| "loss": 3.8618, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011139513115644418, | |
| "loss": 3.8568, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011133256248732982, | |
| "loss": 3.8617, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00011126999381821547, | |
| "loss": 3.8634, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011120742514910114, | |
| "loss": 3.8671, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011114485647998678, | |
| "loss": 3.8641, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011108228781087243, | |
| "loss": 3.859, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011101971914175807, | |
| "loss": 3.8586, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011095715047264372, | |
| "loss": 3.8569, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011089458180352937, | |
| "loss": 3.8563, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011083201313441501, | |
| "loss": 3.8736, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011076944446530066, | |
| "loss": 3.8589, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001107068757961863, | |
| "loss": 3.8563, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00011064430712707195, | |
| "loss": 3.8621, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00011058173845795759, | |
| "loss": 3.8506, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00011051916978884325, | |
| "loss": 3.8598, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001104566011197289, | |
| "loss": 3.8423, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00011039403245061454, | |
| "loss": 3.8426, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001103314637815002, | |
| "loss": 3.8441, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00011026889511238583, | |
| "loss": 3.8521, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00011020632644327148, | |
| "loss": 3.8747, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00011014375777415712, | |
| "loss": 3.8593, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00011008118910504277, | |
| "loss": 3.8664, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00011001862043592841, | |
| "loss": 3.8536, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00010995605176681406, | |
| "loss": 3.8628, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001098934830976997, | |
| "loss": 3.8362, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00010983091442858537, | |
| "loss": 3.8512, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00010976834575947102, | |
| "loss": 3.8431, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010970577709035666, | |
| "loss": 3.8705, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010964320842124231, | |
| "loss": 3.8634, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010958063975212795, | |
| "loss": 3.8493, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0001095180710830136, | |
| "loss": 3.8565, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010945550241389925, | |
| "loss": 3.8613, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010939293374478489, | |
| "loss": 3.8443, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010933036507567054, | |
| "loss": 3.8538, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00010926779640655618, | |
| "loss": 3.8485, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010920522773744183, | |
| "loss": 3.8492, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010914265906832748, | |
| "loss": 3.8408, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010908009039921313, | |
| "loss": 3.856, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010901752173009878, | |
| "loss": 3.8426, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010895495306098442, | |
| "loss": 3.853, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010889238439187007, | |
| "loss": 3.8405, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010882981572275571, | |
| "loss": 3.8473, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00010876724705364136, | |
| "loss": 3.858, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.000108704678384527, | |
| "loss": 3.8421, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010864210971541265, | |
| "loss": 3.8442, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010857954104629829, | |
| "loss": 3.8451, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010851697237718394, | |
| "loss": 3.8518, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010845440370806961, | |
| "loss": 3.8548, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010839183503895525, | |
| "loss": 3.8413, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0001083292663698409, | |
| "loss": 3.8245, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00010826669770072654, | |
| "loss": 3.8256, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010820412903161219, | |
| "loss": 3.8333, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010814156036249782, | |
| "loss": 3.8235, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010807899169338348, | |
| "loss": 3.8425, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010801642302426913, | |
| "loss": 3.8384, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010795385435515477, | |
| "loss": 3.839, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010789128568604042, | |
| "loss": 3.8271, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010782871701692607, | |
| "loss": 3.8388, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00010776614834781172, | |
| "loss": 3.8421, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010770357967869737, | |
| "loss": 3.8411, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010764101100958301, | |
| "loss": 3.8433, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010757844234046866, | |
| "loss": 3.8453, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0001075158736713543, | |
| "loss": 3.8367, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010745330500223995, | |
| "loss": 3.8419, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010739073633312559, | |
| "loss": 3.8371, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010732816766401124, | |
| "loss": 3.8296, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00010726559899489688, | |
| "loss": 3.8465, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010720303032578253, | |
| "loss": 3.8562, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001071404616566682, | |
| "loss": 3.8395, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010707789298755383, | |
| "loss": 3.8356, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010701532431843949, | |
| "loss": 3.8115, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010695275564932512, | |
| "loss": 3.8326, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010689018698021078, | |
| "loss": 3.8343, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010682761831109641, | |
| "loss": 3.8243, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00010676504964198207, | |
| "loss": 3.8421, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001067024809728677, | |
| "loss": 3.834, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00010663991230375336, | |
| "loss": 3.8354, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.000106577343634639, | |
| "loss": 3.8305, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00010651477496552464, | |
| "loss": 3.8433, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00010645220629641031, | |
| "loss": 3.8429, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00010638963762729595, | |
| "loss": 3.8368, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001063270689581816, | |
| "loss": 3.8596, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00010626450028906725, | |
| "loss": 3.8286, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010620193161995289, | |
| "loss": 3.8355, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010613936295083854, | |
| "loss": 3.8319, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010607679428172418, | |
| "loss": 3.8131, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010601422561260983, | |
| "loss": 3.8343, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010595165694349547, | |
| "loss": 3.8346, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010588908827438112, | |
| "loss": 3.827, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010582651960526676, | |
| "loss": 3.819, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010576395093615242, | |
| "loss": 3.8327, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010570138226703808, | |
| "loss": 3.8175, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010563881359792371, | |
| "loss": 3.827, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010557624492880936, | |
| "loss": 3.8061, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.000105513676259695, | |
| "loss": 3.8093, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010545110759058065, | |
| "loss": 3.8266, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010538853892146629, | |
| "loss": 3.8325, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010532597025235194, | |
| "loss": 3.84, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010526340158323758, | |
| "loss": 3.8243, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010520083291412323, | |
| "loss": 3.8262, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001051382642450089, | |
| "loss": 3.8159, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010507569557589454, | |
| "loss": 3.8309, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010501312690678019, | |
| "loss": 3.8159, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010495055823766583, | |
| "loss": 3.8193, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010488798956855148, | |
| "loss": 3.8276, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010482542089943713, | |
| "loss": 3.8212, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010476285223032277, | |
| "loss": 3.8248, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010470028356120842, | |
| "loss": 3.8317, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010463771489209406, | |
| "loss": 3.816, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010457514622297971, | |
| "loss": 3.8148, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010451257755386535, | |
| "loss": 3.8428, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010445000888475101, | |
| "loss": 3.8101, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010438744021563666, | |
| "loss": 3.8253, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001043248715465223, | |
| "loss": 3.8362, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010426230287740795, | |
| "loss": 3.8227, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010419973420829359, | |
| "loss": 3.8267, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010413716553917924, | |
| "loss": 3.8239, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010407459687006488, | |
| "loss": 7.9568, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010401202820095053, | |
| "loss": 3.8325, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010394945953183617, | |
| "loss": 3.8136, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010388689086272182, | |
| "loss": 3.8197, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010382432219360747, | |
| "loss": 3.8178, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010376175352449313, | |
| "loss": 3.8233, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010369918485537878, | |
| "loss": 3.8212, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010363661618626442, | |
| "loss": 3.8264, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010357404751715007, | |
| "loss": 3.8236, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001035114788480357, | |
| "loss": 3.8209, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010344891017892136, | |
| "loss": 3.8263, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010338634150980701, | |
| "loss": 3.8064, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010332377284069265, | |
| "loss": 3.7992, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001032612041715783, | |
| "loss": 3.8319, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010319863550246394, | |
| "loss": 3.815, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010313606683334959, | |
| "loss": 3.8187, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010307349816423525, | |
| "loss": 3.8098, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010301092949512089, | |
| "loss": 3.822, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010294836082600654, | |
| "loss": 3.8102, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010288579215689218, | |
| "loss": 3.8287, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010282322348777783, | |
| "loss": 3.8026, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010276065481866347, | |
| "loss": 3.8152, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010269808614954912, | |
| "loss": 3.8109, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010263551748043476, | |
| "loss": 3.8145, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010257294881132041, | |
| "loss": 3.8125, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010251038014220605, | |
| "loss": 3.8163, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0001024478114730917, | |
| "loss": 3.8342, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010238524280397737, | |
| "loss": 3.8269, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.000102322674134863, | |
| "loss": 3.8059, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010226010546574866, | |
| "loss": 3.8022, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001021975367966343, | |
| "loss": 3.8129, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010213496812751995, | |
| "loss": 3.8221, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010207239945840558, | |
| "loss": 3.8143, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010200983078929124, | |
| "loss": 3.8103, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010194726212017689, | |
| "loss": 3.8045, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010188469345106253, | |
| "loss": 3.7872, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010182212478194818, | |
| "loss": 3.8087, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010175955611283383, | |
| "loss": 3.7927, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010169698744371948, | |
| "loss": 3.8145, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010163441877460513, | |
| "loss": 3.8, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010157185010549077, | |
| "loss": 3.8151, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010150928143637642, | |
| "loss": 3.8143, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010144671276726206, | |
| "loss": 3.8224, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010138414409814771, | |
| "loss": 3.81, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010132157542903335, | |
| "loss": 3.7963, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.000101259006759919, | |
| "loss": 3.8033, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010119643809080464, | |
| "loss": 3.8054, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010113386942169029, | |
| "loss": 3.8188, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010107130075257596, | |
| "loss": 3.8104, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0001010087320834616, | |
| "loss": 3.8159, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010094616341434725, | |
| "loss": 3.8035, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010088359474523288, | |
| "loss": 3.8063, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010082102607611853, | |
| "loss": 3.7984, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010075845740700417, | |
| "loss": 3.8028, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010069588873788982, | |
| "loss": 3.8001, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010063332006877546, | |
| "loss": 3.8099, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010057075139966111, | |
| "loss": 3.7928, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010050818273054677, | |
| "loss": 3.7977, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0001004456140614324, | |
| "loss": 3.7979, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010038304539231807, | |
| "loss": 3.8007, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010032047672320371, | |
| "loss": 3.8017, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010025790805408936, | |
| "loss": 3.7762, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010019533938497501, | |
| "loss": 3.8018, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010013277071586065, | |
| "loss": 3.8004, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001000702020467463, | |
| "loss": 3.7908, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010000763337763194, | |
| "loss": 3.7934, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 9.994506470851759e-05, | |
| "loss": 3.781, | |
| "step": 400000 | |
| } | |
| ], | |
| "max_steps": 1198683, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.3368536226947138e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
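
The record above is the tail of a Hugging Face `Trainer` state file (`trainer_state.json`): `log_history` holds one entry per logging interval with `epoch`, `learning_rate`, `loss`, and `step`, followed by run-level metadata (`max_steps`, `num_train_epochs`, `total_flos`). As a minimal sketch of how such a file is typically consumed (the filename `trainer_state.json` and the use of matplotlib are assumptions for illustration, not part of the log itself), one could load the history and inspect the loss curve:

```python
# Minimal sketch: load a Hugging Face Trainer state file and inspect the loss curve.
# Assumptions (not part of the original log): the JSON above is saved as
# "trainer_state.json" in the working directory, and matplotlib is installed.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries epoch, learning_rate, loss, and step.
history = [h for h in state["log_history"] if "loss" in h]
steps = [h["step"] for h in history]
losses = [h["loss"] for h in history]

print(f"logged points: {len(history)}")
print(f"first loss: {losses[0]:.4f} at step {steps[0]}")
print(f"last loss:  {losses[-1]:.4f} at step {steps[-1]}")

# Flag isolated spikes where the logged loss jumps well above its neighbour.
for prev, cur in zip(history, history[1:]):
    if cur["loss"] > prev["loss"] * 1.1:
        print(f"loss spike at step {cur['step']}: "
              f"{prev['loss']:.4f} -> {cur['loss']:.4f}")

import matplotlib.pyplot as plt  # optional; drop the plot if unavailable

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("loss over the first epoch (400k of 1,198,683 max steps)")
plt.show()
```

Run against this log, such a script would show the loss falling from about 9.12 at step 500 to about 3.78 at step 400,000, and the spike check would flag the two isolated jumps in the data above (to 4.5988 at step 239,500 and 7.9568 at step 367,000), after each of which the curve recovers within one logging interval.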