| { | |
| "best_global_step": 500, | |
| "best_metric": 1.0007914304733276, | |
| "best_model_checkpoint": "./medgemma-finetuned-checkpoints/checkpoint-500", | |
| "epoch": 1.0810810810810811, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005405405405405405, | |
| "grad_norm": 26.92987632751465, | |
| "learning_rate": 0.0, | |
| "loss": 3.0682, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005405405405405406, | |
| "grad_norm": 5.15996789932251, | |
| "learning_rate": 1.8e-05, | |
| "loss": 2.5851, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010810810810810811, | |
| "grad_norm": 1.1928019523620605, | |
| "learning_rate": 3.8e-05, | |
| "loss": 1.3708, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.016216216216216217, | |
| "grad_norm": 0.7614617347717285, | |
| "learning_rate": 5.8e-05, | |
| "loss": 1.0732, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.021621621621621623, | |
| "grad_norm": 0.7028294801712036, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 1.0224, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02702702702702703, | |
| "grad_norm": 0.6575226783752441, | |
| "learning_rate": 9.8e-05, | |
| "loss": 0.963, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.032432432432432434, | |
| "grad_norm": 0.6642696261405945, | |
| "learning_rate": 0.000118, | |
| "loss": 0.9502, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03783783783783784, | |
| "grad_norm": 0.6854572296142578, | |
| "learning_rate": 0.000138, | |
| "loss": 0.9465, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.043243243243243246, | |
| "grad_norm": 0.755558967590332, | |
| "learning_rate": 0.00015800000000000002, | |
| "loss": 0.9392, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04864864864864865, | |
| "grad_norm": 0.6308918595314026, | |
| "learning_rate": 0.00017800000000000002, | |
| "loss": 0.9278, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 0.6420126557350159, | |
| "learning_rate": 0.00019800000000000002, | |
| "loss": 0.9147, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05945945945945946, | |
| "grad_norm": 0.7269052863121033, | |
| "learning_rate": 0.00019999691576447898, | |
| "loss": 0.9179, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06486486486486487, | |
| "grad_norm": 0.6968148350715637, | |
| "learning_rate": 0.00019998625445384374, | |
| "loss": 0.8951, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07027027027027027, | |
| "grad_norm": 0.6975257396697998, | |
| "learning_rate": 0.00019996797880281932, | |
| "loss": 0.9053, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07567567567567568, | |
| "grad_norm": 0.8497925400733948, | |
| "learning_rate": 0.0001999420902031673, | |
| "loss": 0.8878, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08108108108108109, | |
| "grad_norm": 0.7157301902770996, | |
| "learning_rate": 0.00019990859062640477, | |
| "loss": 0.8974, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08648648648648649, | |
| "grad_norm": 0.657098114490509, | |
| "learning_rate": 0.0001998674826236542, | |
| "loss": 0.8784, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0918918918918919, | |
| "grad_norm": 0.6943176984786987, | |
| "learning_rate": 0.00019981876932544917, | |
| "loss": 0.8935, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0972972972972973, | |
| "grad_norm": 0.7490524053573608, | |
| "learning_rate": 0.0001997624544414959, | |
| "loss": 0.8523, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10270270270270271, | |
| "grad_norm": 0.7332305312156677, | |
| "learning_rate": 0.00019969854226039088, | |
| "loss": 0.8589, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 0.6616835594177246, | |
| "learning_rate": 0.00019962703764929413, | |
| "loss": 0.8727, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11351351351351352, | |
| "grad_norm": 0.6945931315422058, | |
| "learning_rate": 0.00019954794605355863, | |
| "loss": 0.8255, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11891891891891893, | |
| "grad_norm": 0.7543134093284607, | |
| "learning_rate": 0.00019946127349631564, | |
| "loss": 0.8157, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12432432432432433, | |
| "grad_norm": 0.7396084070205688, | |
| "learning_rate": 0.00019936702657801587, | |
| "loss": 0.8329, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12972972972972974, | |
| "grad_norm": 0.7503929138183594, | |
| "learning_rate": 0.0001992652124759271, | |
| "loss": 0.8281, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 0.7012407779693604, | |
| "learning_rate": 0.00019915583894358744, | |
| "loss": 0.891, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14054054054054055, | |
| "grad_norm": 0.6991789937019348, | |
| "learning_rate": 0.00019903891431021477, | |
| "loss": 0.8237, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14594594594594595, | |
| "grad_norm": 0.6946726441383362, | |
| "learning_rate": 0.0001989144474800726, | |
| "loss": 0.8132, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.15135135135135136, | |
| "grad_norm": 0.7045170068740845, | |
| "learning_rate": 0.00019878244793179197, | |
| "loss": 0.8231, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15675675675675677, | |
| "grad_norm": 0.7510865926742554, | |
| "learning_rate": 0.00019864292571764955, | |
| "loss": 0.8367, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.7087928652763367, | |
| "learning_rate": 0.00019849589146280213, | |
| "loss": 0.8024, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16756756756756758, | |
| "grad_norm": 0.7199848890304565, | |
| "learning_rate": 0.00019834135636447747, | |
| "loss": 0.8263, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17297297297297298, | |
| "grad_norm": 0.7465829253196716, | |
| "learning_rate": 0.00019817933219112158, | |
| "loss": 0.8244, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1783783783783784, | |
| "grad_norm": 0.791543185710907, | |
| "learning_rate": 0.0001980098312815026, | |
| "loss": 0.7822, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1837837837837838, | |
| "grad_norm": 0.8022134900093079, | |
| "learning_rate": 0.00019783286654377106, | |
| "loss": 0.7901, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1891891891891892, | |
| "grad_norm": 0.7753133773803711, | |
| "learning_rate": 0.00019764845145447689, | |
| "loss": 0.818, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1945945945945946, | |
| "grad_norm": 0.729603111743927, | |
| "learning_rate": 0.00019745660005754308, | |
| "loss": 0.8011, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.7126052975654602, | |
| "learning_rate": 0.00019725732696319632, | |
| "loss": 0.7756, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.20540540540540542, | |
| "grad_norm": 0.7011649012565613, | |
| "learning_rate": 0.00019705064734685425, | |
| "loss": 0.7745, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.21081081081081082, | |
| "grad_norm": 0.7712651491165161, | |
| "learning_rate": 0.00019683657694796985, | |
| "loss": 0.8, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 1.7429534196853638, | |
| "learning_rate": 0.00019661513206883287, | |
| "loss": 0.8019, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22162162162162163, | |
| "grad_norm": 0.7776182293891907, | |
| "learning_rate": 0.0001963863295733281, | |
| "loss": 0.7547, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.22702702702702704, | |
| "grad_norm": 0.748671293258667, | |
| "learning_rate": 0.0001961501868856515, | |
| "loss": 0.7754, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.23243243243243245, | |
| "grad_norm": 0.7607825994491577, | |
| "learning_rate": 0.00019590672198898295, | |
| "loss": 0.7264, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.23783783783783785, | |
| "grad_norm": 0.7991960644721985, | |
| "learning_rate": 0.000195655953424117, | |
| "loss": 0.7033, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.24324324324324326, | |
| "grad_norm": 0.777020275592804, | |
| "learning_rate": 0.0001953979002880507, | |
| "loss": 0.7713, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.24864864864864866, | |
| "grad_norm": 0.7802757024765015, | |
| "learning_rate": 0.00019513258223252948, | |
| "loss": 0.7435, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.25405405405405407, | |
| "grad_norm": 0.753276526927948, | |
| "learning_rate": 0.00019486001946255046, | |
| "loss": 0.7578, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2594594594594595, | |
| "grad_norm": 0.8009371757507324, | |
| "learning_rate": 0.0001945802327348239, | |
| "loss": 0.7306, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2648648648648649, | |
| "grad_norm": 0.8639614582061768, | |
| "learning_rate": 0.00019429324335619233, | |
| "loss": 0.7406, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 0.7347239851951599, | |
| "learning_rate": 0.00019399907318200802, | |
| "loss": 0.7526, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "eval_loss": 1.0007914304733276, | |
| "eval_runtime": 1132.9831, | |
| "eval_samples_per_second": 9.225, | |
| "eval_steps_per_second": 2.306, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2756756756756757, | |
| "grad_norm": 0.7618102431297302, | |
| "learning_rate": 0.0001936977446144687, | |
| "loss": 0.732, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2810810810810811, | |
| "grad_norm": 0.7626764178276062, | |
| "learning_rate": 0.00019338928060091143, | |
| "loss": 0.7637, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2864864864864865, | |
| "grad_norm": 0.7320432662963867, | |
| "learning_rate": 0.0001930737046320651, | |
| "loss": 0.7349, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2918918918918919, | |
| "grad_norm": 0.8312086462974548, | |
| "learning_rate": 0.00019275104074026152, | |
| "loss": 0.7686, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2972972972972973, | |
| "grad_norm": 0.7992410659790039, | |
| "learning_rate": 0.00019242131349760534, | |
| "loss": 0.727, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3027027027027027, | |
| "grad_norm": 0.7825806140899658, | |
| "learning_rate": 0.00019208454801410266, | |
| "loss": 0.7416, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3081081081081081, | |
| "grad_norm": 0.7664586305618286, | |
| "learning_rate": 0.00019174076993574884, | |
| "loss": 0.7572, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.31351351351351353, | |
| "grad_norm": 0.7890913486480713, | |
| "learning_rate": 0.00019139000544257558, | |
| "loss": 0.7235, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.31891891891891894, | |
| "grad_norm": 0.7417885065078735, | |
| "learning_rate": 0.00019103228124665712, | |
| "loss": 0.7293, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.32432432432432434, | |
| "grad_norm": 0.7056080102920532, | |
| "learning_rate": 0.0001906676245900759, | |
| "loss": 0.7299, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.32972972972972975, | |
| "grad_norm": 0.7992605566978455, | |
| "learning_rate": 0.00019029606324284814, | |
| "loss": 0.7445, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.33513513513513515, | |
| "grad_norm": 0.7960366606712341, | |
| "learning_rate": 0.00018991762550080906, | |
| "loss": 0.7448, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.34054054054054056, | |
| "grad_norm": 0.7352651357650757, | |
| "learning_rate": 0.0001895323401834578, | |
| "loss": 0.7246, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.34594594594594597, | |
| "grad_norm": 0.7147911190986633, | |
| "learning_rate": 0.00018914023663176306, | |
| "loss": 0.7206, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.35135135135135137, | |
| "grad_norm": 0.7172034382820129, | |
| "learning_rate": 0.00018874134470592835, | |
| "loss": 0.6924, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3567567567567568, | |
| "grad_norm": 0.715886652469635, | |
| "learning_rate": 0.00018833569478311817, | |
| "loss": 0.7051, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3621621621621622, | |
| "grad_norm": 0.6763896346092224, | |
| "learning_rate": 0.0001879233177551447, | |
| "loss": 0.7163, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3675675675675676, | |
| "grad_norm": 0.7396337389945984, | |
| "learning_rate": 0.00018750424502611527, | |
| "loss": 0.753, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.372972972972973, | |
| "grad_norm": 0.7334195971488953, | |
| "learning_rate": 0.00018707850851004058, | |
| "loss": 0.6956, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3783783783783784, | |
| "grad_norm": 0.7924448251724243, | |
| "learning_rate": 0.00018664614062840473, | |
| "loss": 0.7333, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3837837837837838, | |
| "grad_norm": 0.7402865290641785, | |
| "learning_rate": 0.00018620717430769586, | |
| "loss": 0.707, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3891891891891892, | |
| "grad_norm": 0.8460499048233032, | |
| "learning_rate": 0.00018576164297689877, | |
| "loss": 0.688, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.3945945945945946, | |
| "grad_norm": 0.7490622401237488, | |
| "learning_rate": 0.00018530958056494932, | |
| "loss": 0.6789, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8118138909339905, | |
| "learning_rate": 0.00018485102149815038, | |
| "loss": 0.7102, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 0.7157021164894104, | |
| "learning_rate": 0.00018438600069755026, | |
| "loss": 0.6946, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.41081081081081083, | |
| "grad_norm": 0.8860333561897278, | |
| "learning_rate": 0.00018391455357628334, | |
| "loss": 0.6833, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.41621621621621624, | |
| "grad_norm": 0.8259391784667969, | |
| "learning_rate": 0.00018343671603687317, | |
| "loss": 0.7003, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.42162162162162165, | |
| "grad_norm": 0.7634344100952148, | |
| "learning_rate": 0.00018295252446849842, | |
| "loss": 0.7218, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.42702702702702705, | |
| "grad_norm": 0.7924422025680542, | |
| "learning_rate": 0.00018246201574422164, | |
| "loss": 0.6759, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.43243243243243246, | |
| "grad_norm": 0.7786385416984558, | |
| "learning_rate": 0.00018196522721818128, | |
| "loss": 0.6768, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.43783783783783786, | |
| "grad_norm": 0.8325049877166748, | |
| "learning_rate": 0.00018146219672274694, | |
| "loss": 0.6845, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.44324324324324327, | |
| "grad_norm": 0.7333595156669617, | |
| "learning_rate": 0.00018095296256563845, | |
| "loss": 0.6891, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4486486486486487, | |
| "grad_norm": 0.6770475506782532, | |
| "learning_rate": 0.00018043756352700846, | |
| "loss": 0.6923, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4540540540540541, | |
| "grad_norm": 0.746793270111084, | |
| "learning_rate": 0.0001799160388564892, | |
| "loss": 0.7027, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4594594594594595, | |
| "grad_norm": 0.8229703307151794, | |
| "learning_rate": 0.00017938842827020348, | |
| "loss": 0.73, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4648648648648649, | |
| "grad_norm": 0.8228402733802795, | |
| "learning_rate": 0.0001788547719477402, | |
| "loss": 0.6763, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.4702702702702703, | |
| "grad_norm": 0.8647485971450806, | |
| "learning_rate": 0.0001783151105290944, | |
| "loss": 0.6937, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.4756756756756757, | |
| "grad_norm": 0.7954670786857605, | |
| "learning_rate": 0.0001777694851115726, | |
| "loss": 0.7183, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4810810810810811, | |
| "grad_norm": 0.7662751078605652, | |
| "learning_rate": 0.00017721793724666268, | |
| "loss": 0.7343, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 0.6992731094360352, | |
| "learning_rate": 0.00017666050893687008, | |
| "loss": 0.674, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4918918918918919, | |
| "grad_norm": 0.7148111462593079, | |
| "learning_rate": 0.0001760972426325187, | |
| "loss": 0.7152, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4972972972972973, | |
| "grad_norm": 1.4267653226852417, | |
| "learning_rate": 0.00017552818122851838, | |
| "loss": 0.6574, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5027027027027027, | |
| "grad_norm": 0.7189494371414185, | |
| "learning_rate": 0.00017495336806109827, | |
| "loss": 0.6553, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5081081081081081, | |
| "grad_norm": 0.7621554136276245, | |
| "learning_rate": 0.00017437284690450654, | |
| "loss": 0.7113, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5135135135135135, | |
| "grad_norm": 0.8519230484962463, | |
| "learning_rate": 0.00017378666196767685, | |
| "loss": 0.6948, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.518918918918919, | |
| "grad_norm": 0.7450538873672485, | |
| "learning_rate": 0.00017319485789086162, | |
| "loss": 0.7074, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5243243243243243, | |
| "grad_norm": 0.7189666032791138, | |
| "learning_rate": 0.00017259747974223265, | |
| "loss": 0.662, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5297297297297298, | |
| "grad_norm": 0.7510509490966797, | |
| "learning_rate": 0.00017199457301444868, | |
| "loss": 0.6841, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5351351351351351, | |
| "grad_norm": 0.8049071431159973, | |
| "learning_rate": 0.00017138618362119137, | |
| "loss": 0.6573, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.7858609557151794, | |
| "learning_rate": 0.00017077235789366842, | |
| "loss": 0.6905, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "eval_loss": 1.0087087154388428, | |
| "eval_runtime": 1125.5024, | |
| "eval_samples_per_second": 9.287, | |
| "eval_steps_per_second": 2.322, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5459459459459459, | |
| "grad_norm": 0.7082162499427795, | |
| "learning_rate": 0.0001701531425770856, | |
| "loss": 0.6264, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5513513513513514, | |
| "grad_norm": 0.6960082054138184, | |
| "learning_rate": 0.00016952858482708656, | |
| "loss": 0.6739, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5567567567567567, | |
| "grad_norm": 0.6909856200218201, | |
| "learning_rate": 0.00016889873220616206, | |
| "loss": 0.7019, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5621621621621622, | |
| "grad_norm": 0.7821714282035828, | |
| "learning_rate": 0.00016826363268002782, | |
| "loss": 0.6896, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5675675675675675, | |
| "grad_norm": 0.7264735102653503, | |
| "learning_rate": 0.00016762333461397156, | |
| "loss": 0.6186, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.572972972972973, | |
| "grad_norm": 0.7788714170455933, | |
| "learning_rate": 0.00016697788676917007, | |
| "loss": 0.6771, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5783783783783784, | |
| "grad_norm": 0.7341744303703308, | |
| "learning_rate": 0.00016632733829897566, | |
| "loss": 0.6633, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5837837837837838, | |
| "grad_norm": 0.7561785578727722, | |
| "learning_rate": 0.00016567173874517307, | |
| "loss": 0.6771, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5891891891891892, | |
| "grad_norm": 0.777637243270874, | |
| "learning_rate": 0.00016501113803420658, | |
| "loss": 0.6717, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 0.775454044342041, | |
| "learning_rate": 0.0001643455864733779, | |
| "loss": 0.6573, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.7935672402381897, | |
| "learning_rate": 0.0001636751347470152, | |
| "loss": 0.65, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6054054054054054, | |
| "grad_norm": 0.6799283623695374, | |
| "learning_rate": 0.00016299983391261324, | |
| "loss": 0.6433, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6108108108108108, | |
| "grad_norm": 0.7489930987358093, | |
| "learning_rate": 0.00016231973539694504, | |
| "loss": 0.656, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6162162162162163, | |
| "grad_norm": 0.8089328408241272, | |
| "learning_rate": 0.0001616348909921457, | |
| "loss": 0.6679, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6216216216216216, | |
| "grad_norm": 0.769966185092926, | |
| "learning_rate": 0.00016094535285176813, | |
| "loss": 0.654, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6270270270270271, | |
| "grad_norm": 0.9080989360809326, | |
| "learning_rate": 0.00016025117348681132, | |
| "loss": 0.6612, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6324324324324324, | |
| "grad_norm": 0.7278012633323669, | |
| "learning_rate": 0.00015955240576172165, | |
| "loss": 0.6392, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6378378378378379, | |
| "grad_norm": 0.7645131945610046, | |
| "learning_rate": 0.0001588491028903667, | |
| "loss": 0.6899, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6432432432432432, | |
| "grad_norm": 0.7604426741600037, | |
| "learning_rate": 0.00015814131843198308, | |
| "loss": 0.6567, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 0.738106369972229, | |
| "learning_rate": 0.00015742910628709756, | |
| "loss": 0.6641, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.654054054054054, | |
| "grad_norm": 0.7150177359580994, | |
| "learning_rate": 0.00015671252069342247, | |
| "loss": 0.6813, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6594594594594595, | |
| "grad_norm": 0.7188438773155212, | |
| "learning_rate": 0.00015599161622172517, | |
| "loss": 0.6387, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6648648648648648, | |
| "grad_norm": 0.7173952460289001, | |
| "learning_rate": 0.00015526644777167219, | |
| "loss": 0.6722, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6702702702702703, | |
| "grad_norm": 0.8392152786254883, | |
| "learning_rate": 0.00015453707056764862, | |
| "loss": 0.6926, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 0.8548230528831482, | |
| "learning_rate": 0.0001538035401545525, | |
| "loss": 0.6296, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6810810810810811, | |
| "grad_norm": 0.7427430748939514, | |
| "learning_rate": 0.00015306591239356475, | |
| "loss": 0.6508, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6864864864864865, | |
| "grad_norm": 0.7263091206550598, | |
| "learning_rate": 0.0001523242434578952, | |
| "loss": 0.6528, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6918918918918919, | |
| "grad_norm": 0.7919740080833435, | |
| "learning_rate": 0.00015157858982850475, | |
| "loss": 0.638, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6972972972972973, | |
| "grad_norm": 0.720586359500885, | |
| "learning_rate": 0.00015082900828980423, | |
| "loss": 0.667, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 0.9061957001686096, | |
| "learning_rate": 0.00015007555592532997, | |
| "loss": 0.6308, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7081081081081081, | |
| "grad_norm": 0.6996462345123291, | |
| "learning_rate": 0.00014931829011339659, | |
| "loss": 0.6463, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7135135135135136, | |
| "grad_norm": 0.7758413553237915, | |
| "learning_rate": 0.00014855726852272753, | |
| "loss": 0.6184, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7189189189189189, | |
| "grad_norm": 0.6599385142326355, | |
| "learning_rate": 0.00014779254910806335, | |
| "loss": 0.6329, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.7243243243243244, | |
| "grad_norm": 0.7816442251205444, | |
| "learning_rate": 0.00014702419010574825, | |
| "loss": 0.6696, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7297297297297297, | |
| "grad_norm": 0.8203967213630676, | |
| "learning_rate": 0.00014625225002929502, | |
| "loss": 0.6835, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7351351351351352, | |
| "grad_norm": 0.8271581530570984, | |
| "learning_rate": 0.00014547678766492917, | |
| "loss": 0.6574, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7405405405405405, | |
| "grad_norm": 0.708739697933197, | |
| "learning_rate": 0.00014469786206711214, | |
| "loss": 0.6094, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.745945945945946, | |
| "grad_norm": 0.6651840209960938, | |
| "learning_rate": 0.00014391553255404385, | |
| "loss": 0.6615, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7513513513513513, | |
| "grad_norm": 0.6395004987716675, | |
| "learning_rate": 0.00014312985870314568, | |
| "loss": 0.6278, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 0.7492053508758545, | |
| "learning_rate": 0.00014234090034652324, | |
| "loss": 0.6139, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7621621621621621, | |
| "grad_norm": 0.7917840480804443, | |
| "learning_rate": 0.00014154871756640996, | |
| "loss": 0.6471, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7675675675675676, | |
| "grad_norm": 0.7364196181297302, | |
| "learning_rate": 0.00014075337069059158, | |
| "loss": 0.6409, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.772972972972973, | |
| "grad_norm": 0.7311397194862366, | |
| "learning_rate": 0.00013995492028781202, | |
| "loss": 0.6093, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7783783783783784, | |
| "grad_norm": 0.8387266397476196, | |
| "learning_rate": 0.00013915342716316076, | |
| "loss": 0.6334, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7837837837837838, | |
| "grad_norm": 0.7672157883644104, | |
| "learning_rate": 0.00013834895235344242, | |
| "loss": 0.6243, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7891891891891892, | |
| "grad_norm": 0.7562130093574524, | |
| "learning_rate": 0.00013754155712252832, | |
| "loss": 0.6561, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7945945945945946, | |
| "grad_norm": 0.7368100881576538, | |
| "learning_rate": 0.0001367313029566913, | |
| "loss": 0.6359, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.7335553765296936, | |
| "learning_rate": 0.0001359182515599231, | |
| "loss": 0.618, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.8054054054054054, | |
| "grad_norm": 0.8272745013237, | |
| "learning_rate": 0.00013510246484923547, | |
| "loss": 0.6431, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.9075089693069458, | |
| "learning_rate": 0.00013428400494994484, | |
| "loss": 0.642, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "eval_loss": 1.1368237733840942, | |
| "eval_runtime": 1125.1843, | |
| "eval_samples_per_second": 9.289, | |
| "eval_steps_per_second": 2.322, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8162162162162162, | |
| "grad_norm": 0.7253302931785583, | |
| "learning_rate": 0.00013346293419094134, | |
| "loss": 0.6315, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.8216216216216217, | |
| "grad_norm": 0.8258981704711914, | |
| "learning_rate": 0.0001326393150999422, | |
| "loss": 0.5844, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.827027027027027, | |
| "grad_norm": 0.7383174896240234, | |
| "learning_rate": 0.00013181321039872993, | |
| "loss": 0.6457, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8324324324324325, | |
| "grad_norm": 0.8157215714454651, | |
| "learning_rate": 0.000130984682998376, | |
| "loss": 0.6183, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.8378378378378378, | |
| "grad_norm": 0.7327470183372498, | |
| "learning_rate": 0.00013015379599444957, | |
| "loss": 0.623, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8432432432432433, | |
| "grad_norm": 0.6934810280799866, | |
| "learning_rate": 0.00012932061266221305, | |
| "loss": 0.6548, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8486486486486486, | |
| "grad_norm": 0.8231585025787354, | |
| "learning_rate": 0.00012848519645180295, | |
| "loss": 0.5803, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8540540540540541, | |
| "grad_norm": 0.6958197355270386, | |
| "learning_rate": 0.0001276476109833981, | |
| "loss": 0.6392, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8594594594594595, | |
| "grad_norm": 0.7134739756584167, | |
| "learning_rate": 0.00012680792004237477, | |
| "loss": 0.6153, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8648648648648649, | |
| "grad_norm": 0.736421525478363, | |
| "learning_rate": 0.00012596618757444917, | |
| "loss": 0.5727, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8702702702702703, | |
| "grad_norm": 0.8574367761611938, | |
| "learning_rate": 0.00012512247768080756, | |
| "loss": 0.6177, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8756756756756757, | |
| "grad_norm": 0.7083766460418701, | |
| "learning_rate": 0.00012427685461322496, | |
| "loss": 0.6445, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8810810810810811, | |
| "grad_norm": 0.7544513940811157, | |
| "learning_rate": 0.00012342938276917187, | |
| "loss": 0.6136, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8864864864864865, | |
| "grad_norm": 0.6936790347099304, | |
| "learning_rate": 0.0001225801266869104, | |
| "loss": 0.5966, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8918918918918919, | |
| "grad_norm": 0.8453409075737, | |
| "learning_rate": 0.00012172915104057919, | |
| "loss": 0.5977, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8972972972972973, | |
| "grad_norm": 0.6956205368041992, | |
| "learning_rate": 0.00012087652063526838, | |
| "loss": 0.6106, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.9027027027027027, | |
| "grad_norm": 0.7600128054618835, | |
| "learning_rate": 0.00012002230040208447, | |
| "loss": 0.6296, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.9081081081081082, | |
| "grad_norm": 0.6999400854110718, | |
| "learning_rate": 0.00011916655539320547, | |
| "loss": 0.5919, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.9135135135135135, | |
| "grad_norm": 0.7615451812744141, | |
| "learning_rate": 0.00011830935077692695, | |
| "loss": 0.6066, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.918918918918919, | |
| "grad_norm": 0.7937692999839783, | |
| "learning_rate": 0.0001174507518326992, | |
| "loss": 0.6108, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9243243243243243, | |
| "grad_norm": 0.6862788796424866, | |
| "learning_rate": 0.00011659082394615607, | |
| "loss": 0.609, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9297297297297298, | |
| "grad_norm": 0.7376629710197449, | |
| "learning_rate": 0.00011572963260413547, | |
| "loss": 0.6384, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.9351351351351351, | |
| "grad_norm": 0.6539621949195862, | |
| "learning_rate": 0.00011486724338969232, | |
| "loss": 0.5801, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9405405405405406, | |
| "grad_norm": 0.7667025923728943, | |
| "learning_rate": 0.00011400372197710414, | |
| "loss": 0.585, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 0.7780829071998596, | |
| "learning_rate": 0.00011313913412686981, | |
| "loss": 0.6193, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9513513513513514, | |
| "grad_norm": 0.9730708003044128, | |
| "learning_rate": 0.0001122735456807015, | |
| "loss": 0.5813, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9567567567567568, | |
| "grad_norm": 0.8667449355125427, | |
| "learning_rate": 0.00011140702255651063, | |
| "loss": 0.6694, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9621621621621622, | |
| "grad_norm": 0.7646722197532654, | |
| "learning_rate": 0.00011053963074338797, | |
| "loss": 0.6237, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9675675675675676, | |
| "grad_norm": 0.7633317112922668, | |
| "learning_rate": 0.00010967143629657842, | |
| "loss": 0.5807, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "grad_norm": 0.7953817844390869, | |
| "learning_rate": 0.00010880250533245038, | |
| "loss": 0.5905, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9783783783783784, | |
| "grad_norm": 0.6951556205749512, | |
| "learning_rate": 0.00010793290402346094, | |
| "loss": 0.5662, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9837837837837838, | |
| "grad_norm": 0.7796500325202942, | |
| "learning_rate": 0.00010706269859311669, | |
| "loss": 0.6131, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9891891891891892, | |
| "grad_norm": 0.8852221369743347, | |
| "learning_rate": 0.00010619195531093017, | |
| "loss": 0.5827, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9945945945945946, | |
| "grad_norm": 0.7465667128562927, | |
| "learning_rate": 0.00010532074048737364, | |
| "loss": 0.6013, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.85235995054245, | |
| "learning_rate": 0.00010444912046882888, | |
| "loss": 0.5973, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.0054054054054054, | |
| "grad_norm": 0.780853807926178, | |
| "learning_rate": 0.00010357716163253497, | |
| "loss": 0.4934, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.0108108108108107, | |
| "grad_norm": 0.7970598340034485, | |
| "learning_rate": 0.00010270493038153319, | |
| "loss": 0.5365, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.0162162162162163, | |
| "grad_norm": 0.7823016047477722, | |
| "learning_rate": 0.0001018324931396103, | |
| "loss": 0.4971, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.0216216216216216, | |
| "grad_norm": 0.7091385722160339, | |
| "learning_rate": 0.00010095991634624, | |
| "loss": 0.5203, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.027027027027027, | |
| "grad_norm": 0.8729794025421143, | |
| "learning_rate": 0.00010008726645152353, | |
| "loss": 0.4914, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.0324324324324325, | |
| "grad_norm": 0.7777389883995056, | |
| "learning_rate": 9.921460991112891e-05, | |
| "loss": 0.53, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.037837837837838, | |
| "grad_norm": 0.674041211605072, | |
| "learning_rate": 9.834201318123025e-05, | |
| "loss": 0.4955, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.0432432432432432, | |
| "grad_norm": 0.7665605545043945, | |
| "learning_rate": 9.746954271344703e-05, | |
| "loss": 0.5419, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.0486486486486486, | |
| "grad_norm": 0.7776033282279968, | |
| "learning_rate": 9.659726494978325e-05, | |
| "loss": 0.5131, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.054054054054054, | |
| "grad_norm": 0.7598256468772888, | |
| "learning_rate": 9.572524631756778e-05, | |
| "loss": 0.523, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.0594594594594595, | |
| "grad_norm": 0.7463747262954712, | |
| "learning_rate": 9.48535532243956e-05, | |
| "loss": 0.5088, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.0648648648648649, | |
| "grad_norm": 0.7281492948532104, | |
| "learning_rate": 9.398225205307066e-05, | |
| "loss": 0.5112, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.0702702702702702, | |
| "grad_norm": 0.7457937002182007, | |
| "learning_rate": 9.311140915655054e-05, | |
| "loss": 0.5642, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0756756756756758, | |
| "grad_norm": 0.775947093963623, | |
| "learning_rate": 9.224109085289343e-05, | |
| "loss": 0.5331, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 0.7909854650497437, | |
| "learning_rate": 9.137136342020768e-05, | |
| "loss": 0.5022, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "eval_loss": 1.1171799898147583, | |
| "eval_runtime": 1127.8981, | |
| "eval_samples_per_second": 9.267, | |
| "eval_steps_per_second": 2.317, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3700, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 3 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.802421158143263e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |