{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998451612903225,
  "eval_steps": 500,
  "global_step": 4843,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002064516129032258,
      "grad_norm": 2.7178148753284264,
      "learning_rate": 2.0618556701030925e-08,
      "loss": 0.4276,
      "step": 10
    },
    {
      "epoch": 0.004129032258064516,
      "grad_norm": 2.5625384878679336,
      "learning_rate": 4.123711340206185e-08,
      "loss": 0.4189,
      "step": 20
    },
    {
      "epoch": 0.006193548387096774,
      "grad_norm": 2.267409247602629,
      "learning_rate": 6.185567010309278e-08,
      "loss": 0.4065,
      "step": 30
    },
    {
      "epoch": 0.008258064516129033,
      "grad_norm": 1.9532754064444633,
      "learning_rate": 8.24742268041237e-08,
      "loss": 0.3945,
      "step": 40
    },
    {
      "epoch": 0.01032258064516129,
      "grad_norm": 1.585906241951902,
      "learning_rate": 1.0309278350515462e-07,
      "loss": 0.376,
      "step": 50
    },
    {
      "epoch": 0.012387096774193548,
      "grad_norm": 1.2402093228097,
      "learning_rate": 1.2371134020618556e-07,
      "loss": 0.3589,
      "step": 60
    },
    {
      "epoch": 0.014451612903225806,
      "grad_norm": 1.0130242074966187,
      "learning_rate": 1.4432989690721648e-07,
      "loss": 0.3489,
      "step": 70
    },
    {
      "epoch": 0.016516129032258065,
      "grad_norm": 0.8517244513870358,
      "learning_rate": 1.649484536082474e-07,
      "loss": 0.3286,
      "step": 80
    },
    {
      "epoch": 0.01858064516129032,
      "grad_norm": 0.6911175913558477,
      "learning_rate": 1.8556701030927835e-07,
      "loss": 0.3177,
      "step": 90
    },
    {
      "epoch": 0.02064516129032258,
      "grad_norm": 0.604894441447739,
      "learning_rate": 2.0618556701030925e-07,
      "loss": 0.3146,
      "step": 100
    },
    {
      "epoch": 0.02270967741935484,
      "grad_norm": 0.5731491989954443,
      "learning_rate": 2.268041237113402e-07,
      "loss": 0.3156,
      "step": 110
    },
    {
      "epoch": 0.024774193548387096,
      "grad_norm": 0.5622949683109659,
      "learning_rate": 2.474226804123711e-07,
      "loss": 0.3053,
      "step": 120
    },
    {
      "epoch": 0.026838709677419356,
      "grad_norm": 0.5173328385018767,
      "learning_rate": 2.6804123711340204e-07,
      "loss": 0.3021,
      "step": 130
    },
    {
      "epoch": 0.02890322580645161,
      "grad_norm": 0.502383112182359,
      "learning_rate": 2.8865979381443296e-07,
      "loss": 0.3024,
      "step": 140
    },
    {
      "epoch": 0.03096774193548387,
      "grad_norm": 0.5056646956646865,
      "learning_rate": 3.0927835051546394e-07,
      "loss": 0.299,
      "step": 150
    },
    {
      "epoch": 0.03303225806451613,
      "grad_norm": 0.48413086309965675,
      "learning_rate": 3.298969072164948e-07,
      "loss": 0.2947,
      "step": 160
    },
    {
      "epoch": 0.03509677419354839,
      "grad_norm": 0.46867700011688224,
      "learning_rate": 3.5051546391752573e-07,
      "loss": 0.295,
      "step": 170
    },
    {
      "epoch": 0.03716129032258064,
      "grad_norm": 0.463440589569122,
      "learning_rate": 3.711340206185567e-07,
      "loss": 0.2938,
      "step": 180
    },
    {
      "epoch": 0.0392258064516129,
      "grad_norm": 0.46043148110607285,
      "learning_rate": 3.917525773195876e-07,
      "loss": 0.2829,
      "step": 190
    },
    {
      "epoch": 0.04129032258064516,
      "grad_norm": 0.4452081751770784,
      "learning_rate": 4.123711340206185e-07,
      "loss": 0.2914,
      "step": 200
    },
    {
      "epoch": 0.04335483870967742,
      "grad_norm": 0.4389480699132875,
      "learning_rate": 4.3298969072164947e-07,
      "loss": 0.2875,
      "step": 210
    },
    {
      "epoch": 0.04541935483870968,
      "grad_norm": 0.44451224796794,
      "learning_rate": 4.536082474226804e-07,
      "loss": 0.2891,
      "step": 220
    },
    {
      "epoch": 0.04748387096774193,
      "grad_norm": 0.44134552800104987,
      "learning_rate": 4.742268041237113e-07,
      "loss": 0.2824,
      "step": 230
    },
    {
      "epoch": 0.04954838709677419,
      "grad_norm": 0.4447117928290257,
      "learning_rate": 4.948453608247422e-07,
      "loss": 0.2833,
      "step": 240
    },
    {
      "epoch": 0.05161290322580645,
      "grad_norm": 0.42778123230774645,
      "learning_rate": 5.154639175257731e-07,
      "loss": 0.288,
      "step": 250
    },
    {
      "epoch": 0.05367741935483871,
      "grad_norm": 0.4310604168679218,
      "learning_rate": 5.360824742268041e-07,
      "loss": 0.2848,
      "step": 260
    },
    {
      "epoch": 0.05574193548387097,
      "grad_norm": 0.44369498025891935,
      "learning_rate": 5.56701030927835e-07,
      "loss": 0.2781,
      "step": 270
    },
    {
      "epoch": 0.05780645161290322,
      "grad_norm": 0.4352776941276996,
      "learning_rate": 5.773195876288659e-07,
      "loss": 0.2765,
      "step": 280
    },
    {
      "epoch": 0.05987096774193548,
      "grad_norm": 0.43125432618608434,
      "learning_rate": 5.979381443298969e-07,
      "loss": 0.2868,
      "step": 290
    },
    {
      "epoch": 0.06193548387096774,
      "grad_norm": 0.4088432228941457,
      "learning_rate": 6.185567010309279e-07,
      "loss": 0.2767,
      "step": 300
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.40838698266390966,
      "learning_rate": 6.391752577319586e-07,
      "loss": 0.2805,
      "step": 310
    },
    {
      "epoch": 0.06606451612903226,
      "grad_norm": 0.450287079898162,
      "learning_rate": 6.597938144329896e-07,
      "loss": 0.2861,
      "step": 320
    },
    {
      "epoch": 0.06812903225806452,
      "grad_norm": 0.44464718825256144,
      "learning_rate": 6.804123711340206e-07,
      "loss": 0.2806,
      "step": 330
    },
    {
      "epoch": 0.07019354838709678,
      "grad_norm": 0.4173477445677872,
      "learning_rate": 7.010309278350515e-07,
      "loss": 0.2775,
      "step": 340
    },
    {
      "epoch": 0.07225806451612904,
      "grad_norm": 0.4247873010801217,
      "learning_rate": 7.216494845360824e-07,
      "loss": 0.2841,
      "step": 350
    },
    {
      "epoch": 0.07432258064516128,
      "grad_norm": 0.42839718840105573,
      "learning_rate": 7.422680412371134e-07,
      "loss": 0.2805,
      "step": 360
    },
    {
      "epoch": 0.07638709677419354,
      "grad_norm": 0.43388241770267927,
      "learning_rate": 7.628865979381443e-07,
      "loss": 0.2716,
      "step": 370
    },
    {
      "epoch": 0.0784516129032258,
      "grad_norm": 0.4326840727745175,
      "learning_rate": 7.835051546391752e-07,
      "loss": 0.2791,
      "step": 380
    },
    {
      "epoch": 0.08051612903225806,
      "grad_norm": 0.4446793301639306,
      "learning_rate": 8.041237113402062e-07,
      "loss": 0.2754,
      "step": 390
    },
    {
      "epoch": 0.08258064516129032,
      "grad_norm": 0.4088616982234986,
      "learning_rate": 8.24742268041237e-07,
      "loss": 0.2776,
      "step": 400
    },
    {
      "epoch": 0.08464516129032258,
      "grad_norm": 0.4305234378372906,
      "learning_rate": 8.45360824742268e-07,
      "loss": 0.275,
      "step": 410
    },
    {
      "epoch": 0.08670967741935484,
      "grad_norm": 0.4178542921488476,
      "learning_rate": 8.659793814432989e-07,
      "loss": 0.2712,
      "step": 420
    },
    {
      "epoch": 0.0887741935483871,
      "grad_norm": 0.4295628808236906,
      "learning_rate": 8.865979381443298e-07,
      "loss": 0.2783,
      "step": 430
    },
    {
      "epoch": 0.09083870967741936,
      "grad_norm": 0.42059850195120757,
      "learning_rate": 9.072164948453608e-07,
      "loss": 0.2778,
      "step": 440
    },
    {
      "epoch": 0.09290322580645162,
      "grad_norm": 0.41033574022465774,
      "learning_rate": 9.278350515463918e-07,
      "loss": 0.2791,
      "step": 450
    },
    {
      "epoch": 0.09496774193548387,
      "grad_norm": 0.44298494519621506,
      "learning_rate": 9.484536082474226e-07,
      "loss": 0.2791,
      "step": 460
    },
    {
      "epoch": 0.09703225806451612,
      "grad_norm": 0.4272735948398401,
      "learning_rate": 9.690721649484535e-07,
      "loss": 0.277,
      "step": 470
    },
    {
      "epoch": 0.09909677419354838,
      "grad_norm": 0.4425115953914259,
      "learning_rate": 9.896907216494845e-07,
      "loss": 0.2718,
      "step": 480
    },
    {
      "epoch": 0.10116129032258064,
      "grad_norm": 0.43188220880067546,
      "learning_rate": 9.999967520836107e-07,
      "loss": 0.2747,
      "step": 490
    },
    {
      "epoch": 0.1032258064516129,
      "grad_norm": 0.45764678748045984,
      "learning_rate": 9.999707690056706e-07,
      "loss": 0.2829,
      "step": 500
    },
    {
      "epoch": 0.10529032258064516,
      "grad_norm": 0.4165688525515349,
      "learning_rate": 9.999188042000401e-07,
      "loss": 0.2766,
      "step": 510
    },
    {
      "epoch": 0.10735483870967742,
      "grad_norm": 0.3946488524350012,
      "learning_rate": 9.998408603671476e-07,
      "loss": 0.2777,
      "step": 520
    },
    {
      "epoch": 0.10941935483870968,
      "grad_norm": 0.4371786129352002,
      "learning_rate": 9.997369415574612e-07,
      "loss": 0.2698,
      "step": 530
    },
    {
      "epoch": 0.11148387096774194,
      "grad_norm": 0.42687492357393675,
      "learning_rate": 9.996070531712766e-07,
      "loss": 0.272,
      "step": 540
    },
    {
      "epoch": 0.1135483870967742,
      "grad_norm": 0.4388895511669077,
      "learning_rate": 9.99451201958438e-07,
      "loss": 0.2752,
      "step": 550
    },
    {
      "epoch": 0.11561290322580645,
      "grad_norm": 0.4078057120806041,
      "learning_rate": 9.992693960179864e-07,
      "loss": 0.2714,
      "step": 560
    },
    {
      "epoch": 0.1176774193548387,
      "grad_norm": 0.43339623355818996,
      "learning_rate": 9.99061644797739e-07,
      "loss": 0.2794,
      "step": 570
    },
    {
      "epoch": 0.11974193548387096,
      "grad_norm": 0.417329955629982,
      "learning_rate": 9.988279590937983e-07,
      "loss": 0.2748,
      "step": 580
    },
    {
      "epoch": 0.12180645161290322,
      "grad_norm": 0.40923444925031505,
      "learning_rate": 9.985683510499907e-07,
      "loss": 0.2689,
      "step": 590
    },
    {
      "epoch": 0.12387096774193548,
      "grad_norm": 0.42272165886149954,
      "learning_rate": 9.982828341572362e-07,
      "loss": 0.2728,
      "step": 600
    },
    {
      "epoch": 0.12593548387096773,
      "grad_norm": 0.40035391344182425,
      "learning_rate": 9.979714232528463e-07,
      "loss": 0.2676,
      "step": 610
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.41930822994047545,
      "learning_rate": 9.97634134519754e-07,
      "loss": 0.2754,
      "step": 620
    },
    {
      "epoch": 0.13006451612903225,
      "grad_norm": 0.41319545992748746,
      "learning_rate": 9.972709854856716e-07,
      "loss": 0.2747,
      "step": 630
    },
    {
      "epoch": 0.13212903225806452,
      "grad_norm": 0.42917202090046985,
      "learning_rate": 9.968819950221812e-07,
      "loss": 0.2665,
      "step": 640
    },
    {
      "epoch": 0.13419354838709677,
      "grad_norm": 0.41071088182465415,
      "learning_rate": 9.964671833437533e-07,
      "loss": 0.2735,
      "step": 650
    },
    {
      "epoch": 0.13625806451612904,
      "grad_norm": 0.42770202579067185,
      "learning_rate": 9.960265720066961e-07,
      "loss": 0.2655,
      "step": 660
    },
    {
      "epoch": 0.13832258064516129,
      "grad_norm": 0.43110284554769446,
      "learning_rate": 9.95560183908036e-07,
      "loss": 0.2676,
      "step": 670
    },
    {
      "epoch": 0.14038709677419356,
      "grad_norm": 0.4085540494229492,
      "learning_rate": 9.950680432843267e-07,
      "loss": 0.2711,
      "step": 680
    },
    {
      "epoch": 0.1424516129032258,
      "grad_norm": 0.4399277307424629,
      "learning_rate": 9.94550175710391e-07,
      "loss": 0.2714,
      "step": 690
    },
    {
      "epoch": 0.14451612903225808,
      "grad_norm": 0.4022098269924505,
      "learning_rate": 9.940066080979909e-07,
      "loss": 0.2692,
      "step": 700
    },
    {
      "epoch": 0.14658064516129032,
      "grad_norm": 0.4138221392891085,
      "learning_rate": 9.934373686944286e-07,
      "loss": 0.2706,
      "step": 710
    },
    {
      "epoch": 0.14864516129032257,
      "grad_norm": 0.3973539996922592,
      "learning_rate": 9.928424870810804e-07,
      "loss": 0.2678,
      "step": 720
    },
    {
      "epoch": 0.15070967741935484,
      "grad_norm": 0.43845088028338314,
      "learning_rate": 9.922219941718576e-07,
      "loss": 0.2686,
      "step": 730
    },
    {
      "epoch": 0.1527741935483871,
      "grad_norm": 0.4127024501370914,
      "learning_rate": 9.915759222116008e-07,
      "loss": 0.2714,
      "step": 740
    },
    {
      "epoch": 0.15483870967741936,
      "grad_norm": 0.4164519302523563,
      "learning_rate": 9.909043047744044e-07,
      "loss": 0.2761,
      "step": 750
    },
    {
      "epoch": 0.1569032258064516,
      "grad_norm": 0.41330928847821996,
      "learning_rate": 9.902071767618715e-07,
      "loss": 0.27,
      "step": 760
    },
    {
      "epoch": 0.15896774193548388,
      "grad_norm": 0.4049309153933818,
      "learning_rate": 9.894845744013002e-07,
      "loss": 0.2738,
      "step": 770
    },
    {
      "epoch": 0.16103225806451613,
      "grad_norm": 0.4074346893357789,
      "learning_rate": 9.887365352438015e-07,
      "loss": 0.2662,
      "step": 780
    },
    {
      "epoch": 0.1630967741935484,
      "grad_norm": 0.43051997768812966,
      "learning_rate": 9.879630981623475e-07,
      "loss": 0.2762,
      "step": 790
    },
    {
      "epoch": 0.16516129032258065,
      "grad_norm": 0.4649279575587385,
      "learning_rate": 9.871643033497512e-07,
      "loss": 0.2711,
      "step": 800
    },
    {
      "epoch": 0.1672258064516129,
      "grad_norm": 0.40771616358626656,
      "learning_rate": 9.863401923165778e-07,
      "loss": 0.2712,
      "step": 810
    },
    {
      "epoch": 0.16929032258064516,
      "grad_norm": 0.43542803830446203,
      "learning_rate": 9.854908078889881e-07,
      "loss": 0.2696,
      "step": 820
    },
    {
      "epoch": 0.1713548387096774,
      "grad_norm": 0.41224099984054274,
      "learning_rate": 9.846161942065123e-07,
      "loss": 0.27,
      "step": 830
    },
    {
      "epoch": 0.17341935483870968,
      "grad_norm": 0.40015212561755775,
      "learning_rate": 9.837163967197567e-07,
      "loss": 0.2697,
      "step": 840
    },
    {
      "epoch": 0.17548387096774193,
      "grad_norm": 0.4145538327172374,
      "learning_rate": 9.827914621880412e-07,
      "loss": 0.2719,
      "step": 850
    },
    {
      "epoch": 0.1775483870967742,
      "grad_norm": 0.4343877153779256,
      "learning_rate": 9.818414386769702e-07,
      "loss": 0.2724,
      "step": 860
    },
    {
      "epoch": 0.17961290322580645,
      "grad_norm": 0.417050446508891,
      "learning_rate": 9.808663755559345e-07,
      "loss": 0.2685,
      "step": 870
    },
    {
      "epoch": 0.18167741935483872,
      "grad_norm": 0.38732244862938475,
      "learning_rate": 9.798663234955452e-07,
      "loss": 0.2712,
      "step": 880
    },
    {
      "epoch": 0.18374193548387097,
      "grad_norm": 0.4004094471829026,
      "learning_rate": 9.788413344650013e-07,
      "loss": 0.2685,
      "step": 890
    },
    {
      "epoch": 0.18580645161290324,
      "grad_norm": 0.43063415997314575,
      "learning_rate": 9.777914617293884e-07,
      "loss": 0.2625,
      "step": 900
    },
    {
      "epoch": 0.18787096774193549,
      "grad_norm": 0.420456518146896,
      "learning_rate": 9.767167598469108e-07,
      "loss": 0.2697,
      "step": 910
    },
    {
      "epoch": 0.18993548387096773,
      "grad_norm": 0.40480539094398127,
      "learning_rate": 9.756172846660576e-07,
      "loss": 0.2707,
      "step": 920
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.40767383426144205,
      "learning_rate": 9.744930933226977e-07,
      "loss": 0.2725,
      "step": 930
    },
    {
      "epoch": 0.19406451612903225,
      "grad_norm": 0.40584494188825865,
      "learning_rate": 9.73344244237114e-07,
      "loss": 0.27,
      "step": 940
    },
    {
      "epoch": 0.19612903225806452,
      "grad_norm": 0.3945980426880921,
      "learning_rate": 9.721707971109645e-07,
      "loss": 0.2677,
      "step": 950
    },
    {
      "epoch": 0.19819354838709677,
      "grad_norm": 0.4092447523280703,
      "learning_rate": 9.709728129241817e-07,
      "loss": 0.2644,
      "step": 960
    },
    {
      "epoch": 0.20025806451612904,
      "grad_norm": 0.4269916844465542,
      "learning_rate": 9.69750353931803e-07,
      "loss": 0.2666,
      "step": 970
    },
    {
      "epoch": 0.2023225806451613,
      "grad_norm": 0.4269986054049296,
      "learning_rate": 9.685034836607358e-07,
      "loss": 0.274,
      "step": 980
    },
    {
      "epoch": 0.20438709677419356,
      "grad_norm": 0.39811569845801287,
      "learning_rate": 9.672322669064552e-07,
      "loss": 0.2713,
      "step": 990
    },
    {
      "epoch": 0.2064516129032258,
      "grad_norm": 0.4061545807740594,
      "learning_rate": 9.659367697296393e-07,
      "loss": 0.2703,
      "step": 1000
    },
    {
      "epoch": 0.20851612903225805,
      "grad_norm": 0.4275991643602954,
      "learning_rate": 9.646170594527327e-07,
      "loss": 0.2667,
      "step": 1010
    },
    {
      "epoch": 0.21058064516129033,
      "grad_norm": 0.41304510032617703,
      "learning_rate": 9.632732046564517e-07,
      "loss": 0.2783,
      "step": 1020
    },
    {
      "epoch": 0.21264516129032257,
      "grad_norm": 0.4321186888978969,
      "learning_rate": 9.619052751762172e-07,
      "loss": 0.2697,
      "step": 1030
    },
    {
      "epoch": 0.21470967741935484,
      "grad_norm": 0.4143847837898666,
      "learning_rate": 9.605133420985276e-07,
      "loss": 0.2646,
      "step": 1040
    },
    {
      "epoch": 0.2167741935483871,
      "grad_norm": 0.39735153219116426,
      "learning_rate": 9.590974777572643e-07,
      "loss": 0.2643,
      "step": 1050
    },
    {
      "epoch": 0.21883870967741936,
      "grad_norm": 0.4471117773040206,
      "learning_rate": 9.576577557299323e-07,
      "loss": 0.2684,
      "step": 1060
    },
    {
      "epoch": 0.2209032258064516,
      "grad_norm": 0.41005490851887794,
      "learning_rate": 9.561942508338368e-07,
      "loss": 0.2724,
      "step": 1070
    },
    {
      "epoch": 0.22296774193548388,
      "grad_norm": 0.39495815975173343,
      "learning_rate": 9.547070391221953e-07,
      "loss": 0.2672,
      "step": 1080
    },
    {
      "epoch": 0.22503225806451613,
      "grad_norm": 0.41286773837253543,
      "learning_rate": 9.531961978801855e-07,
      "loss": 0.2736,
      "step": 1090
    },
    {
      "epoch": 0.2270967741935484,
      "grad_norm": 0.40880461152364334,
      "learning_rate": 9.516618056209291e-07,
      "loss": 0.2629,
      "step": 1100
    },
    {
      "epoch": 0.22916129032258065,
      "grad_norm": 0.4082631780670623,
      "learning_rate": 9.501039420814111e-07,
      "loss": 0.2697,
      "step": 1110
    },
    {
      "epoch": 0.2312258064516129,
      "grad_norm": 0.42142149331893225,
      "learning_rate": 9.485226882183372e-07,
      "loss": 0.2726,
      "step": 1120
    },
    {
      "epoch": 0.23329032258064517,
      "grad_norm": 0.4228180963201584,
      "learning_rate": 9.469181262039256e-07,
      "loss": 0.2735,
      "step": 1130
    },
    {
      "epoch": 0.2353548387096774,
      "grad_norm": 0.4068752038231123,
      "learning_rate": 9.45290339421638e-07,
      "loss": 0.2633,
      "step": 1140
    },
    {
      "epoch": 0.23741935483870968,
      "grad_norm": 0.4525962035081602,
      "learning_rate": 9.436394124618454e-07,
      "loss": 0.2756,
      "step": 1150
    },
    {
      "epoch": 0.23948387096774193,
      "grad_norm": 0.4317406005985711,
      "learning_rate": 9.419654311174329e-07,
      "loss": 0.2649,
      "step": 1160
    },
    {
      "epoch": 0.2415483870967742,
      "grad_norm": 0.40937736054209056,
      "learning_rate": 9.40268482379341e-07,
      "loss": 0.2726,
      "step": 1170
    },
    {
      "epoch": 0.24361290322580645,
      "grad_norm": 0.4032924973019577,
      "learning_rate": 9.385486544320451e-07,
      "loss": 0.2682,
      "step": 1180
    },
    {
      "epoch": 0.24567741935483872,
      "grad_norm": 0.41250674679202737,
      "learning_rate": 9.368060366489732e-07,
      "loss": 0.2636,
      "step": 1190
    },
    {
      "epoch": 0.24774193548387097,
      "grad_norm": 0.4110118676412479,
      "learning_rate": 9.350407195878607e-07,
      "loss": 0.2692,
      "step": 1200
    },
    {
      "epoch": 0.2498064516129032,
      "grad_norm": 0.4071563287492707,
      "learning_rate": 9.332527949860451e-07,
      "loss": 0.2721,
      "step": 1210
    },
    {
      "epoch": 0.25187096774193546,
      "grad_norm": 0.4207844628857985,
      "learning_rate": 9.314423557556986e-07,
      "loss": 0.2707,
      "step": 1220
    },
    {
      "epoch": 0.25393548387096776,
      "grad_norm": 0.39530786208260144,
      "learning_rate": 9.296094959789994e-07,
      "loss": 0.2625,
      "step": 1230
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.41291391579777315,
      "learning_rate": 9.277543109032433e-07,
      "loss": 0.268,
      "step": 1240
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 0.4069311969256028,
      "learning_rate": 9.258768969358933e-07,
      "loss": 0.2691,
      "step": 1250
    },
    {
      "epoch": 0.2601290322580645,
      "grad_norm": 0.4266996746716126,
      "learning_rate": 9.2397735163957e-07,
      "loss": 0.264,
      "step": 1260
    },
    {
      "epoch": 0.2621935483870968,
      "grad_norm": 0.4168110125071047,
      "learning_rate": 9.220557737269816e-07,
      "loss": 0.2671,
      "step": 1270
    },
    {
      "epoch": 0.26425806451612904,
      "grad_norm": 0.402938966442535,
      "learning_rate": 9.201122630557943e-07,
      "loss": 0.2721,
      "step": 1280
    },
    {
      "epoch": 0.2663225806451613,
      "grad_norm": 0.4433703754564369,
      "learning_rate": 9.181469206234422e-07,
      "loss": 0.2724,
      "step": 1290
    },
    {
      "epoch": 0.26838709677419353,
      "grad_norm": 0.4146953645023786,
      "learning_rate": 9.161598485618803e-07,
      "loss": 0.2686,
      "step": 1300
    },
    {
      "epoch": 0.2704516129032258,
      "grad_norm": 0.40920394467675797,
      "learning_rate": 9.141511501322758e-07,
      "loss": 0.2654,
      "step": 1310
    },
    {
      "epoch": 0.2725161290322581,
      "grad_norm": 0.4291563961337551,
      "learning_rate": 9.121209297196424e-07,
      "loss": 0.2698,
      "step": 1320
    },
    {
      "epoch": 0.2745806451612903,
      "grad_norm": 0.4197741898993113,
      "learning_rate": 9.100692928274158e-07,
      "loss": 0.2678,
      "step": 1330
    },
    {
      "epoch": 0.27664516129032257,
      "grad_norm": 0.40761323746825606,
      "learning_rate": 9.079963460719714e-07,
      "loss": 0.2606,
      "step": 1340
    },
    {
      "epoch": 0.2787096774193548,
      "grad_norm": 0.4102466153208199,
      "learning_rate": 9.059021971770828e-07,
      "loss": 0.2686,
      "step": 1350
    },
    {
      "epoch": 0.2807741935483871,
      "grad_norm": 0.42536876796556666,
      "learning_rate": 9.037869549683252e-07,
      "loss": 0.2619,
      "step": 1360
    },
    {
      "epoch": 0.28283870967741936,
      "grad_norm": 0.3915639897213394,
      "learning_rate": 9.016507293674187e-07,
      "loss": 0.27,
      "step": 1370
    },
    {
      "epoch": 0.2849032258064516,
      "grad_norm": 0.40399941997235145,
      "learning_rate": 8.994936313865171e-07,
      "loss": 0.2728,
      "step": 1380
    },
    {
      "epoch": 0.28696774193548386,
      "grad_norm": 0.39944877551284785,
      "learning_rate": 8.973157731224385e-07,
      "loss": 0.274,
      "step": 1390
    },
    {
      "epoch": 0.28903225806451616,
      "grad_norm": 0.4092856245914817,
      "learning_rate": 8.9511726775084e-07,
      "loss": 0.2681,
      "step": 1400
    },
    {
      "epoch": 0.2910967741935484,
      "grad_norm": 0.481471630902478,
      "learning_rate": 8.928982295203367e-07,
      "loss": 0.2653,
      "step": 1410
    },
    {
      "epoch": 0.29316129032258065,
      "grad_norm": 0.4198523321425174,
      "learning_rate": 8.906587737465642e-07,
      "loss": 0.2683,
      "step": 1420
    },
    {
      "epoch": 0.2952258064516129,
      "grad_norm": 0.4293784170337724,
      "learning_rate": 8.883990168061863e-07,
      "loss": 0.2667,
      "step": 1430
    },
    {
      "epoch": 0.29729032258064514,
      "grad_norm": 0.409021082775393,
      "learning_rate": 8.861190761308472e-07,
      "loss": 0.2669,
      "step": 1440
    },
    {
      "epoch": 0.29935483870967744,
      "grad_norm": 0.43214579094992417,
      "learning_rate": 8.838190702010693e-07,
      "loss": 0.268,
      "step": 1450
    },
    {
      "epoch": 0.3014193548387097,
      "grad_norm": 0.4323768086041837,
      "learning_rate": 8.814991185400951e-07,
      "loss": 0.2657,
      "step": 1460
    },
    {
      "epoch": 0.30348387096774193,
      "grad_norm": 0.4140826556545066,
      "learning_rate": 8.791593417076781e-07,
      "loss": 0.2616,
      "step": 1470
    },
    {
      "epoch": 0.3055483870967742,
      "grad_norm": 0.412829397246139,
      "learning_rate": 8.767998612938152e-07,
      "loss": 0.2679,
      "step": 1480
    },
    {
      "epoch": 0.3076129032258065,
      "grad_norm": 0.42525391109992733,
      "learning_rate": 8.744207999124301e-07,
      "loss": 0.272,
      "step": 1490
    },
    {
      "epoch": 0.3096774193548387,
      "grad_norm": 0.41049108158888165,
      "learning_rate": 8.720222811950004e-07,
      "loss": 0.262,
      "step": 1500
    },
    {
      "epoch": 0.31174193548387097,
      "grad_norm": 0.40142790659401983,
      "learning_rate": 8.696044297841334e-07,
      "loss": 0.2637,
      "step": 1510
    },
    {
      "epoch": 0.3138064516129032,
      "grad_norm": 0.40612502217256863,
      "learning_rate": 8.671673713270886e-07,
      "loss": 0.2623,
      "step": 1520
    },
    {
      "epoch": 0.31587096774193546,
      "grad_norm": 0.4174546028220698,
      "learning_rate": 8.647112324692481e-07,
      "loss": 0.2648,
      "step": 1530
    },
    {
      "epoch": 0.31793548387096776,
      "grad_norm": 0.4075955189469561,
      "learning_rate": 8.622361408475361e-07,
      "loss": 0.2671,
      "step": 1540
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.42757900653877373,
      "learning_rate": 8.597422250837848e-07,
      "loss": 0.2692,
      "step": 1550
    },
    {
      "epoch": 0.32206451612903225,
      "grad_norm": 0.4104754908146659,
      "learning_rate": 8.572296147780515e-07,
      "loss": 0.2627,
      "step": 1560
    },
    {
      "epoch": 0.3241290322580645,
      "grad_norm": 0.43397940214884867,
      "learning_rate": 8.546984405018834e-07,
      "loss": 0.2711,
      "step": 1570
    },
    {
      "epoch": 0.3261935483870968,
      "grad_norm": 0.43370385059956773,
      "learning_rate": 8.521488337915318e-07,
      "loss": 0.2731,
      "step": 1580
    },
    {
      "epoch": 0.32825806451612904,
      "grad_norm": 0.44403794562813276,
      "learning_rate": 8.49580927141117e-07,
      "loss": 0.2681,
      "step": 1590
    },
    {
      "epoch": 0.3303225806451613,
      "grad_norm": 0.40739732247633187,
      "learning_rate": 8.469948539957432e-07,
      "loss": 0.2704,
      "step": 1600
    },
    {
      "epoch": 0.33238709677419354,
      "grad_norm": 0.43367593462825726,
      "learning_rate": 8.443907487445634e-07,
      "loss": 0.2622,
      "step": 1610
    },
    {
      "epoch": 0.3344516129032258,
      "grad_norm": 0.40927201300965355,
      "learning_rate": 8.417687467137964e-07,
      "loss": 0.2682,
      "step": 1620
    },
    {
      "epoch": 0.3365161290322581,
      "grad_norm": 0.417183568246897,
      "learning_rate": 8.391289841596934e-07,
      "loss": 0.2623,
      "step": 1630
    },
    {
      "epoch": 0.33858064516129033,
      "grad_norm": 0.4099669038108638,
      "learning_rate": 8.364715982614577e-07,
      "loss": 0.2679,
      "step": 1640
    },
    {
      "epoch": 0.3406451612903226,
      "grad_norm": 0.40232008744229203,
      "learning_rate": 8.337967271141164e-07,
      "loss": 0.2686,
      "step": 1650
    },
    {
      "epoch": 0.3427096774193548,
      "grad_norm": 0.39988507473787327,
      "learning_rate": 8.311045097213431e-07,
      "loss": 0.274,
      "step": 1660
    },
    {
      "epoch": 0.3447741935483871,
      "grad_norm": 0.4279758729822823,
      "learning_rate": 8.283950859882358e-07,
      "loss": 0.2744,
      "step": 1670
    },
    {
      "epoch": 0.34683870967741937,
      "grad_norm": 0.4113391707725698,
      "learning_rate": 8.256685967140449e-07,
      "loss": 0.2658,
      "step": 1680
    },
    {
      "epoch": 0.3489032258064516,
      "grad_norm": 0.41525159484662594,
      "learning_rate": 8.229251835848573e-07,
      "loss": 0.2655,
      "step": 1690
    },
    {
      "epoch": 0.35096774193548386,
      "grad_norm": 0.4107278132335921,
      "learning_rate": 8.201649891662334e-07,
      "loss": 0.269,
      "step": 1700
    },
    {
      "epoch": 0.3530322580645161,
      "grad_norm": 0.43928129973031016,
      "learning_rate": 8.173881568957986e-07,
      "loss": 0.2654,
      "step": 1710
    },
    {
      "epoch": 0.3550967741935484,
      "grad_norm": 0.43005570640611046,
      "learning_rate": 8.145948310757886e-07,
      "loss": 0.2644,
      "step": 1720
    },
    {
      "epoch": 0.35716129032258065,
      "grad_norm": 0.41366298799946843,
      "learning_rate": 8.117851568655517e-07,
      "loss": 0.2664,
      "step": 1730
    },
    {
      "epoch": 0.3592258064516129,
      "grad_norm": 0.4107479182292912,
      "learning_rate": 8.089592802740039e-07,
      "loss": 0.2668,
      "step": 1740
    },
    {
      "epoch": 0.36129032258064514,
      "grad_norm": 0.4356758167861173,
      "learning_rate": 8.061173481520429e-07,
      "loss": 0.2636,
      "step": 1750
    },
    {
      "epoch": 0.36335483870967744,
      "grad_norm": 0.37618623784174804,
      "learning_rate": 8.032595081849154e-07,
      "loss": 0.2614,
      "step": 1760
    },
    {
      "epoch": 0.3654193548387097,
      "grad_norm": 0.41446607257101076,
      "learning_rate": 8.003859088845436e-07,
      "loss": 0.2642,
      "step": 1770
    },
    {
      "epoch": 0.36748387096774193,
      "grad_norm": 0.42523528400660005,
      "learning_rate": 7.974966995818066e-07,
      "loss": 0.2729,
      "step": 1780
    },
    {
      "epoch": 0.3695483870967742,
      "grad_norm": 0.3875366408745518,
      "learning_rate": 7.945920304187806e-07,
      "loss": 0.2667,
      "step": 1790
    },
    {
      "epoch": 0.3716129032258065,
      "grad_norm": 0.4057888088552019,
      "learning_rate": 7.916720523409366e-07,
      "loss": 0.2658,
      "step": 1800
    },
    {
      "epoch": 0.3736774193548387,
      "grad_norm": 0.4246975366858289,
      "learning_rate": 7.887369170892964e-07,
      "loss": 0.2704,
      "step": 1810
    },
    {
      "epoch": 0.37574193548387097,
      "grad_norm": 0.405560542887072,
      "learning_rate": 7.857867771925468e-07,
      "loss": 0.2645,
      "step": 1820
    },
    {
      "epoch": 0.3778064516129032,
      "grad_norm": 0.3976055398193886,
      "learning_rate": 7.828217859591133e-07,
      "loss": 0.266,
      "step": 1830
    },
    {
      "epoch": 0.37987096774193546,
      "grad_norm": 0.4071477013255938,
      "learning_rate": 7.798420974691935e-07,
      "loss": 0.267,
      "step": 1840
    },
    {
      "epoch": 0.38193548387096776,
      "grad_norm": 0.42926938061270814,
      "learning_rate": 7.7684786656675e-07,
      "loss": 0.267,
      "step": 1850
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.40068409678383476,
      "learning_rate": 7.738392488514627e-07,
      "loss": 0.2638,
      "step": 1860
    },
    {
      "epoch": 0.38606451612903225,
      "grad_norm": 0.41545866046107405,
      "learning_rate": 7.70816400670645e-07,
      "loss": 0.2648,
      "step": 1870
    },
    {
      "epoch": 0.3881290322580645,
      "grad_norm": 0.4023140436496999,
      "learning_rate": 7.677794791111168e-07,
      "loss": 0.2637,
      "step": 1880
    },
    {
      "epoch": 0.3901935483870968,
      "grad_norm": 0.4196388286712214,
      "learning_rate": 7.647286419910426e-07,
      "loss": 0.2647,
      "step": 1890
    },
    {
      "epoch": 0.39225806451612905,
      "grad_norm": 0.40124208669384503,
      "learning_rate": 7.616640478517293e-07,
      "loss": 0.2654,
      "step": 1900
    },
    {
      "epoch": 0.3943225806451613,
      "grad_norm": 0.41126451275454146,
      "learning_rate": 7.585858559493885e-07,
      "loss": 0.2631,
      "step": 1910
    },
    {
      "epoch": 0.39638709677419354,
      "grad_norm": 0.41848510984577836,
      "learning_rate": 7.554942262468593e-07,
      "loss": 0.2682,
      "step": 1920
    },
    {
      "epoch": 0.3984516129032258,
      "grad_norm": 0.43454563702411014,
      "learning_rate": 7.523893194052966e-07,
      "loss": 0.2619,
      "step": 1930
    },
    {
      "epoch": 0.4005161290322581,
      "grad_norm": 0.43881054463213787,
      "learning_rate": 7.49271296775821e-07,
      "loss": 0.2664,
      "step": 1940
    },
    {
      "epoch": 0.40258064516129033,
      "grad_norm": 0.4027456476422296,
      "learning_rate": 7.461403203911355e-07,
      "loss": 0.2659,
      "step": 1950
    },
    {
      "epoch": 0.4046451612903226,
      "grad_norm": 0.4130317888357524,
      "learning_rate": 7.429965529571036e-07,
      "loss": 0.261,
      "step": 1960
    },
    {
      "epoch": 0.4067096774193548,
      "grad_norm": 0.40488753602787214,
      "learning_rate": 7.398401578442952e-07,
      "loss": 0.263,
      "step": 1970
    },
    {
      "epoch": 0.4087741935483871,
      "grad_norm": 0.40658072770043274,
      "learning_rate": 7.366712990794961e-07,
      "loss": 0.2706,
      "step": 1980
    },
    {
      "epoch": 0.41083870967741937,
      "grad_norm": 0.4417207769552733,
      "learning_rate": 7.334901413371847e-07,
      "loss": 0.2695,
      "step": 1990
    },
    {
      "epoch": 0.4129032258064516,
      "grad_norm": 0.40600956756974876,
      "learning_rate": 7.302968499309737e-07,
      "loss": 0.265,
      "step": 2000
    },
    {
      "epoch": 0.41496774193548386,
      "grad_norm": 0.421884057260553,
      "learning_rate": 7.270915908050204e-07,
      "loss": 0.2631,
      "step": 2010
    },
    {
      "epoch": 0.4170322580645161,
      "grad_norm": 0.4060635608602976,
      "learning_rate": 7.238745305254019e-07,
      "loss": 0.2697,
      "step": 2020
    },
    {
      "epoch": 0.4190967741935484,
      "grad_norm": 0.44059203724336116,
      "learning_rate": 7.206458362714602e-07,
      "loss": 0.2565,
      "step": 2030
    },
    {
      "epoch": 0.42116129032258065,
      "grad_norm": 0.3968336903067748,
      "learning_rate": 7.17405675827114e-07,
      "loss": 0.2717,
      "step": 2040
    },
    {
      "epoch": 0.4232258064516129,
      "grad_norm": 0.41171391357598364,
      "learning_rate": 7.1415421757214e-07,
      "loss": 0.2645,
      "step": 2050
    },
    {
      "epoch": 0.42529032258064514,
      "grad_norm": 0.3876345602992099,
      "learning_rate": 7.10891630473422e-07,
      "loss": 0.2652,
      "step": 2060
    },
    {
      "epoch": 0.42735483870967744,
      "grad_norm": 0.39450051059803076,
      "learning_rate": 7.076180840761714e-07,
      "loss": 0.2632,
      "step": 2070
    },
    {
      "epoch": 0.4294193548387097,
      "grad_norm": 0.4358288881727055,
      "learning_rate": 7.043337484951154e-07,
      "loss": 0.2642,
      "step": 2080
    },
    {
      "epoch": 0.43148387096774193,
      "grad_norm": 0.41920035477123707,
      "learning_rate": 7.010387944056576e-07,
      "loss": 0.261,
      "step": 2090
    },
    {
      "epoch": 0.4335483870967742,
      "grad_norm": 0.4059103398768672,
      "learning_rate": 6.97733393035008e-07,
      "loss": 0.2628,
      "step": 2100
    },
    {
      "epoch": 0.4356129032258064,
      "grad_norm": 0.3982345695440609,
      "learning_rate": 6.944177161532851e-07,
      "loss": 0.2597,
      "step": 2110
    },
    {
      "epoch": 0.4376774193548387,
      "grad_norm": 0.4144966185638575,
      "learning_rate": 6.910919360645902e-07,
      "loss": 0.27,
      "step": 2120
    },
    {
      "epoch": 0.43974193548387097,
      "grad_norm": 0.4233480513748313,
      "learning_rate": 6.877562255980519e-07,
      "loss": 0.2681,
      "step": 2130
    },
    {
      "epoch": 0.4418064516129032,
      "grad_norm": 0.40910710385731447,
      "learning_rate": 6.844107580988471e-07,
      "loss": 0.2662,
      "step": 2140
    },
    {
      "epoch": 0.44387096774193546,
      "grad_norm": 0.4069330452268534,
      "learning_rate": 6.810557074191899e-07,
      "loss": 0.2633,
      "step": 2150
    },
    {
      "epoch": 0.44593548387096776,
      "grad_norm": 0.4033815804757132,
      "learning_rate": 6.776912479093001e-07,
      "loss": 0.2606,
      "step": 2160
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.41960546237510354,
      "learning_rate": 6.743175544083403e-07,
      "loss": 0.2623,
      "step": 2170
    },
    {
      "epoch": 0.45006451612903225,
      "grad_norm": 0.41168810646722503,
      "learning_rate": 6.709348022353318e-07,
      "loss": 0.264,
      "step": 2180
    },
    {
      "epoch": 0.4521290322580645,
      "grad_norm": 0.40385762651618623,
      "learning_rate": 6.675431671800436e-07,
      "loss": 0.2651,
      "step": 2190
    },
    {
      "epoch": 0.4541935483870968,
      "grad_norm": 0.4032346492996413,
      "learning_rate": 6.64142825493856e-07,
      "loss": 0.2632,
      "step": 2200
    },
    {
      "epoch": 0.45625806451612905,
      "grad_norm": 0.4053652025972668,
      "learning_rate": 6.607339538806034e-07,
      "loss": 0.2608,
      "step": 2210
    },
    {
      "epoch": 0.4583225806451613,
      "grad_norm": 0.41770642881401504,
      "learning_rate": 6.573167294873898e-07,
      "loss": 0.2558,
      "step": 2220
    },
    {
      "epoch": 0.46038709677419354,
      "grad_norm": 0.4112896752142016,
      "learning_rate": 6.538913298953845e-07,
      "loss": 0.2646,
      "step": 2230
    },
    {
      "epoch": 0.4624516129032258,
      "grad_norm": 0.41305858313874766,
      "learning_rate": 6.504579331105928e-07,
      "loss": 0.2661,
      "step": 2240
    },
    {
      "epoch": 0.4645161290322581,
      "grad_norm": 0.3990060429263245,
      "learning_rate": 6.470167175546063e-07,
      "loss": 0.2552,
      "step": 2250
    },
    {
      "epoch": 0.46658064516129033,
      "grad_norm": 0.4068327568812493,
      "learning_rate": 6.435678620553306e-07,
      "loss": 0.2625,
      "step": 2260
    },
    {
      "epoch": 0.4686451612903226,
      "grad_norm": 0.4122018568807427,
      "learning_rate": 6.401115458376924e-07,
      "loss": 0.2561,
      "step": 2270
    },
    {
      "epoch": 0.4707096774193548,
      "grad_norm": 0.4075890652073717,
      "learning_rate": 6.366479485143257e-07,
      "loss": 0.2655,
      "step": 2280
    },
    {
      "epoch": 0.4727741935483871,
      "grad_norm": 0.4041531603835199,
      "learning_rate": 6.331772500762382e-07,
      "loss": 0.2608,
      "step": 2290
    },
    {
      "epoch": 0.47483870967741937,
      "grad_norm": 0.401347899960604,
      "learning_rate": 6.296996308834575e-07,
      "loss": 0.2626,
      "step": 2300
    },
    {
      "epoch": 0.4769032258064516,
      "grad_norm": 0.4046833913093797,
      "learning_rate": 6.262152716556586e-07,
      "loss": 0.2645,
      "step": 2310
    },
    {
      "epoch": 0.47896774193548386,
      "grad_norm": 0.4112501171173604,
      "learning_rate": 6.227243534627724e-07,
      "loss": 0.2627,
      "step": 2320
    },
    {
      "epoch": 0.4810322580645161,
      "grad_norm": 0.4119800419244549,
      "learning_rate": 6.192270577155764e-07,
      "loss": 0.2666,
      "step": 2330
    },
    {
      "epoch": 0.4830967741935484,
      "grad_norm": 0.4067394550798155,
      "learning_rate": 6.157235661562672e-07,
      "loss": 0.2609,
      "step": 2340
    },
    {
      "epoch": 0.48516129032258065,
      "grad_norm": 0.4161737935572665,
      "learning_rate": 6.122140608490157e-07,
      "loss": 0.2631,
      "step": 2350
    },
    {
      "epoch": 0.4872258064516129,
      "grad_norm": 0.41837478258205407,
      "learning_rate": 6.086987241705066e-07,
      "loss": 0.2613,
      "step": 2360
    },
    {
      "epoch": 0.48929032258064514,
      "grad_norm": 0.37786501702152653,
      "learning_rate": 6.051777388004603e-07,
      "loss": 0.2659,
      "step": 2370
    },
    {
      "epoch": 0.49135483870967744,
      "grad_norm": 0.4026726334273472,
      "learning_rate": 6.0165128771214e-07,
      "loss": 0.2668,
      "step": 2380
    },
    {
      "epoch": 0.4934193548387097,
      "grad_norm": 0.43109902087561547,
      "learning_rate": 5.981195541628431e-07,
      "loss": 0.2665,
      "step": 2390
    },
    {
      "epoch": 0.49548387096774194,
      "grad_norm": 0.40800799018925554,
      "learning_rate": 5.945827216843779e-07,
      "loss": 0.2637,
      "step": 2400
    },
    {
      "epoch": 0.4975483870967742,
      "grad_norm": 0.40953889079215977,
      "learning_rate": 5.910409740735263e-07,
      "loss": 0.267,
      "step": 2410
    },
    {
      "epoch": 0.4996129032258064,
      "grad_norm": 0.40899543891195417,
      "learning_rate": 5.874944953824919e-07,
      "loss": 0.2583,
      "step": 2420
    },
    {
      "epoch": 0.5016774193548387,
      "grad_norm": 0.4251510009606146,
      "learning_rate": 5.83943469909337e-07,
      "loss": 0.268,
      "step": 2430
    },
    {
      "epoch": 0.5037419354838709,
      "grad_norm": 0.4058826730393694,
      "learning_rate": 5.803880821884032e-07,
      "loss": 0.2579,
      "step": 2440
    },
    {
      "epoch": 0.5058064516129033,
      "grad_norm": 0.4240346499686985,
      "learning_rate": 5.768285169807233e-07,
      "loss": 0.2618,
      "step": 2450
    },
    {
      "epoch": 0.5078709677419355,
      "grad_norm": 0.4054651824039119,
      "learning_rate": 5.73264959264419e-07,
      "loss": 0.2587,
      "step": 2460
    },
    {
      "epoch": 0.5099354838709678,
      "grad_norm": 0.4264297129067109,
      "learning_rate": 5.696975942250896e-07,
      "loss": 0.2608,
      "step": 2470
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.38763232793736896,
      "learning_rate": 5.661266072461866e-07,
      "loss": 0.2624,
      "step": 2480
    },
    {
      "epoch": 0.5140645161290323,
      "grad_norm": 0.39972658320821614,
      "learning_rate": 5.625521838993814e-07,
      "loss": 0.2628,
      "step": 2490
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 0.4094650932180596,
      "learning_rate": 5.589745099349219e-07,
      "loss": 0.2654,
      "step": 2500
    },
    {
      "epoch": 0.5181935483870967,
      "grad_norm": 0.40303191617270684,
      "learning_rate": 5.55393771271978e-07,
      "loss": 0.2626,
      "step": 2510
    },
    {
      "epoch": 0.520258064516129,
      "grad_norm": 0.4432634297156684,
      "learning_rate": 5.518101539889828e-07,
      "loss": 0.2631,
      "step": 2520
    },
    {
      "epoch": 0.5223225806451612,
      "grad_norm": 0.4019002372120617,
      "learning_rate": 5.482238443139597e-07,
      "loss": 0.2641,
      "step": 2530
    },
    {
      "epoch": 0.5243870967741936,
      "grad_norm": 0.3922554382852571,
      "learning_rate": 5.446350286148471e-07,
      "loss": 0.2588,
      "step": 2540
    },
    {
      "epoch": 0.5264516129032258,
      "grad_norm": 0.41838300381220644,
      "learning_rate": 5.410438933898127e-07,
      "loss": 0.2603,
      "step": 2550
    },
    {
      "epoch": 0.5285161290322581,
      "grad_norm": 0.4056835013387469,
      "learning_rate": 5.374506252575612e-07,
      "loss": 0.2655,
      "step": 2560
    },
    {
      "epoch": 0.5305806451612903,
      "grad_norm": 0.40888463409102277,
      "learning_rate": 5.338554109476379e-07,
      "loss": 0.2661,
      "step": 2570
    },
    {
      "epoch": 0.5326451612903226,
      "grad_norm": 0.4221212778601083,
      "learning_rate": 5.302584372907236e-07,
      "loss": 0.2643,
      "step": 2580
    },
    {
      "epoch": 0.5347096774193548,
      "grad_norm": 0.40948851318864904,
      "learning_rate": 5.266598912089259e-07,
      "loss": 0.2646,
      "step": 2590
    },
    {
      "epoch": 0.5367741935483871,
      "grad_norm": 0.3945682130480978,
      "learning_rate": 5.230599597060666e-07,
      "loss": 0.2622,
      "step": 2600
    },
    {
      "epoch": 0.5388387096774193,
      "grad_norm": 0.3959558379054268,
      "learning_rate": 5.194588298579623e-07,
      "loss": 0.2631,
      "step": 2610
    },
    {
      "epoch": 0.5409032258064516,
      "grad_norm": 0.4081097575523178,
      "learning_rate": 5.158566888027038e-07,
      "loss": 0.2596,
      "step": 2620
    },
    {
      "epoch": 0.5429677419354839,
      "grad_norm": 0.39268286624553117,
      "learning_rate": 5.122537237309305e-07,
      "loss": 0.2604,
      "step": 2630
    },
    {
      "epoch": 0.5450322580645162,
      "grad_norm": 0.39342604284402627,
      "learning_rate": 5.086501218761033e-07,
      "loss": 0.2586,
      "step": 2640
    },
    {
      "epoch": 0.5470967741935484,
      "grad_norm": 0.40285421355756235,
      "learning_rate": 5.050460705047742e-07,
      "loss": 0.2572,
      "step": 2650
    },
    {
      "epoch": 0.5491612903225807,
      "grad_norm": 0.4020063207626797,
      "learning_rate": 5.014417569068554e-07,
      "loss": 0.2643,
      "step": 2660
    },
    {
      "epoch": 0.5512258064516129,
      "grad_norm": 0.42131953987445797,
      "learning_rate": 4.978373683858858e-07,
      "loss": 0.2643,
      "step": 2670
    },
    {
      "epoch": 0.5532903225806451,
      "grad_norm": 0.3945931863557437,
      "learning_rate": 4.942330922492975e-07,
      "loss": 0.2645,
      "step": 2680
    },
    {
      "epoch": 0.5553548387096774,
      "grad_norm": 0.40345218634673974,
      "learning_rate": 4.906291157986834e-07,
      "loss": 0.2629,
      "step": 2690
    },
    {
      "epoch": 0.5574193548387096,
      "grad_norm": 0.4053650394538655,
      "learning_rate": 4.870256263200616e-07,
      "loss": 0.2596,
      "step": 2700
    },
    {
      "epoch": 0.5594838709677419,
      "grad_norm": 0.38082232410527134,
      "learning_rate": 4.834228110741447e-07,
      "loss": 0.2686,
      "step": 2710
    },
    {
      "epoch": 0.5615483870967742,
      "grad_norm": 0.4244897702153668,
      "learning_rate": 4.798208572866074e-07,
      "loss": 0.2654,
      "step": 2720
    },
    {
      "epoch": 0.5636129032258065,
      "grad_norm": 0.4168979030949182,
      "learning_rate": 4.762199521383575e-07,
      "loss": 0.2656,
      "step": 2730
    },
    {
      "epoch": 0.5656774193548387,
      "grad_norm": 0.4008885860052325,
      "learning_rate": 4.72620282755809e-07,
      "loss": 0.2641,
      "step": 2740
    },
    {
      "epoch": 0.567741935483871,
      "grad_norm": 0.4133977944936882,
      "learning_rate": 4.690220362011573e-07,
      "loss": 0.2624,
      "step": 2750
    },
    {
      "epoch": 0.5698064516129032,
      "grad_norm": 0.39400301829128825,
      "learning_rate": 4.6542539946265823e-07,
      "loss": 0.2598,
      "step": 2760
    },
    {
      "epoch": 0.5718709677419355,
      "grad_norm": 0.40929967482891283,
      "learning_rate": 4.618305594449113e-07,
      "loss": 0.263,
      "step": 2770
    },
    {
      "epoch": 0.5739354838709677,
      "grad_norm": 0.4056920391576304,
      "learning_rate": 4.5823770295914706e-07,
      "loss": 0.2595,
      "step": 2780
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.4107265334626862,
      "learning_rate": 4.5464701671351815e-07,
      "loss": 0.2647,
      "step": 2790
    },
    {
      "epoch": 0.5780645161290323,
      "grad_norm": 0.3913927583282506,
      "learning_rate": 4.5105868730339785e-07,
      "loss": 0.2597,
      "step": 2800
    },
    {
      "epoch": 0.5801290322580646,
      "grad_norm": 0.41022478158711445,
      "learning_rate": 4.4747290120168316e-07,
      "loss": 0.2614,
      "step": 2810
    },
    {
      "epoch": 0.5821935483870968,
      "grad_norm": 0.4048943417693181,
      "learning_rate": 4.438898447491036e-07,
      "loss": 0.2623,
      "step": 2820
    },
    {
      "epoch": 0.584258064516129,
      "grad_norm": 0.41841621193034295,
      "learning_rate": 4.403097041445395e-07,
      "loss": 0.2626,
      "step": 2830
    },
    {
      "epoch": 0.5863225806451613,
      "grad_norm": 0.41332586169270713,
      "learning_rate": 4.367326654353436e-07,
      "loss": 0.2619,
      "step": 2840
    },
    {
      "epoch": 0.5883870967741935,
      "grad_norm": 0.42980292999632475,
      "learning_rate": 4.331589145076746e-07,
      "loss": 0.2662,
      "step": 2850
    },
    {
      "epoch": 0.5904516129032258,
      "grad_norm": 0.4085925990454811,
      "learning_rate": 4.295886370768367e-07,
      "loss": 0.2661,
      "step": 2860
    },
    {
      "epoch": 0.592516129032258,
      "grad_norm": 0.3938359040261475,
      "learning_rate": 4.2602201867762875e-07,
      "loss": 0.2549,
      "step": 2870
    },
    {
      "epoch": 0.5945806451612903,
      "grad_norm": 0.4009963393291789,
      "learning_rate": 4.224592446547024e-07,
      "loss": 0.2584,
      "step": 2880
    },
    {
      "epoch": 0.5966451612903226,
      "grad_norm": 0.40377828376876823,
      "learning_rate": 4.1890050015293036e-07,
      "loss": 0.2642,
      "step": 2890
    },
    {
      "epoch": 0.5987096774193549,
      "grad_norm": 0.4084739109095333,
      "learning_rate": 4.153459701077856e-07,
      "loss": 0.2637,
      "step": 2900
    },
    {
      "epoch": 0.6007741935483871,
      "grad_norm": 0.4355917374420026,
      "learning_rate": 4.117958392357303e-07,
      "loss": 0.267,
      "step": 2910
    },
    {
      "epoch": 0.6028387096774194,
      "grad_norm": 0.4111357071539257,
      "learning_rate": 4.082502920246173e-07,
      "loss": 0.2571,
      "step": 2920
    },
    {
      "epoch": 0.6049032258064516,
      "grad_norm": 0.4300310059373518,
      "learning_rate": 4.047095127241026e-07,
      "loss": 0.2663,
      "step": 2930
    },
    {
      "epoch": 0.6069677419354839,
      "grad_norm": 0.41504350370170773,
      "learning_rate": 4.011736853360702e-07,
      "loss": 0.2668,
      "step": 2940
    },
    {
      "epoch": 0.6090322580645161,
      "grad_norm": 0.39347081849396953,
      "learning_rate": 3.976429936050709e-07,
      "loss": 0.262,
      "step": 2950
    },
    {
      "epoch": 0.6110967741935484,
      "grad_norm": 0.4001140349315572,
      "learning_rate": 3.941176210087737e-07,
      "loss": 0.2607,
      "step": 2960
    },
    {
      "epoch": 0.6131612903225806,
      "grad_norm": 0.41999186019603213,
      "learning_rate": 3.905977507484304e-07,
      "loss": 0.2631,
      "step": 2970
    },
    {
      "epoch": 0.615225806451613,
      "grad_norm": 0.40433153562642776,
      "learning_rate": 3.870835657393558e-07,
      "loss": 0.2674,
      "step": 2980
    },
    {
      "epoch": 0.6172903225806452,
      "grad_norm": 0.41600570415494387,
      "learning_rate": 3.8357524860142205e-07,
      "loss": 0.2633,
      "step": 2990
    },
    {
      "epoch": 0.6193548387096774,
      "grad_norm": 0.39498755788207757,
      "learning_rate": 3.8007298164956866e-07,
      "loss": 0.2677,
      "step": 3000
    },
    {
      "epoch": 0.6214193548387097,
      "grad_norm": 0.4222886993315553,
      "learning_rate": 3.765769468843284e-07,
      "loss": 0.2513,
      "step": 3010
    },
    {
      "epoch": 0.6234838709677419,
      "grad_norm": 0.3886939892314166,
      "learning_rate": 3.7308732598236896e-07,
      "loss": 0.2589,
      "step": 3020
    },
    {
      "epoch": 0.6255483870967742,
      "grad_norm": 0.41019034012665107,
      "learning_rate": 3.696043002870521e-07,
      "loss": 0.2602,
      "step": 3030
    },
    {
      "epoch": 0.6276129032258064,
      "grad_norm": 0.41109492024152083,
      "learning_rate": 3.6612805079900954e-07,
      "loss": 0.2615,
      "step": 3040
    },
    {
      "epoch": 0.6296774193548387,
      "grad_norm": 0.40387895619985564,
      "learning_rate": 3.6265875816673784e-07,
      "loss": 0.2623,
      "step": 3050
    },
    {
      "epoch": 0.6317419354838709,
      "grad_norm": 0.41900812828527817,
      "learning_rate": 3.591966026772096e-07,
      "loss": 0.2588,
      "step": 3060
    },
    {
      "epoch": 0.6338064516129033,
      "grad_norm": 0.40787156582167655,
      "learning_rate": 3.557417642465055e-07,
      "loss": 0.2639,
      "step": 3070
    },
    {
      "epoch": 0.6358709677419355,
      "grad_norm": 0.3898972418936542,
      "learning_rate": 3.522944224104643e-07,
      "loss": 0.2649,
      "step": 3080
    },
    {
      "epoch": 0.6379354838709678,
      "grad_norm": 0.4141718763979276,
      "learning_rate": 3.4885475631535253e-07,
      "loss": 0.2692,
      "step": 3090
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.41506013689499804,
      "learning_rate": 3.4542294470855626e-07,
      "loss": 0.2602,
      "step": 3100
    },
    {
      "epoch": 0.6420645161290323,
      "grad_norm": 0.39476928102019726,
      "learning_rate": 3.4199916592929055e-07,
      "loss": 0.261,
      "step": 3110
    },
    {
      "epoch": 0.6441290322580645,
      "grad_norm": 0.416429380761246,
      "learning_rate": 3.3858359789933313e-07,
      "loss": 0.2627,
      "step": 3120
    },
    {
      "epoch": 0.6461935483870968,
      "grad_norm": 0.40310812066816015,
      "learning_rate": 3.351764181137771e-07,
      "loss": 0.2636,
      "step": 3130
    },
    {
      "epoch": 0.648258064516129,
      "grad_norm": 0.3946905030128788,
      "learning_rate": 3.3177780363180923e-07,
      "loss": 0.2622,
      "step": 3140
    },
    {
      "epoch": 0.6503225806451612,
      "grad_norm": 0.42397756082438576,
      "learning_rate": 3.2838793106750625e-07,
      "loss": 0.2603,
      "step": 3150
    },
    {
      "epoch": 0.6523870967741936,
      "grad_norm": 0.4169594318618278,
      "learning_rate": 3.250069765806586e-07,
      "loss": 0.2613,
      "step": 3160
    },
    {
      "epoch": 0.6544516129032258,
      "grad_norm": 0.41756086123454705,
      "learning_rate": 3.2163511586761566e-07,
      "loss": 0.2617,
      "step": 3170
    },
    {
      "epoch": 0.6565161290322581,
      "grad_norm": 0.40493296338507545,
      "learning_rate": 3.1827252415215454e-07,
      "loss": 0.2656,
      "step": 3180
    },
    {
      "epoch": 0.6585806451612903,
      "grad_norm": 0.4086571674620378,
      "learning_rate": 3.149193761763764e-07,
      "loss": 0.2611,
      "step": 3190
    },
    {
      "epoch": 0.6606451612903226,
      "grad_norm": 0.4171128006614707,
      "learning_rate": 3.115758461916229e-07,
      "loss": 0.2597,
      "step": 3200
    },
    {
      "epoch": 0.6627096774193548,
      "grad_norm": 0.3880167728055982,
      "learning_rate": 3.0824210794942333e-07,
      "loss": 0.263,
      "step": 3210
    },
    {
      "epoch": 0.6647741935483871,
      "grad_norm": 0.40448031317959615,
      "learning_rate": 3.049183346924643e-07,
      "loss": 0.2632,
      "step": 3220
    },
    {
      "epoch": 0.6668387096774193,
      "grad_norm": 0.41914072800271474,
      "learning_rate": 3.016046991455865e-07,
      "loss": 0.2579,
      "step": 3230
    },
    {
      "epoch": 0.6689032258064516,
      "grad_norm": 0.3860018527353455,
      "learning_rate": 2.983013735068105e-07,
      "loss": 0.2647,
      "step": 3240
    },
    {
      "epoch": 0.6709677419354839,
      "grad_norm": 0.40975610194833284,
      "learning_rate": 2.9500852943838603e-07,
      "loss": 0.2573,
      "step": 3250
    },
    {
      "epoch": 0.6730322580645162,
      "grad_norm": 0.42646644618083696,
      "learning_rate": 2.9172633805787293e-07,
      "loss": 0.2653,
      "step": 3260
    },
    {
      "epoch": 0.6750967741935484,
      "grad_norm": 0.4086698532968832,
      "learning_rate": 2.88454969929248e-07,
      "loss": 0.2574,
      "step": 3270
    },
    {
      "epoch": 0.6771612903225807,
      "grad_norm": 0.4178375142437365,
      "learning_rate": 2.851945950540419e-07,
      "loss": 0.2623,
      "step": 3280
    },
    {
      "epoch": 0.6792258064516129,
      "grad_norm": 0.4057288769351614,
      "learning_rate": 2.819453828625038e-07,
      "loss": 0.2651,
      "step": 3290
    },
    {
      "epoch": 0.6812903225806451,
      "grad_norm": 0.4158815157531405,
      "learning_rate": 2.787075022047981e-07,
      "loss": 0.263,
      "step": 3300
    },
    {
      "epoch": 0.6833548387096774,
      "grad_norm": 0.4049970316693273,
      "learning_rate": 2.754811213422288e-07,
      "loss": 0.2645,
      "step": 3310
    },
    {
      "epoch": 0.6854193548387096,
      "grad_norm": 0.39671368245623034,
      "learning_rate": 2.722664079384954e-07,
      "loss": 0.2589,
      "step": 3320
    },
    {
      "epoch": 0.6874838709677419,
      "grad_norm": 0.4039120828242038,
      "learning_rate": 2.6906352905098116e-07,
      "loss": 0.2614,
      "step": 3330
    },
    {
      "epoch": 0.6895483870967742,
      "grad_norm": 0.4170702189116275,
      "learning_rate": 2.658726511220712e-07,
      "loss": 0.2657,
      "step": 3340
    },
    {
      "epoch": 0.6916129032258065,
      "grad_norm": 0.4237911525616141,
      "learning_rate": 2.6269393997050205e-07,
      "loss": 0.2618,
      "step": 3350
    },
    {
      "epoch": 0.6936774193548387,
      "grad_norm": 0.4111737496158447,
      "learning_rate": 2.595275607827457e-07,
      "loss": 0.2606,
      "step": 3360
    },
    {
      "epoch": 0.695741935483871,
      "grad_norm": 0.38990938914606454,
      "learning_rate": 2.563736781044262e-07,
      "loss": 0.2577,
      "step": 3370
    },
    {
      "epoch": 0.6978064516129032,
      "grad_norm": 0.407914899989363,
      "learning_rate": 2.5323245583176667e-07,
      "loss": 0.2609,
      "step": 3380
    },
| { | |
| "epoch": 0.6998709677419355, | |
| "grad_norm": 0.39451690942708684, | |
| "learning_rate": 2.5010405720307355e-07, | |
| "loss": 0.2595, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.7019354838709677, | |
| "grad_norm": 0.4092342022576978, | |
| "learning_rate": 2.469886447902541e-07, | |
| "loss": 0.2627, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.39988371342369805, | |
| "learning_rate": 2.438863804903666e-07, | |
| "loss": 0.262, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.7060645161290322, | |
| "grad_norm": 0.3964335866793338, | |
| "learning_rate": 2.4079742551720887e-07, | |
| "loss": 0.2592, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.7081290322580646, | |
| "grad_norm": 0.4315689515435995, | |
| "learning_rate": 2.3772194039293896e-07, | |
| "loss": 0.2629, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.7101935483870968, | |
| "grad_norm": 0.4070533032882783, | |
| "learning_rate": 2.3466008493973477e-07, | |
| "loss": 0.2634, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.712258064516129, | |
| "grad_norm": 0.4139707970034922, | |
| "learning_rate": 2.3161201827148725e-07, | |
| "loss": 0.2668, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.7143225806451613, | |
| "grad_norm": 0.4080257693965919, | |
| "learning_rate": 2.2857789878553309e-07, | |
| "loss": 0.2639, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.7163870967741935, | |
| "grad_norm": 0.38944573511805763, | |
| "learning_rate": 2.2555788415442288e-07, | |
| "loss": 0.2591, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.7184516129032258, | |
| "grad_norm": 0.40451831206378125, | |
| "learning_rate": 2.22552131317727e-07, | |
| "loss": 0.2608, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.720516129032258, | |
| "grad_norm": 0.40945571442698814, | |
| "learning_rate": 2.1956079647388025e-07, | |
| "loss": 0.254, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.7225806451612903, | |
| "grad_norm": 0.4120262445632641, | |
| "learning_rate": 2.165840350720655e-07, | |
| "loss": 0.2634, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.7246451612903225, | |
| "grad_norm": 0.38102966990936765, | |
| "learning_rate": 2.1362200180413481e-07, | |
| "loss": 0.2612, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.7267096774193549, | |
| "grad_norm": 0.40448293811396235, | |
| "learning_rate": 2.1067485059657032e-07, | |
| "loss": 0.2654, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.7287741935483871, | |
| "grad_norm": 0.42379282553088654, | |
| "learning_rate": 2.0774273460248577e-07, | |
| "loss": 0.266, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.7308387096774194, | |
| "grad_norm": 0.3958635594454363, | |
| "learning_rate": 2.0482580619366796e-07, | |
| "loss": 0.2599, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.7329032258064516, | |
| "grad_norm": 0.3929275138734596, | |
| "learning_rate": 2.019242169526581e-07, | |
| "loss": 0.2614, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.7349677419354839, | |
| "grad_norm": 0.4099908819814468, | |
| "learning_rate": 1.9903811766487426e-07, | |
| "loss": 0.2595, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.7370322580645161, | |
| "grad_norm": 0.4136881756772272, | |
| "learning_rate": 1.9616765831077603e-07, | |
| "loss": 0.268, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.7390967741935484, | |
| "grad_norm": 0.4103952583965791, | |
| "learning_rate": 1.9331298805807095e-07, | |
| "loss": 0.2667, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.7411612903225806, | |
| "grad_norm": 0.4043875841263039, | |
| "learning_rate": 1.9047425525396161e-07, | |
| "loss": 0.2591, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.743225806451613, | |
| "grad_norm": 0.40432519757481955, | |
| "learning_rate": 1.876516074174379e-07, | |
| "loss": 0.2583, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7452903225806452, | |
| "grad_norm": 0.3923980263069265, | |
| "learning_rate": 1.848451912316103e-07, | |
| "loss": 0.2622, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.7473548387096774, | |
| "grad_norm": 0.41193174262995386, | |
| "learning_rate": 1.8205515253608688e-07, | |
| "loss": 0.2591, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.7494193548387097, | |
| "grad_norm": 0.3997627378786646, | |
| "learning_rate": 1.792816363193952e-07, | |
| "loss": 0.2632, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.7514838709677419, | |
| "grad_norm": 0.4203820131288817, | |
| "learning_rate": 1.7652478671144755e-07, | |
| "loss": 0.2603, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.7535483870967742, | |
| "grad_norm": 0.41133420796621517, | |
| "learning_rate": 1.7378474697605128e-07, | |
| "loss": 0.2642, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.7556129032258064, | |
| "grad_norm": 0.4037148313043383, | |
| "learning_rate": 1.7106165950346318e-07, | |
| "loss": 0.2656, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.7576774193548387, | |
| "grad_norm": 0.4010420205215928, | |
| "learning_rate": 1.683556658029903e-07, | |
| "loss": 0.2601, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.7597419354838709, | |
| "grad_norm": 0.4278976559359214, | |
| "learning_rate": 1.656669064956368e-07, | |
| "loss": 0.2586, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.7618064516129033, | |
| "grad_norm": 0.41318596694086984, | |
| "learning_rate": 1.6299552130679578e-07, | |
| "loss": 0.2652, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.7638709677419355, | |
| "grad_norm": 0.41756399746678385, | |
| "learning_rate": 1.6034164905898768e-07, | |
| "loss": 0.2634, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7659354838709678, | |
| "grad_norm": 0.45439314697770533, | |
| "learning_rate": 1.5770542766464773e-07, | |
| "loss": 0.2616, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.3959443888976775, | |
| "learning_rate": 1.5508699411895738e-07, | |
| "loss": 0.2668, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.7700645161290323, | |
| "grad_norm": 0.39850312610937977, | |
| "learning_rate": 1.524864844927266e-07, | |
| "loss": 0.2646, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.7721290322580645, | |
| "grad_norm": 0.41376874719752166, | |
| "learning_rate": 1.4990403392532159e-07, | |
| "loss": 0.2577, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.7741935483870968, | |
| "grad_norm": 0.4063544496998586, | |
| "learning_rate": 1.473397766176431e-07, | |
| "loss": 0.2649, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.776258064516129, | |
| "grad_norm": 0.4353014336434687, | |
| "learning_rate": 1.4479384582515153e-07, | |
| "loss": 0.2643, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.7783225806451612, | |
| "grad_norm": 0.4018175153121587, | |
| "learning_rate": 1.4226637385094247e-07, | |
| "loss": 0.2618, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.7803870967741936, | |
| "grad_norm": 0.42157127333864497, | |
| "learning_rate": 1.3975749203887228e-07, | |
| "loss": 0.2651, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.7824516129032258, | |
| "grad_norm": 0.43186635599469986, | |
| "learning_rate": 1.3726733076673085e-07, | |
| "loss": 0.2633, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.7845161290322581, | |
| "grad_norm": 0.3964540005139217, | |
| "learning_rate": 1.3479601943946761e-07, | |
| "loss": 0.2566, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7865806451612903, | |
| "grad_norm": 0.44845859907363916, | |
| "learning_rate": 1.323436864824664e-07, | |
| "loss": 0.2635, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.7886451612903226, | |
| "grad_norm": 0.39439621730318203, | |
| "learning_rate": 1.299104593348721e-07, | |
| "loss": 0.267, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.7907096774193548, | |
| "grad_norm": 0.4258443910811124, | |
| "learning_rate": 1.2749646444296703e-07, | |
| "loss": 0.2629, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.7927741935483871, | |
| "grad_norm": 0.4061749201396272, | |
| "learning_rate": 1.2510182725360086e-07, | |
| "loss": 0.256, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.7948387096774193, | |
| "grad_norm": 0.4053363945100216, | |
| "learning_rate": 1.2272667220767158e-07, | |
| "loss": 0.267, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7969032258064516, | |
| "grad_norm": 0.39724264268663745, | |
| "learning_rate": 1.2037112273365818e-07, | |
| "loss": 0.2601, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.7989677419354839, | |
| "grad_norm": 0.40962689626618026, | |
| "learning_rate": 1.1803530124120714e-07, | |
| "loss": 0.2614, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.8010322580645162, | |
| "grad_norm": 0.39171805970284057, | |
| "learning_rate": 1.157193291147705e-07, | |
| "loss": 0.2604, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.8030967741935484, | |
| "grad_norm": 0.38720737774209957, | |
| "learning_rate": 1.1342332670729882e-07, | |
| "loss": 0.258, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.8051612903225807, | |
| "grad_norm": 0.398012624254619, | |
| "learning_rate": 1.1114741333398592e-07, | |
| "loss": 0.2621, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.8072258064516129, | |
| "grad_norm": 0.40289439621605116, | |
| "learning_rate": 1.0889170726606933e-07, | |
| "loss": 0.2586, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.8092903225806451, | |
| "grad_norm": 0.4232682123948614, | |
| "learning_rate": 1.066563257246838e-07, | |
| "loss": 0.2569, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.8113548387096774, | |
| "grad_norm": 0.4028783048716607, | |
| "learning_rate": 1.0444138487476944e-07, | |
| "loss": 0.2609, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.8134193548387096, | |
| "grad_norm": 0.4002686348680681, | |
| "learning_rate": 1.0224699981903517e-07, | |
| "loss": 0.2654, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.8154838709677419, | |
| "grad_norm": 0.39543469139493387, | |
| "learning_rate": 1.0007328459197778e-07, | |
| "loss": 0.2628, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.8175483870967742, | |
| "grad_norm": 0.4157630096664263, | |
| "learning_rate": 9.792035215395556e-08, | |
| "loss": 0.263, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.8196129032258065, | |
| "grad_norm": 0.41685800951642293, | |
| "learning_rate": 9.578831438531776e-08, | |
| "loss": 0.2669, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.8216774193548387, | |
| "grad_norm": 0.4061936200965336, | |
| "learning_rate": 9.3677282080591e-08, | |
| "loss": 0.2622, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.823741935483871, | |
| "grad_norm": 0.40925273863210293, | |
| "learning_rate": 9.158736494272179e-08, | |
| "loss": 0.2612, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.8258064516129032, | |
| "grad_norm": 0.40618534177947946, | |
| "learning_rate": 8.951867157737558e-08, | |
| "loss": 0.2682, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.8278709677419355, | |
| "grad_norm": 0.3979706890222173, | |
| "learning_rate": 8.747130948729226e-08, | |
| "loss": 0.2596, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.8299354838709677, | |
| "grad_norm": 0.4071872432393417, | |
| "learning_rate": 8.544538506670074e-08, | |
| "loss": 0.2587, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.40836170061759924, | |
| "learning_rate": 8.344100359578904e-08, | |
| "loss": 0.2588, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.8340645161290322, | |
| "grad_norm": 0.3895000386110786, | |
| "learning_rate": 8.145826923523358e-08, | |
| "loss": 0.2547, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.8361290322580646, | |
| "grad_norm": 0.40154696748699115, | |
| "learning_rate": 7.949728502078668e-08, | |
| "loss": 0.2584, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.8381935483870968, | |
| "grad_norm": 0.4376818236899298, | |
| "learning_rate": 7.755815285792172e-08, | |
| "loss": 0.2688, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.840258064516129, | |
| "grad_norm": 0.4046160460755729, | |
| "learning_rate": 7.564097351653742e-08, | |
| "loss": 0.2705, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.8423225806451613, | |
| "grad_norm": 0.39547101151078623, | |
| "learning_rate": 7.374584662572142e-08, | |
| "loss": 0.2659, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.8443870967741935, | |
| "grad_norm": 0.37332809148900736, | |
| "learning_rate": 7.187287066857289e-08, | |
| "loss": 0.2574, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.8464516129032258, | |
| "grad_norm": 0.46033640896289585, | |
| "learning_rate": 7.002214297708481e-08, | |
| "loss": 0.2569, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.848516129032258, | |
| "grad_norm": 0.39191927118713255, | |
| "learning_rate": 6.819375972708536e-08, | |
| "loss": 0.258, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.8505806451612903, | |
| "grad_norm": 0.41428077941004704, | |
| "learning_rate": 6.6387815933241e-08, | |
| "loss": 0.2618, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.8526451612903225, | |
| "grad_norm": 0.4107387220224137, | |
| "learning_rate": 6.460440544411777e-08, | |
| "loss": 0.2622, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.8547096774193549, | |
| "grad_norm": 0.39642375544117775, | |
| "learning_rate": 6.284362093730545e-08, | |
| "loss": 0.2585, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.8567741935483871, | |
| "grad_norm": 0.37930337406912923, | |
| "learning_rate": 6.110555391460026e-08, | |
| "loss": 0.2617, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.8588387096774194, | |
| "grad_norm": 0.377232478401539, | |
| "learning_rate": 5.9390294697251045e-08, | |
| "loss": 0.2583, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.8609032258064516, | |
| "grad_norm": 0.3888835569603603, | |
| "learning_rate": 5.7697932421264415e-08, | |
| "loss": 0.2529, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.8629677419354839, | |
| "grad_norm": 0.3934785671025381, | |
| "learning_rate": 5.602855503277376e-08, | |
| "loss": 0.2593, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.8650322580645161, | |
| "grad_norm": 0.38987380628908475, | |
| "learning_rate": 5.438224928346791e-08, | |
| "loss": 0.2611, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.8670967741935484, | |
| "grad_norm": 0.40191458902063176, | |
| "learning_rate": 5.275910072608408e-08, | |
| "loss": 0.2648, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.8691612903225806, | |
| "grad_norm": 0.40555133430607165, | |
| "learning_rate": 5.115919370996097e-08, | |
| "loss": 0.2646, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.8712258064516128, | |
| "grad_norm": 0.39189346564182975, | |
| "learning_rate": 4.9582611376655924e-08, | |
| "loss": 0.2697, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.8732903225806452, | |
| "grad_norm": 0.3977403739928427, | |
| "learning_rate": 4.8029435655624785e-08, | |
| "loss": 0.256, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.8753548387096775, | |
| "grad_norm": 0.4107418168891212, | |
| "learning_rate": 4.6499747259963254e-08, | |
| "loss": 0.2618, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.8774193548387097, | |
| "grad_norm": 0.4152156746116232, | |
| "learning_rate": 4.499362568221327e-08, | |
| "loss": 0.2562, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8794838709677419, | |
| "grad_norm": 0.41269224554676137, | |
| "learning_rate": 4.351114919023197e-08, | |
| "loss": 0.2638, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.8815483870967742, | |
| "grad_norm": 0.39019715520848103, | |
| "learning_rate": 4.205239482312445e-08, | |
| "loss": 0.2608, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.8836129032258064, | |
| "grad_norm": 0.39460366251134543, | |
| "learning_rate": 4.0617438387239746e-08, | |
| "loss": 0.2644, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.8856774193548387, | |
| "grad_norm": 0.38841049201774, | |
| "learning_rate": 3.9206354452232135e-08, | |
| "loss": 0.2586, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.8877419354838709, | |
| "grad_norm": 0.3995452257322332, | |
| "learning_rate": 3.7819216347185715e-08, | |
| "loss": 0.2632, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8898064516129032, | |
| "grad_norm": 0.3921492973317569, | |
| "learning_rate": 3.6456096156803616e-08, | |
| "loss": 0.2591, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.8918709677419355, | |
| "grad_norm": 0.37963166184003594, | |
| "learning_rate": 3.5117064717662406e-08, | |
| "loss": 0.2574, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.8939354838709678, | |
| "grad_norm": 0.4129813226681988, | |
| "learning_rate": 3.380219161453063e-08, | |
| "loss": 0.2662, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.40974368918108645, | |
| "learning_rate": 3.251154517675264e-08, | |
| "loss": 0.2604, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.8980645161290323, | |
| "grad_norm": 0.4245296466579207, | |
| "learning_rate": 3.124519247469814e-08, | |
| "loss": 0.2607, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.9001290322580645, | |
| "grad_norm": 0.42346220490081854, | |
| "learning_rate": 3.0003199316276486e-08, | |
| "loss": 0.2568, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.9021935483870968, | |
| "grad_norm": 0.384390730131664, | |
| "learning_rate": 2.8785630243517156e-08, | |
| "loss": 0.2554, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.904258064516129, | |
| "grad_norm": 0.4161617174420004, | |
| "learning_rate": 2.759254852921522e-08, | |
| "loss": 0.2611, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.9063225806451612, | |
| "grad_norm": 0.4096140142802804, | |
| "learning_rate": 2.6424016173643816e-08, | |
| "loss": 0.2613, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.9083870967741936, | |
| "grad_norm": 0.4036134224250512, | |
| "learning_rate": 2.5280093901331957e-08, | |
| "loss": 0.2564, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.9104516129032258, | |
| "grad_norm": 0.3951213596923248, | |
| "learning_rate": 2.4160841157908894e-08, | |
| "loss": 0.2626, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.9125161290322581, | |
| "grad_norm": 0.4255511159427451, | |
| "learning_rate": 2.3066316107014984e-08, | |
| "loss": 0.267, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.9145806451612903, | |
| "grad_norm": 0.39941397499664477, | |
| "learning_rate": 2.199657562727919e-08, | |
| "loss": 0.2644, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.9166451612903226, | |
| "grad_norm": 0.41202712449315076, | |
| "learning_rate": 2.0951675309363038e-08, | |
| "loss": 0.2662, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.9187096774193548, | |
| "grad_norm": 0.4141354253620865, | |
| "learning_rate": 1.9931669453072064e-08, | |
| "loss": 0.2682, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.9207741935483871, | |
| "grad_norm": 0.407975000927539, | |
| "learning_rate": 1.893661106453387e-08, | |
| "loss": 0.2657, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.9228387096774193, | |
| "grad_norm": 0.4080505082932939, | |
| "learning_rate": 1.7966551853443813e-08, | |
| "loss": 0.2591, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.9249032258064516, | |
| "grad_norm": 0.4177794728607882, | |
| "learning_rate": 1.7021542230377495e-08, | |
| "loss": 0.2602, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.9269677419354839, | |
| "grad_norm": 0.4337602592447264, | |
| "learning_rate": 1.610163130417119e-08, | |
| "loss": 0.2629, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.9290322580645162, | |
| "grad_norm": 0.40025965085906845, | |
| "learning_rate": 1.520686687937006e-08, | |
| "loss": 0.2601, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9310967741935484, | |
| "grad_norm": 0.3954536343382873, | |
| "learning_rate": 1.4337295453743848e-08, | |
| "loss": 0.2625, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.9331612903225807, | |
| "grad_norm": 0.4030179174929269, | |
| "learning_rate": 1.3492962215870208e-08, | |
| "loss": 0.2625, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.9352258064516129, | |
| "grad_norm": 0.40819467221659395, | |
| "learning_rate": 1.2673911042786812e-08, | |
| "loss": 0.2623, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.9372903225806452, | |
| "grad_norm": 0.3975548605776835, | |
| "learning_rate": 1.188018449771111e-08, | |
| "loss": 0.2587, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.9393548387096774, | |
| "grad_norm": 0.38620015604289587, | |
| "learning_rate": 1.1111823827828438e-08, | |
| "loss": 0.2629, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.9414193548387096, | |
| "grad_norm": 0.3915952951322532, | |
| "learning_rate": 1.0368868962148446e-08, | |
| "loss": 0.2602, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.9434838709677419, | |
| "grad_norm": 0.42822828805165253, | |
| "learning_rate": 9.651358509430385e-09, | |
| "loss": 0.2668, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.9455483870967742, | |
| "grad_norm": 0.4110557870118209, | |
| "learning_rate": 8.959329756176359e-09, | |
| "loss": 0.2614, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.9476129032258065, | |
| "grad_norm": 0.4051400508982645, | |
| "learning_rate": 8.292818664694223e-09, | |
| "loss": 0.2579, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.9496774193548387, | |
| "grad_norm": 0.41096745151064146, | |
| "learning_rate": 7.651859871228072e-09, | |
| "loss": 0.2638, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.951741935483871, | |
| "grad_norm": 0.40156811491530275, | |
| "learning_rate": 7.0364866841589045e-09, | |
| "loss": 0.2631, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.9538064516129032, | |
| "grad_norm": 0.4336183195339135, | |
| "learning_rate": 6.446731082273449e-09, | |
| "loss": 0.2626, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.9558709677419355, | |
| "grad_norm": 0.41525878972145586, | |
| "learning_rate": 5.8826237131022196e-09, | |
| "loss": 0.2637, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.9579354838709677, | |
| "grad_norm": 0.39563865795758707, | |
| "learning_rate": 5.344193891327286e-09, | |
| "loss": 0.2599, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.3902289846827882, | |
| "learning_rate": 4.831469597258331e-09, | |
| "loss": 0.2588, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9620645161290322, | |
| "grad_norm": 0.4265778114435462, | |
| "learning_rate": 4.344477475379027e-09, | |
| "loss": 0.263, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.9641290322580646, | |
| "grad_norm": 0.3952736993136822, | |
| "learning_rate": 3.883242832962319e-09, | |
| "loss": 0.266, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.9661935483870968, | |
| "grad_norm": 0.4204128593760075, | |
| "learning_rate": 3.4477896387552497e-09, | |
| "loss": 0.2632, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.9682580645161291, | |
| "grad_norm": 0.39870257721591323, | |
| "learning_rate": 3.0381405217333457e-09, | |
| "loss": 0.2598, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.9703225806451613, | |
| "grad_norm": 0.40504192880628953, | |
| "learning_rate": 2.6543167699247248e-09, | |
| "loss": 0.2663, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9723870967741935, | |
| "grad_norm": 0.4276504248620953, | |
| "learning_rate": 2.2963383293039264e-09, | |
| "loss": 0.2681, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.9744516129032258, | |
| "grad_norm": 0.4194237088370349, | |
| "learning_rate": 1.964223802755238e-09, | |
| "loss": 0.2642, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.976516129032258, | |
| "grad_norm": 0.41830150874097594, | |
| "learning_rate": 1.6579904491059726e-09, | |
| "loss": 0.257, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.9785806451612903, | |
| "grad_norm": 0.381130263031969, | |
| "learning_rate": 1.3776541822297926e-09, | |
| "loss": 0.2652, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.9806451612903225, | |
| "grad_norm": 0.4000580182685434, | |
| "learning_rate": 1.1232295702193751e-09, | |
| "loss": 0.2635, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9827096774193549, | |
| "grad_norm": 0.41681278877265543, | |
| "learning_rate": 8.947298346296816e-10, | |
| "loss": 0.2683, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.9847741935483871, | |
| "grad_norm": 0.40430126615446177, | |
| "learning_rate": 6.921668497907873e-10, | |
| "loss": 0.262, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.9868387096774194, | |
| "grad_norm": 0.41793940361219006, | |
| "learning_rate": 5.155511421906511e-10, | |
| "loss": 0.2619, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.9889032258064516, | |
| "grad_norm": 0.4020044075973662, | |
| "learning_rate": 3.64891889928276e-10, | |
| "loss": 0.262, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.9909677419354839, | |
| "grad_norm": 0.39705009551285064, | |
| "learning_rate": 2.4019692223675727e-10, | |
| "loss": 0.2597, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9930322580645161, | |
| "grad_norm": 0.41207414069949416, | |
| "learning_rate": 1.4147271907621883e-10, | |
| "loss": 0.2605, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.9950967741935484, | |
| "grad_norm": 0.4262315482989849, | |
| "learning_rate": 6.87244107974716e-11, | |
| "loss": 0.2601, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.9971612903225806, | |
| "grad_norm": 0.4195255805487681, | |
| "learning_rate": 2.1955777874838045e-11, | |
| "loss": 0.2636, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.9992258064516129, | |
| "grad_norm": 0.40670072777909505, | |
| "learning_rate": 1.1692507103089334e-12, | |
| "loss": 0.2565, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.9998451612903225, | |
| "step": 4843, | |
| "total_flos": 1061427576569856.0, | |
| "train_loss": 0.09964955267816591, | |
| "train_runtime": 121089.0768, | |
| "train_samples_per_second": 10.24, | |
| "train_steps_per_second": 0.04 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4843, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1061427576569856.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
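
The state above follows the Hugging Face `transformers` Trainer `trainer_state.json` layout: per-step records under `log_history` (each with `step`, `epoch`, `loss`, `learning_rate`, `grad_norm`), followed by one run-level summary record and top-level fields such as `total_flos` and `train_runtime`. Below is a minimal sketch for inspecting such a file offline; the filename `trainer_state.json` and the use of matplotlib are assumptions for illustration, not part of the state file itself.

```python
# Minimal sketch: load a Trainer state file and plot loss / learning rate.
# Assumptions: the state is saved as "trainer_state.json" (hypothetical
# filename) and matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step entries; the final summary record (here step 4843)
# carries run totals like train_runtime and has no "loss" key.
logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax1.plot(steps, losses)
ax1.set_ylabel("training loss")
ax2.plot(steps, lrs)
ax2.set_ylabel("learning rate")
ax2.set_xlabel("step")
fig.tight_layout()
plt.show()
```

Note that the run-level `train_loss` is the average reported by the Trainer for the run, so it need not match any single logged `loss` value; filtering on the `"loss"` key is what keeps the summary record out of the per-step curves.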