| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 5025, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009950248756218905, | |
| "grad_norm": 37.25846862792969, | |
| "learning_rate": 2.45e-07, | |
| "logits/chosen": 5.909375190734863, | |
| "logits/rejected": 6.022812366485596, | |
| "logps/chosen": -153.30499267578125, | |
| "logps/rejected": -145.34500122070312, | |
| "loss": 23.1951, | |
| "rewards/accuracies": 0.5337499976158142, | |
| "rewards/chosen": 86.37000274658203, | |
| "rewards/margins": 8.612265586853027, | |
| "rewards/rejected": 77.75499725341797, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01990049751243781, | |
| "grad_norm": 46.854312896728516, | |
| "learning_rate": 4.95e-07, | |
| "logits/chosen": 5.64968729019165, | |
| "logits/rejected": 5.914999961853027, | |
| "logps/chosen": -149.85000610351562, | |
| "logps/rejected": -148.31500244140625, | |
| "loss": 23.2079, | |
| "rewards/accuracies": 0.5256249904632568, | |
| "rewards/chosen": 84.31500244140625, | |
| "rewards/margins": 5.061445236206055, | |
| "rewards/rejected": 79.23249816894531, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.029850746268656716, | |
| "grad_norm": 53.076805114746094, | |
| "learning_rate": 4.998778891959453e-07, | |
| "logits/chosen": 5.153749942779541, | |
| "logits/rejected": 5.412187576293945, | |
| "logps/chosen": -150.7550048828125, | |
| "logps/rejected": -148.91000366210938, | |
| "loss": 23.4129, | |
| "rewards/accuracies": 0.5212500095367432, | |
| "rewards/chosen": 82.07499694824219, | |
| "rewards/margins": 5.790234565734863, | |
| "rewards/rejected": 76.28500366210938, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03980099502487562, | |
| "grad_norm": 76.89788818359375, | |
| "learning_rate": 4.99501662760924e-07, | |
| "logits/chosen": 4.105234146118164, | |
| "logits/rejected": 4.434531211853027, | |
| "logps/chosen": -165.125, | |
| "logps/rejected": -161.82749938964844, | |
| "loss": 22.0375, | |
| "rewards/accuracies": 0.5193750262260437, | |
| "rewards/chosen": 80.40499877929688, | |
| "rewards/margins": 5.854726791381836, | |
| "rewards/rejected": 74.58000183105469, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04975124378109453, | |
| "grad_norm": 106.3976821899414, | |
| "learning_rate": 4.988716525160205e-07, | |
| "logits/chosen": 2.590937614440918, | |
| "logits/rejected": 2.960390567779541, | |
| "logps/chosen": -181.55999755859375, | |
| "logps/rejected": -182.18499755859375, | |
| "loss": 22.0983, | |
| "rewards/accuracies": 0.5206249952316284, | |
| "rewards/chosen": 79.50749969482422, | |
| "rewards/margins": 5.624882698059082, | |
| "rewards/rejected": 73.90750122070312, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05970149253731343, | |
| "grad_norm": 154.4518585205078, | |
| "learning_rate": 4.979884992842194e-07, | |
| "logits/chosen": 1.157080054283142, | |
| "logits/rejected": 1.4771264791488647, | |
| "logps/chosen": -199.74000549316406, | |
| "logps/rejected": -208.74000549316406, | |
| "loss": 23.0821, | |
| "rewards/accuracies": 0.4806250035762787, | |
| "rewards/chosen": 75.35250091552734, | |
| "rewards/margins": 1.8104979991912842, | |
| "rewards/rejected": 73.51750183105469, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06965174129353234, | |
| "grad_norm": 159.42955017089844, | |
| "learning_rate": 4.968531013761348e-07, | |
| "logits/chosen": -0.5976855754852295, | |
| "logits/rejected": -0.2811816334724426, | |
| "logps/chosen": -253.47000122070312, | |
| "logps/rejected": -241.38999938964844, | |
| "loss": 19.8918, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 79.19000244140625, | |
| "rewards/margins": 8.498632431030273, | |
| "rewards/rejected": 70.72000122070312, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07960199004975124, | |
| "grad_norm": 113.71151733398438, | |
| "learning_rate": 4.954666136762819e-07, | |
| "logits/chosen": -2.210566520690918, | |
| "logits/rejected": -1.936132788658142, | |
| "logps/chosen": -294.9599914550781, | |
| "logps/rejected": -279.9599914550781, | |
| "loss": 18.7674, | |
| "rewards/accuracies": 0.5331249833106995, | |
| "rewards/chosen": 72.56375122070312, | |
| "rewards/margins": 8.443652153015137, | |
| "rewards/rejected": 64.11750030517578, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08955223880597014, | |
| "grad_norm": 420.630859375, | |
| "learning_rate": 4.938304464683715e-07, | |
| "logits/chosen": -3.8620312213897705, | |
| "logits/rejected": -3.5835156440734863, | |
| "logps/chosen": -352.239990234375, | |
| "logps/rejected": -348.32000732421875, | |
| "loss": 18.0366, | |
| "rewards/accuracies": 0.5162500143051147, | |
| "rewards/chosen": 59.02375030517578, | |
| "rewards/margins": 2.2487499713897705, | |
| "rewards/rejected": 56.75749969482422, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09950248756218906, | |
| "grad_norm": 1939.998046875, | |
| "learning_rate": 4.91946264000822e-07, | |
| "logits/chosen": -4.11453104019165, | |
| "logits/rejected": -4.022890567779541, | |
| "logps/chosen": -524.02001953125, | |
| "logps/rejected": -501.67999267578125, | |
| "loss": 13.3907, | |
| "rewards/accuracies": 0.5350000262260437, | |
| "rewards/chosen": 50.61375045776367, | |
| "rewards/margins": 6.952011585235596, | |
| "rewards/rejected": 43.663124084472656, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10945273631840796, | |
| "grad_norm": 1124.046142578125, | |
| "learning_rate": 4.898159827939476e-07, | |
| "logits/chosen": -4.222187519073486, | |
| "logits/rejected": -4.111406326293945, | |
| "logps/chosen": -715.0800170898438, | |
| "logps/rejected": -685.260009765625, | |
| "loss": 10.9502, | |
| "rewards/accuracies": 0.5181249976158142, | |
| "rewards/chosen": 25.342500686645508, | |
| "rewards/margins": 0.6623925566673279, | |
| "rewards/rejected": 24.693124771118164, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.11940298507462686, | |
| "grad_norm": 1316.1282958984375, | |
| "learning_rate": 4.874417696905456e-07, | |
| "logits/chosen": -4.313593864440918, | |
| "logits/rejected": -4.233281135559082, | |
| "logps/chosen": -738.239990234375, | |
| "logps/rejected": -716.5800170898438, | |
| "loss": 8.9983, | |
| "rewards/accuracies": 0.5450000166893005, | |
| "rewards/chosen": 25.038436889648438, | |
| "rewards/margins": 3.051743268966675, | |
| "rewards/rejected": 21.988750457763672, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12935323383084577, | |
| "grad_norm": 1739.2020263671875, | |
| "learning_rate": 4.848260396518637e-07, | |
| "logits/chosen": -4.20578145980835, | |
| "logits/rejected": -4.150312423706055, | |
| "logps/chosen": -768.8599853515625, | |
| "logps/rejected": -742.8200073242188, | |
| "loss": 9.0056, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 24.521249771118164, | |
| "rewards/margins": 3.205258846282959, | |
| "rewards/rejected": 21.316171646118164, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.13930348258706468, | |
| "grad_norm": 1328.8701171875, | |
| "learning_rate": 4.819714533011918e-07, | |
| "logits/chosen": -4.449375152587891, | |
| "logits/rejected": -4.36984395980835, | |
| "logps/chosen": -743.5399780273438, | |
| "logps/rejected": -737.8800048828125, | |
| "loss": 7.7838, | |
| "rewards/accuracies": 0.5268750190734863, | |
| "rewards/chosen": 19.766250610351562, | |
| "rewards/margins": 2.2691991329193115, | |
| "rewards/rejected": 17.490938186645508, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 1090.959716796875, | |
| "learning_rate": 4.788809142175751e-07, | |
| "logits/chosen": -4.560468673706055, | |
| "logits/rejected": -4.488906383514404, | |
| "logps/chosen": -800.739990234375, | |
| "logps/rejected": -747.9600219726562, | |
| "loss": 7.3371, | |
| "rewards/accuracies": 0.5674999952316284, | |
| "rewards/chosen": 18.475936889648438, | |
| "rewards/margins": 3.2167186737060547, | |
| "rewards/rejected": 15.255346298217773, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.15920398009950248, | |
| "grad_norm": 3159.435302734375, | |
| "learning_rate": 4.755575659824014e-07, | |
| "logits/chosen": -4.382031440734863, | |
| "logits/rejected": -4.326250076293945, | |
| "logps/chosen": -796.97998046875, | |
| "logps/rejected": -792.5800170898438, | |
| "loss": 8.1944, | |
| "rewards/accuracies": 0.5537499785423279, | |
| "rewards/chosen": 16.225391387939453, | |
| "rewards/margins": 1.8960351943969727, | |
| "rewards/rejected": 14.33435344696045, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1691542288557214, | |
| "grad_norm": 2522.934326171875, | |
| "learning_rate": 4.7200478898186656e-07, | |
| "logits/chosen": -4.401249885559082, | |
| "logits/rejected": -4.393125057220459, | |
| "logps/chosen": -801.2000122070312, | |
| "logps/rejected": -761.7000122070312, | |
| "loss": 7.0774, | |
| "rewards/accuracies": 0.5487499833106995, | |
| "rewards/chosen": 15.95101547241211, | |
| "rewards/margins": 1.5917773246765137, | |
| "rewards/rejected": 14.3623046875, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1791044776119403, | |
| "grad_norm": 1642.070068359375, | |
| "learning_rate": 4.68226196968572e-07, | |
| "logits/chosen": -3.7705469131469727, | |
| "logits/rejected": -3.7598438262939453, | |
| "logps/chosen": -800.5999755859375, | |
| "logps/rejected": -739.3800048828125, | |
| "loss": 7.2798, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 17.5234375, | |
| "rewards/margins": 1.022646427154541, | |
| "rewards/rejected": 16.5008602142334, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1890547263681592, | |
| "grad_norm": 1916.802001953125, | |
| "learning_rate": 4.642256333857497e-07, | |
| "logits/chosen": -3.6234374046325684, | |
| "logits/rejected": -3.5637500286102295, | |
| "logps/chosen": -764.1599731445312, | |
| "logps/rejected": -759.6799926757812, | |
| "loss": 6.8376, | |
| "rewards/accuracies": 0.5568749904632568, | |
| "rewards/chosen": 17.314218521118164, | |
| "rewards/margins": 2.4442381858825684, | |
| "rewards/rejected": 14.867304801940918, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.19900497512437812, | |
| "grad_norm": 2218.57373046875, | |
| "learning_rate": 4.600071674578551e-07, | |
| "logits/chosen": -4.034062385559082, | |
| "logits/rejected": -4.010156154632568, | |
| "logps/chosen": -813.4600219726562, | |
| "logps/rejected": -766.9000244140625, | |
| "loss": 6.9447, | |
| "rewards/accuracies": 0.5418750047683716, | |
| "rewards/chosen": 16.6539249420166, | |
| "rewards/margins": 2.051767587661743, | |
| "rewards/rejected": 14.60546875, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.208955223880597, | |
| "grad_norm": 4437.78369140625, | |
| "learning_rate": 4.555750900515026e-07, | |
| "logits/chosen": -4.153124809265137, | |
| "logits/rejected": -4.062812328338623, | |
| "logps/chosen": -789.47998046875, | |
| "logps/rejected": -759.8200073242188, | |
| "loss": 6.6345, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 15.9857816696167, | |
| "rewards/margins": 1.8259130716323853, | |
| "rewards/rejected": 14.157539367675781, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.21890547263681592, | |
| "grad_norm": 1672.605224609375, | |
| "learning_rate": 4.5093390931095656e-07, | |
| "logits/chosen": -4.221562385559082, | |
| "logits/rejected": -4.203437328338623, | |
| "logps/chosen": -795.239990234375, | |
| "logps/rejected": -751.6400146484375, | |
| "loss": 6.3275, | |
| "rewards/accuracies": 0.5362499952316284, | |
| "rewards/chosen": 16.673358917236328, | |
| "rewards/margins": 1.631040096282959, | |
| "rewards/rejected": 15.046093940734863, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.22885572139303484, | |
| "grad_norm": 845.5272216796875, | |
| "learning_rate": 4.4608834607261394e-07, | |
| "logits/chosen": -4.139531135559082, | |
| "logits/rejected": -4.091875076293945, | |
| "logps/chosen": -826.239990234375, | |
| "logps/rejected": -776.7999877929688, | |
| "loss": 5.9411, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": 15.388359069824219, | |
| "rewards/margins": 2.7884082794189453, | |
| "rewards/rejected": 12.600312232971191, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.23880597014925373, | |
| "grad_norm": 2127.64697265625, | |
| "learning_rate": 4.4104332906314545e-07, | |
| "logits/chosen": -4.543749809265137, | |
| "logits/rejected": -4.498437404632568, | |
| "logps/chosen": -763.5999755859375, | |
| "logps/rejected": -782.280029296875, | |
| "loss": 6.0186, | |
| "rewards/accuracies": 0.5381249785423279, | |
| "rewards/chosen": 16.952342987060547, | |
| "rewards/margins": 2.063539981842041, | |
| "rewards/rejected": 14.888437271118164, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.24875621890547264, | |
| "grad_norm": 3592.465576171875, | |
| "learning_rate": 4.358039898861784e-07, | |
| "logits/chosen": -3.616874933242798, | |
| "logits/rejected": -3.5975780487060547, | |
| "logps/chosen": -805.9600219726562, | |
| "logps/rejected": -754.1599731445312, | |
| "loss": 7.1137, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": 16.315702438354492, | |
| "rewards/margins": 1.9189550876617432, | |
| "rewards/rejected": 14.396132469177246, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.25870646766169153, | |
| "grad_norm": 2655.087646484375, | |
| "learning_rate": 4.303756578026196e-07, | |
| "logits/chosen": -4.052499771118164, | |
| "logits/rejected": -3.9873437881469727, | |
| "logps/chosen": -822.5599975585938, | |
| "logps/rejected": -769.0599975585938, | |
| "loss": 6.3728, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 13.059394836425781, | |
| "rewards/margins": 1.4543017148971558, | |
| "rewards/rejected": 11.605507850646973, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.26865671641791045, | |
| "grad_norm": 3106.407470703125, | |
| "learning_rate": 4.247638543099302e-07, | |
| "logits/chosen": -4.597812652587891, | |
| "logits/rejected": -4.58078145980835, | |
| "logps/chosen": -821.6799926757812, | |
| "logps/rejected": -774.0399780273438, | |
| "loss": 5.3592, | |
| "rewards/accuracies": 0.5612499713897705, | |
| "rewards/chosen": 15.785625457763672, | |
| "rewards/margins": 2.451181650161743, | |
| "rewards/rejected": 13.333086013793945, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.27860696517412936, | |
| "grad_norm": 3253.3349609375, | |
| "learning_rate": 4.189742875258636e-07, | |
| "logits/chosen": -4.145625114440918, | |
| "logits/rejected": -4.125, | |
| "logps/chosen": -818.6799926757812, | |
| "logps/rejected": -775.5999755859375, | |
| "loss": 5.955, | |
| "rewards/accuracies": 0.5543749928474426, | |
| "rewards/chosen": 14.581796646118164, | |
| "rewards/margins": 0.9402441382408142, | |
| "rewards/rejected": 13.644218444824219, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.2885572139303483, | |
| "grad_norm": 2794.8505859375, | |
| "learning_rate": 4.1301284638238023e-07, | |
| "logits/chosen": -4.417500019073486, | |
| "logits/rejected": -4.430781364440918, | |
| "logps/chosen": -873.97998046875, | |
| "logps/rejected": -809.7999877929688, | |
| "loss": 5.9541, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 16.624374389648438, | |
| "rewards/margins": 2.9456982612609863, | |
| "rewards/rejected": 13.676972389221191, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 3549.135009765625, | |
| "learning_rate": 4.068855946356451e-07, | |
| "logits/chosen": -4.357968807220459, | |
| "logits/rejected": -4.272500038146973, | |
| "logps/chosen": -815.0999755859375, | |
| "logps/rejected": -799.0999755859375, | |
| "loss": 7.1658, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 12.853320121765137, | |
| "rewards/margins": -0.4855078160762787, | |
| "rewards/rejected": 13.337441444396973, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.30845771144278605, | |
| "grad_norm": 2740.423095703125, | |
| "learning_rate": 4.005987646982011e-07, | |
| "logits/chosen": -4.377812385559082, | |
| "logits/rejected": -4.360000133514404, | |
| "logps/chosen": -861.0, | |
| "logps/rejected": -819.9000244140625, | |
| "loss": 6.0543, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": 13.93810749053955, | |
| "rewards/margins": 1.2174170017242432, | |
| "rewards/rejected": 12.72454833984375, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.31840796019900497, | |
| "grad_norm": 2517.54248046875, | |
| "learning_rate": 3.9415875129958994e-07, | |
| "logits/chosen": -4.250625133514404, | |
| "logits/rejected": -4.236562728881836, | |
| "logps/chosen": -870.3599853515625, | |
| "logps/rejected": -832.8400268554688, | |
| "loss": 6.4818, | |
| "rewards/accuracies": 0.5443750023841858, | |
| "rewards/chosen": 12.261445045471191, | |
| "rewards/margins": 0.11241699010133743, | |
| "rewards/rejected": 12.149633407592773, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.3283582089552239, | |
| "grad_norm": 4087.48828125, | |
| "learning_rate": 3.875721049818718e-07, | |
| "logits/chosen": -4.099062442779541, | |
| "logits/rejected": -4.049062728881836, | |
| "logps/chosen": -868.47998046875, | |
| "logps/rejected": -826.6400146484375, | |
| "loss": 5.7788, | |
| "rewards/accuracies": 0.5299999713897705, | |
| "rewards/chosen": 12.918557167053223, | |
| "rewards/margins": 0.6001172065734863, | |
| "rewards/rejected": 12.3140230178833, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.3383084577114428, | |
| "grad_norm": 3558.606689453125, | |
| "learning_rate": 3.808455254366574e-07, | |
| "logits/chosen": -3.7817187309265137, | |
| "logits/rejected": -3.768437385559082, | |
| "logps/chosen": -857.0599975585938, | |
| "logps/rejected": -832.239990234375, | |
| "loss": 6.1453, | |
| "rewards/accuracies": 0.5162500143051147, | |
| "rewards/chosen": 13.769579887390137, | |
| "rewards/margins": 1.5330761671066284, | |
| "rewards/rejected": 12.234726905822754, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.3482587064676617, | |
| "grad_norm": 3828.70068359375, | |
| "learning_rate": 3.739858546904308e-07, | |
| "logits/chosen": -4.390937328338623, | |
| "logits/rejected": -4.3046875, | |
| "logps/chosen": -837.1799926757812, | |
| "logps/rejected": -835.3599853515625, | |
| "loss": 6.6707, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 11.527656555175781, | |
| "rewards/margins": 0.20983397960662842, | |
| "rewards/rejected": 11.319659233093262, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.3582089552238806, | |
| "grad_norm": 4480.52392578125, | |
| "learning_rate": 3.6700007014509514e-07, | |
| "logits/chosen": -4.233593940734863, | |
| "logits/rejected": -4.196249961853027, | |
| "logps/chosen": -868.5, | |
| "logps/rejected": -844.9000244140625, | |
| "loss": 5.0152, | |
| "rewards/accuracies": 0.5418750047683716, | |
| "rewards/chosen": 11.055917739868164, | |
| "rewards/margins": 1.0635205507278442, | |
| "rewards/rejected": 9.988080978393555, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3681592039800995, | |
| "grad_norm": 2393.2080078125, | |
| "learning_rate": 3.5989527748081805e-07, | |
| "logits/chosen": -4.220937728881836, | |
| "logits/rejected": -4.229062557220459, | |
| "logps/chosen": -885.8800048828125, | |
| "logps/rejected": -871.0800170898438, | |
| "loss": 5.564, | |
| "rewards/accuracies": 0.5206249952316284, | |
| "rewards/chosen": 10.777030944824219, | |
| "rewards/margins": 0.5477758646011353, | |
| "rewards/rejected": 10.22183609008789, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.3781094527363184, | |
| "grad_norm": 2142.15185546875, | |
| "learning_rate": 3.52678703428399e-07, | |
| "logits/chosen": -3.959531307220459, | |
| "logits/rejected": -3.898750066757202, | |
| "logps/chosen": -828.0800170898438, | |
| "logps/rejected": -837.0, | |
| "loss": 4.9398, | |
| "rewards/accuracies": 0.5493749976158142, | |
| "rewards/chosen": 10.465898513793945, | |
| "rewards/margins": 1.3795897960662842, | |
| "rewards/rejected": 9.086328506469727, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.3880597014925373, | |
| "grad_norm": 5701.14794921875, | |
| "learning_rate": 3.45357688418507e-07, | |
| "logits/chosen": -3.465625047683716, | |
| "logits/rejected": -3.4301562309265137, | |
| "logps/chosen": -863.3200073242188, | |
| "logps/rejected": -833.1799926757812, | |
| "loss": 4.9191, | |
| "rewards/accuracies": 0.5206249952316284, | |
| "rewards/chosen": 11.544062614440918, | |
| "rewards/margins": 1.2966210842132568, | |
| "rewards/rejected": 10.246211051940918, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.39800995024875624, | |
| "grad_norm": 2166.156005859375, | |
| "learning_rate": 3.3793967911526797e-07, | |
| "logits/chosen": -4.175624847412109, | |
| "logits/rejected": -4.157968521118164, | |
| "logps/chosen": -864.739990234375, | |
| "logps/rejected": -818.3200073242188, | |
| "loss": 4.9636, | |
| "rewards/accuracies": 0.5587499737739563, | |
| "rewards/chosen": 10.2691011428833, | |
| "rewards/margins": 1.404970645904541, | |
| "rewards/rejected": 8.860605239868164, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.4079601990049751, | |
| "grad_norm": 1748.567626953125, | |
| "learning_rate": 3.3043222084179477e-07, | |
| "logits/chosen": -4.447812557220459, | |
| "logits/rejected": -4.435625076293945, | |
| "logps/chosen": -864.1799926757812, | |
| "logps/rejected": -806.9400024414062, | |
| "loss": 4.3649, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": 11.546367645263672, | |
| "rewards/margins": 1.7490723133087158, | |
| "rewards/rejected": 9.790781021118164, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.417910447761194, | |
| "grad_norm": 4250.59228515625, | |
| "learning_rate": 3.228429499053651e-07, | |
| "logits/chosen": -3.7835936546325684, | |
| "logits/rejected": -3.764218807220459, | |
| "logps/chosen": -896.280029296875, | |
| "logps/rejected": -840.52001953125, | |
| "loss": 5.556, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 12.199726104736328, | |
| "rewards/margins": 0.49269530177116394, | |
| "rewards/rejected": 11.707152366638184, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.42786069651741293, | |
| "grad_norm": 2269.677734375, | |
| "learning_rate": 3.151795858300542e-07, | |
| "logits/chosen": -4.282343864440918, | |
| "logits/rejected": -4.28640604019165, | |
| "logps/chosen": -864.52001953125, | |
| "logps/rejected": -833.02001953125, | |
| "loss": 4.4658, | |
| "rewards/accuracies": 0.5256249904632568, | |
| "rewards/chosen": 10.688271522521973, | |
| "rewards/margins": 0.9122143387794495, | |
| "rewards/rejected": 9.776113510131836, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.43781094527363185, | |
| "grad_norm": 2995.49267578125, | |
| "learning_rate": 3.0744992350472184e-07, | |
| "logits/chosen": -4.102499961853027, | |
| "logits/rejected": -4.065000057220459, | |
| "logps/chosen": -891.3599853515625, | |
| "logps/rejected": -809.739990234375, | |
| "loss": 3.9283, | |
| "rewards/accuracies": 0.5575000047683716, | |
| "rewards/chosen": 10.750624656677246, | |
| "rewards/margins": 1.9478063583374023, | |
| "rewards/rejected": 8.801519393920898, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 2685.581298828125, | |
| "learning_rate": 2.9966182525434136e-07, | |
| "logits/chosen": -4.429843902587891, | |
| "logits/rejected": -4.410468578338623, | |
| "logps/chosen": -917.9000244140625, | |
| "logps/rejected": -884.8599853515625, | |
| "loss": 4.9653, | |
| "rewards/accuracies": 0.5181249976158142, | |
| "rewards/chosen": 8.635839462280273, | |
| "rewards/margins": 0.5861572027206421, | |
| "rewards/rejected": 8.053730010986328, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.4577114427860697, | |
| "grad_norm": 1810.1524658203125, | |
| "learning_rate": 2.9182321284273524e-07, | |
| "logits/chosen": -4.380312442779541, | |
| "logits/rejected": -4.308281421661377, | |
| "logps/chosen": -892.0999755859375, | |
| "logps/rejected": -817.1799926757812, | |
| "loss": 4.4186, | |
| "rewards/accuracies": 0.5537499785423279, | |
| "rewards/chosen": 8.945687294006348, | |
| "rewards/margins": 1.2103466987609863, | |
| "rewards/rejected": 7.736120223999023, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.46766169154228854, | |
| "grad_norm": 1638.470458984375, | |
| "learning_rate": 2.839420594148518e-07, | |
| "logits/chosen": -4.286562442779541, | |
| "logits/rejected": -4.321406364440918, | |
| "logps/chosen": -856.7000122070312, | |
| "logps/rejected": -857.2999877929688, | |
| "loss": 4.0532, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 9.412128448486328, | |
| "rewards/margins": 1.397641658782959, | |
| "rewards/rejected": 8.009862899780273, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.47761194029850745, | |
| "grad_norm": 4396.177734375, | |
| "learning_rate": 2.7602638138677834e-07, | |
| "logits/chosen": -4.463749885559082, | |
| "logits/rejected": -4.425156116485596, | |
| "logps/chosen": -903.4199829101562, | |
| "logps/rejected": -882.0800170898438, | |
| "loss": 4.5126, | |
| "rewards/accuracies": 0.5487499833106995, | |
| "rewards/chosen": 7.66628885269165, | |
| "rewards/margins": 1.1259644031524658, | |
| "rewards/rejected": 6.541113376617432, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.48756218905472637, | |
| "grad_norm": 4004.349609375, | |
| "learning_rate": 2.6808423029174143e-07, | |
| "logits/chosen": -4.335468769073486, | |
| "logits/rejected": -4.302812576293945, | |
| "logps/chosen": -876.6799926757812, | |
| "logps/rejected": -830.239990234375, | |
| "loss": 4.9104, | |
| "rewards/accuracies": 0.5393750071525574, | |
| "rewards/chosen": 10.196874618530273, | |
| "rewards/margins": 0.7247558832168579, | |
| "rewards/rejected": 9.472156524658203, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.4975124378109453, | |
| "grad_norm": 2645.156494140625, | |
| "learning_rate": 2.6012368459038625e-07, | |
| "logits/chosen": -4.241718769073486, | |
| "logits/rejected": -4.247031211853027, | |
| "logps/chosen": -940.6400146484375, | |
| "logps/rejected": -873.52001953125, | |
| "loss": 4.4678, | |
| "rewards/accuracies": 0.5193750262260437, | |
| "rewards/chosen": 7.638674259185791, | |
| "rewards/margins": 0.7611132860183716, | |
| "rewards/rejected": 6.874751091003418, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5074626865671642, | |
| "grad_norm": 4464.42724609375, | |
| "learning_rate": 2.5215284145366754e-07, | |
| "logits/chosen": -4.28781270980835, | |
| "logits/rejected": -4.303593635559082, | |
| "logps/chosen": -898.02001953125, | |
| "logps/rejected": -856.219970703125, | |
| "loss": 4.8918, | |
| "rewards/accuracies": 0.5393750071525574, | |
| "rewards/chosen": 9.058222770690918, | |
| "rewards/margins": 0.16966308653354645, | |
| "rewards/rejected": 8.891836166381836, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.5174129353233831, | |
| "grad_norm": 1550.342041015625, | |
| "learning_rate": 2.4417980852670795e-07, | |
| "logits/chosen": -4.276875019073486, | |
| "logits/rejected": -4.283124923706055, | |
| "logps/chosen": -908.8800048828125, | |
| "logps/rejected": -837.219970703125, | |
| "loss": 3.7542, | |
| "rewards/accuracies": 0.5493749976158142, | |
| "rewards/chosen": 8.9493408203125, | |
| "rewards/margins": 1.7262645959854126, | |
| "rewards/rejected": 7.21969747543335, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.527363184079602, | |
| "grad_norm": 1139.1016845703125, | |
| "learning_rate": 2.3621269568200348e-07, | |
| "logits/chosen": -4.569843769073486, | |
| "logits/rejected": -4.572031021118164, | |
| "logps/chosen": -863.0399780273438, | |
| "logps/rejected": -832.1199951171875, | |
| "loss": 4.4595, | |
| "rewards/accuracies": 0.5274999737739563, | |
| "rewards/chosen": 9.005471229553223, | |
| "rewards/margins": 0.9330615401268005, | |
| "rewards/rejected": 8.067304611206055, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.5373134328358209, | |
| "grad_norm": 3259.9931640625, | |
| "learning_rate": 2.2825960677036263e-07, | |
| "logits/chosen": -5.025000095367432, | |
| "logits/rejected": -5.025312423706055, | |
| "logps/chosen": -900.02001953125, | |
| "logps/rejected": -855.4600219726562, | |
| "loss": 3.8295, | |
| "rewards/accuracies": 0.5162500143051147, | |
| "rewards/chosen": 7.501829624176025, | |
| "rewards/margins": 0.9494946002960205, | |
| "rewards/rejected": 6.554053783416748, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5472636815920398, | |
| "grad_norm": 1576.5633544921875, | |
| "learning_rate": 2.2032863137797098e-07, | |
| "logits/chosen": -4.931250095367432, | |
| "logits/rejected": -4.935312271118164, | |
| "logps/chosen": -888.0, | |
| "logps/rejected": -890.8200073242188, | |
| "loss": 3.6169, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 6.200995922088623, | |
| "rewards/margins": 2.0712647438049316, | |
| "rewards/rejected": 4.132159233093262, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5572139303482587, | |
| "grad_norm": 2286.4580078125, | |
| "learning_rate": 2.1242783659796472e-07, | |
| "logits/chosen": -5.111249923706055, | |
| "logits/rejected": -5.120625019073486, | |
| "logps/chosen": -899.4000244140625, | |
| "logps/rejected": -861.1199951171875, | |
| "loss": 4.0467, | |
| "rewards/accuracies": 0.5299999713897705, | |
| "rewards/chosen": 6.854379653930664, | |
| "rewards/margins": 0.9951757788658142, | |
| "rewards/rejected": 5.860227108001709, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5671641791044776, | |
| "grad_norm": 1399.0577392578125, | |
| "learning_rate": 2.0456525882488414e-07, | |
| "logits/chosen": -5.425624847412109, | |
| "logits/rejected": -5.343437671661377, | |
| "logps/chosen": -907.280029296875, | |
| "logps/rejected": -847.0599975585938, | |
| "loss": 4.3571, | |
| "rewards/accuracies": 0.5137500166893005, | |
| "rewards/chosen": 7.121167182922363, | |
| "rewards/margins": 0.05256347730755806, | |
| "rewards/rejected": 7.0656046867370605, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.5771144278606966, | |
| "grad_norm": 1395.7008056640625, | |
| "learning_rate": 1.967488955803515e-07, | |
| "logits/chosen": -5.565000057220459, | |
| "logits/rejected": -5.533124923706055, | |
| "logps/chosen": -921.8800048828125, | |
| "logps/rejected": -867.5599975585938, | |
| "loss": 3.6962, | |
| "rewards/accuracies": 0.5237500071525574, | |
| "rewards/chosen": 8.013593673706055, | |
| "rewards/margins": 1.2249804735183716, | |
| "rewards/rejected": 6.789748668670654, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.5870646766169154, | |
| "grad_norm": 1817.9039306640625, | |
| "learning_rate": 1.8898669737829009e-07, | |
| "logits/chosen": -5.284999847412109, | |
| "logits/rejected": -5.328750133514404, | |
| "logps/chosen": -901.7999877929688, | |
| "logps/rejected": -848.260009765625, | |
| "loss": 3.4712, | |
| "rewards/accuracies": 0.5325000286102295, | |
| "rewards/chosen": 8.152949333190918, | |
| "rewards/margins": 1.7032690048217773, | |
| "rewards/rejected": 6.451176643371582, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 2175.98291015625, | |
| "learning_rate": 1.8128655963795654e-07, | |
| "logits/chosen": -5.226718902587891, | |
| "logits/rejected": -5.177187442779541, | |
| "logps/chosen": -896.52001953125, | |
| "logps/rejected": -842.5, | |
| "loss": 4.3828, | |
| "rewards/accuracies": 0.5337499976158142, | |
| "rewards/chosen": 6.346333026885986, | |
| "rewards/margins": -0.23146240413188934, | |
| "rewards/rejected": 6.57891845703125, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6069651741293532, | |
| "grad_norm": 2423.90869140625, | |
| "learning_rate": 1.736563146530148e-07, | |
| "logits/chosen": -5.147812366485596, | |
| "logits/rejected": -5.121250152587891, | |
| "logps/chosen": -906.780029296875, | |
| "logps/rejected": -842.0599975585938, | |
| "loss": 3.618, | |
| "rewards/accuracies": 0.5256249904632568, | |
| "rewards/chosen": 7.217099666595459, | |
| "rewards/margins": 1.0019750595092773, | |
| "rewards/rejected": 6.212661266326904, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.6169154228855721, | |
| "grad_norm": 1796.0404052734375, | |
| "learning_rate": 1.6610372362481795e-07, | |
| "logits/chosen": -5.517499923706055, | |
| "logits/rejected": -5.500937461853027, | |
| "logps/chosen": -888.3800048828125, | |
| "logps/rejected": -870.8200073242188, | |
| "loss": 3.9941, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 6.678945541381836, | |
| "rewards/margins": 0.21024902164936066, | |
| "rewards/rejected": 6.4722514152526855, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6268656716417911, | |
| "grad_norm": 3228.182373046875, | |
| "learning_rate": 1.5863646876800294e-07, | |
| "logits/chosen": -5.522500038146973, | |
| "logits/rejected": -5.519062519073486, | |
| "logps/chosen": -917.4600219726562, | |
| "logps/rejected": -894.3400268554688, | |
| "loss": 3.9995, | |
| "rewards/accuracies": 0.5049999952316284, | |
| "rewards/chosen": 6.917697906494141, | |
| "rewards/margins": 0.9786840677261353, | |
| "rewards/rejected": 5.941035270690918, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.6368159203980099, | |
| "grad_norm": 1673.2705078125, | |
| "learning_rate": 1.512621454964278e-07, | |
| "logits/chosen": -5.52468729019165, | |
| "logits/rejected": -5.51687479019165, | |
| "logps/chosen": -920.6400146484375, | |
| "logps/rejected": -874.0399780273438, | |
| "loss": 3.5676, | |
| "rewards/accuracies": 0.5256249904632568, | |
| "rewards/chosen": 6.860390663146973, | |
| "rewards/margins": 0.9590514898300171, | |
| "rewards/rejected": 5.902841567993164, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6467661691542289, | |
| "grad_norm": 3450.835693359375, | |
| "learning_rate": 1.439882546973991e-07, | |
| "logits/chosen": -5.425468921661377, | |
| "logits/rejected": -5.380000114440918, | |
| "logps/chosen": -896.6799926757812, | |
| "logps/rejected": -860.0, | |
| "loss": 3.9909, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": 6.930351734161377, | |
| "rewards/margins": 0.6515478491783142, | |
| "rewards/rejected": 6.279133319854736, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.6567164179104478, | |
| "grad_norm": 2366.854736328125, | |
| "learning_rate": 1.3682219510204828e-07, | |
| "logits/chosen": -5.55343770980835, | |
| "logits/rejected": -5.555312633514404, | |
| "logps/chosen": -918.0800170898438, | |
| "logps/rejected": -867.8400268554688, | |
| "loss": 3.9592, | |
| "rewards/accuracies": 0.5318750143051147, | |
| "rewards/chosen": 6.138139724731445, | |
| "rewards/margins": 0.7850878834724426, | |
| "rewards/rejected": 5.353430271148682, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2788.40771484375, | |
| "learning_rate": 1.2977125575961799e-07, | |
| "logits/chosen": -5.831562519073486, | |
| "logits/rejected": -5.809062480926514, | |
| "logps/chosen": -918.0999755859375, | |
| "logps/rejected": -877.780029296875, | |
| "loss": 3.6818, | |
| "rewards/accuracies": 0.5325000286102295, | |
| "rewards/chosen": 5.774457931518555, | |
| "rewards/margins": 0.3942529261112213, | |
| "rewards/rejected": 5.381279468536377, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.6766169154228856, | |
| "grad_norm": 2038.4530029296875, | |
| "learning_rate": 1.2284260862331184e-07, | |
| "logits/chosen": -5.459531307220459, | |
| "logits/rejected": -5.452187538146973, | |
| "logps/chosen": -868.5599975585938, | |
| "logps/rejected": -831.5, | |
| "loss": 4.1592, | |
| "rewards/accuracies": 0.5149999856948853, | |
| "rewards/chosen": 8.009712219238281, | |
| "rewards/margins": 0.3110009729862213, | |
| "rewards/rejected": 7.698652267456055, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6865671641791045, | |
| "grad_norm": 2557.6064453125, | |
| "learning_rate": 1.1604330125525078e-07, | |
| "logits/chosen": -5.480000019073486, | |
| "logits/rejected": -5.461249828338623, | |
| "logps/chosen": -930.9000244140625, | |
| "logps/rejected": -879.5800170898438, | |
| "loss": 3.8611, | |
| "rewards/accuracies": 0.5181249976158142, | |
| "rewards/chosen": 6.370607852935791, | |
| "rewards/margins": 0.5133349895477295, | |
| "rewards/rejected": 5.857964038848877, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.6965174129353234, | |
| "grad_norm": 2376.09716796875, | |
| "learning_rate": 1.0938024965795506e-07, | |
| "logits/chosen": -5.44406270980835, | |
| "logits/rejected": -5.42312479019165, | |
| "logps/chosen": -893.1400146484375, | |
| "logps/rejected": -862.0, | |
| "loss": 3.7593, | |
| "rewards/accuracies": 0.5168750286102295, | |
| "rewards/chosen": 7.158564567565918, | |
| "rewards/margins": 0.6185815334320068, | |
| "rewards/rejected": 6.541041851043701, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.7064676616915423, | |
| "grad_norm": 3130.968017578125, | |
| "learning_rate": 1.0286023123964326e-07, | |
| "logits/chosen": -5.474531173706055, | |
| "logits/rejected": -5.479062557220459, | |
| "logps/chosen": -889.4000244140625, | |
| "logps/rejected": -889.760009765625, | |
| "loss": 3.6067, | |
| "rewards/accuracies": 0.5049999952316284, | |
| "rewards/chosen": 4.780278205871582, | |
| "rewards/margins": 0.41691163182258606, | |
| "rewards/rejected": 4.363155364990234, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.7164179104477612, | |
| "grad_norm": 2820.47412109375, | |
| "learning_rate": 9.64898779205055e-08, | |
| "logits/chosen": -5.624687671661377, | |
| "logits/rejected": -5.600468635559082, | |
| "logps/chosen": -903.4400024414062, | |
| "logps/rejected": -850.5, | |
| "loss": 3.9096, | |
| "rewards/accuracies": 0.5049999952316284, | |
| "rewards/chosen": 6.33207893371582, | |
| "rewards/margins": 0.01932373084127903, | |
| "rewards/rejected": 6.315381050109863, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7263681592039801, | |
| "grad_norm": 3085.365478515625, | |
| "learning_rate": 9.027566938696051e-08, | |
| "logits/chosen": -5.869375228881836, | |
| "logits/rejected": -5.823437690734863, | |
| "logps/chosen": -913.4400024414062, | |
| "logps/rejected": -874.3599853515625, | |
| "loss": 4.0442, | |
| "rewards/accuracies": 0.5274999737739563, | |
| "rewards/chosen": 5.950512886047363, | |
| "rewards/margins": 0.31416991353034973, | |
| "rewards/rejected": 5.635661602020264, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.736318407960199, | |
| "grad_norm": 2496.829833984375, | |
| "learning_rate": 8.42239265007595e-08, | |
| "logits/chosen": -5.743750095367432, | |
| "logits/rejected": -5.717812538146973, | |
| "logps/chosen": -893.8800048828125, | |
| "logps/rejected": -847.2999877929688, | |
| "loss": 3.7924, | |
| "rewards/accuracies": 0.5131250023841858, | |
| "rewards/chosen": 7.561201095581055, | |
| "rewards/margins": 0.5582299828529358, | |
| "rewards/rejected": 6.997402191162109, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 3231.818115234375, | |
| "learning_rate": 7.834080486964115e-08, | |
| "logits/chosen": -5.849374771118164, | |
| "logits/rejected": -5.823437690734863, | |
| "logps/chosen": -919.1199951171875, | |
| "logps/rejected": -881.9600219726562, | |
| "loss": 4.0168, | |
| "rewards/accuracies": 0.4975000023841858, | |
| "rewards/chosen": 6.423149585723877, | |
| "rewards/margins": -0.03854003921151161, | |
| "rewards/rejected": 6.461066722869873, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7562189054726368, | |
| "grad_norm": 2785.632080078125, | |
| "learning_rate": 7.263228858607615e-08, | |
| "logits/chosen": -6.016250133514404, | |
| "logits/rejected": -5.974062442779541, | |
| "logps/chosen": -896.739990234375, | |
| "logps/rejected": -847.219970703125, | |
| "loss": 3.5495, | |
| "rewards/accuracies": 0.5462499856948853, | |
| "rewards/chosen": 6.750986099243164, | |
| "rewards/margins": 0.5915331840515137, | |
| "rewards/rejected": 6.161344051361084, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7661691542288557, | |
| "grad_norm": 1095.79541015625, | |
| "learning_rate": 6.7104184140471e-08, | |
| "logits/chosen": -5.880312442779541, | |
| "logits/rejected": -5.940000057220459, | |
| "logps/chosen": -925.3200073242188, | |
| "logps/rejected": -870.6199951171875, | |
| "loss": 3.5409, | |
| "rewards/accuracies": 0.5456249713897705, | |
| "rewards/chosen": 8.115625381469727, | |
| "rewards/margins": 1.7515722513198853, | |
| "rewards/rejected": 6.366718769073486, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7761194029850746, | |
| "grad_norm": 1527.818115234375, | |
| "learning_rate": 6.176211451502181e-08, | |
| "logits/chosen": -5.776249885559082, | |
| "logits/rejected": -5.784687519073486, | |
| "logps/chosen": -915.5800170898438, | |
| "logps/rejected": -894.719970703125, | |
| "loss": 3.6188, | |
| "rewards/accuracies": 0.5287500023841858, | |
| "rewards/chosen": 5.876552581787109, | |
| "rewards/margins": 1.2620117664337158, | |
| "rewards/rejected": 4.617353439331055, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7860696517412935, | |
| "grad_norm": 2342.368896484375, | |
| "learning_rate": 5.66115134642263e-08, | |
| "logits/chosen": -5.654062271118164, | |
| "logits/rejected": -5.647812366485596, | |
| "logps/chosen": -949.5, | |
| "logps/rejected": -899.1599731445312, | |
| "loss": 3.6019, | |
| "rewards/accuracies": 0.5174999833106995, | |
| "rewards/chosen": 5.2860107421875, | |
| "rewards/margins": 0.6428442597389221, | |
| "rewards/rejected": 4.6402587890625, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7960199004975125, | |
| "grad_norm": 1863.121337890625, | |
| "learning_rate": 5.1657619987870657e-08, | |
| "logits/chosen": -5.65500020980835, | |
| "logits/rejected": -5.638437271118164, | |
| "logps/chosen": -906.2000122070312, | |
| "logps/rejected": -842.8800048828125, | |
| "loss": 4.1956, | |
| "rewards/accuracies": 0.5256249904632568, | |
| "rewards/chosen": 5.409960746765137, | |
| "rewards/margins": 0.055903319269418716, | |
| "rewards/rejected": 5.3602614402771, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.8059701492537313, | |
| "grad_norm": 3470.02197265625, | |
| "learning_rate": 4.690547300211392e-08, | |
| "logits/chosen": -5.610937595367432, | |
| "logits/rejected": -5.582656383514404, | |
| "logps/chosen": -865.780029296875, | |
| "logps/rejected": -821.719970703125, | |
| "loss": 3.7337, | |
| "rewards/accuracies": 0.5174999833106995, | |
| "rewards/chosen": 6.724204063415527, | |
| "rewards/margins": 0.2500012218952179, | |
| "rewards/rejected": 6.473584175109863, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.8159203980099502, | |
| "grad_norm": 1288.328369140625, | |
| "learning_rate": 4.235990621408972e-08, | |
| "logits/chosen": -5.644999980926514, | |
| "logits/rejected": -5.604687690734863, | |
| "logps/chosen": -901.4000244140625, | |
| "logps/rejected": -848.6799926757812, | |
| "loss": 3.7236, | |
| "rewards/accuracies": 0.5337499976158142, | |
| "rewards/chosen": 5.766840934753418, | |
| "rewards/margins": 0.43273621797561646, | |
| "rewards/rejected": 5.334909439086914, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8258706467661692, | |
| "grad_norm": 2981.522216796875, | |
| "learning_rate": 3.802554320523949e-08, | |
| "logits/chosen": -5.59375, | |
| "logits/rejected": -5.619062423706055, | |
| "logps/chosen": -928.6199951171875, | |
| "logps/rejected": -869.6599731445312, | |
| "loss": 3.1704, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 8.217485427856445, | |
| "rewards/margins": 1.8676855564117432, | |
| "rewards/rejected": 6.345156192779541, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.835820895522388, | |
| "grad_norm": 1575.174072265625, | |
| "learning_rate": 3.390679272837724e-08, | |
| "logits/chosen": -5.644999980926514, | |
| "logits/rejected": -5.65625, | |
| "logps/chosen": -920.8800048828125, | |
| "logps/rejected": -862.9199829101562, | |
| "loss": 3.3543, | |
| "rewards/accuracies": 0.5425000190734863, | |
| "rewards/chosen": 6.809421539306641, | |
| "rewards/margins": 1.4754736423492432, | |
| "rewards/rejected": 5.333471775054932, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.845771144278607, | |
| "grad_norm": 1714.1629638671875, | |
| "learning_rate": 3.00078442232703e-08, | |
| "logits/chosen": -5.693437576293945, | |
| "logits/rejected": -5.644374847412109, | |
| "logps/chosen": -917.4000244140625, | |
| "logps/rejected": -891.1199951171875, | |
| "loss": 4.002, | |
| "rewards/accuracies": 0.5237500071525574, | |
| "rewards/chosen": 5.626728534698486, | |
| "rewards/margins": 0.3019775450229645, | |
| "rewards/rejected": 5.322280406951904, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8557213930348259, | |
| "grad_norm": 4939.02490234375, | |
| "learning_rate": 2.633266355529684e-08, | |
| "logits/chosen": -5.634375095367432, | |
| "logits/rejected": -5.644999980926514, | |
| "logps/chosen": -928.219970703125, | |
| "logps/rejected": -868.4199829101562, | |
| "loss": 3.297, | |
| "rewards/accuracies": 0.5481250286102295, | |
| "rewards/chosen": 8.091529846191406, | |
| "rewards/margins": 2.134963274002075, | |
| "rewards/rejected": 5.956567287445068, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8656716417910447, | |
| "grad_norm": 3157.2939453125, | |
| "learning_rate": 2.2884988981515447e-08, | |
| "logits/chosen": -5.770625114440918, | |
| "logits/rejected": -5.713437557220459, | |
| "logps/chosen": -948.8400268554688, | |
| "logps/rejected": -911.5800170898438, | |
| "loss": 4.5557, | |
| "rewards/accuracies": 0.5231249928474426, | |
| "rewards/chosen": 5.515078067779541, | |
| "rewards/margins": -0.4518188536167145, | |
| "rewards/rejected": 5.966367244720459, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.8756218905472637, | |
| "grad_norm": 1111.7291259765625, | |
| "learning_rate": 1.9668327348248857e-08, | |
| "logits/chosen": -5.699999809265137, | |
| "logits/rejected": -5.701562404632568, | |
| "logps/chosen": -895.1400146484375, | |
| "logps/rejected": -893.3200073242188, | |
| "loss": 3.5643, | |
| "rewards/accuracies": 0.5493749976158142, | |
| "rewards/chosen": 5.846921443939209, | |
| "rewards/margins": 0.8055566549301147, | |
| "rewards/rejected": 5.04319953918457, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8855721393034826, | |
| "grad_norm": 4429.13330078125, | |
| "learning_rate": 1.6685950524050307e-08, | |
| "logits/chosen": -5.749062538146973, | |
| "logits/rejected": -5.731562614440918, | |
| "logps/chosen": -940.0399780273438, | |
| "logps/rejected": -897.780029296875, | |
| "loss": 3.4933, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 5.280101299285889, | |
| "rewards/margins": 0.7753466963768005, | |
| "rewards/rejected": 4.501115798950195, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 1798.3892822265625, | |
| "learning_rate": 1.3940892071680837e-08, | |
| "logits/chosen": -5.7578125, | |
| "logits/rejected": -5.719531059265137, | |
| "logps/chosen": -913.3400268554688, | |
| "logps/rejected": -870.8599853515625, | |
| "loss": 3.8212, | |
| "rewards/accuracies": 0.5056250095367432, | |
| "rewards/chosen": 6.39865255355835, | |
| "rewards/margins": 0.44740965962409973, | |
| "rewards/rejected": 5.9497971534729, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9054726368159204, | |
| "grad_norm": 1578.768798828125, | |
| "learning_rate": 1.1435944162481808e-08, | |
| "logits/chosen": -5.743750095367432, | |
| "logits/rejected": -5.692031383514404, | |
| "logps/chosen": -936.0999755859375, | |
| "logps/rejected": -878.8800048828125, | |
| "loss": 3.6106, | |
| "rewards/accuracies": 0.5268750190734863, | |
| "rewards/chosen": 6.700493335723877, | |
| "rewards/margins": 0.9369800090789795, | |
| "rewards/rejected": 5.758784294128418, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.9154228855721394, | |
| "grad_norm": 2657.033935546875, | |
| "learning_rate": 9.17365473628226e-09, | |
| "logits/chosen": -5.730000019073486, | |
| "logits/rejected": -5.676718711853027, | |
| "logps/chosen": -936.4600219726562, | |
| "logps/rejected": -913.5800170898438, | |
| "loss": 4.1793, | |
| "rewards/accuracies": 0.5262500047683716, | |
| "rewards/chosen": 6.953037261962891, | |
| "rewards/margins": 0.0015722656389698386, | |
| "rewards/rejected": 6.950415134429932, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.9253731343283582, | |
| "grad_norm": 5578.38916015625, | |
| "learning_rate": 7.1563249097292e-09, | |
| "logits/chosen": -5.808750152587891, | |
| "logits/rejected": -5.783437728881836, | |
| "logps/chosen": -908.780029296875, | |
| "logps/rejected": -889.7000122070312, | |
| "loss": 3.3363, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 5.7548828125, | |
| "rewards/margins": 1.054022192955017, | |
| "rewards/rejected": 4.701448917388916, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9353233830845771, | |
| "grad_norm": 2391.306640625, | |
| "learning_rate": 5.38600663567737e-09, | |
| "logits/chosen": -5.85281229019165, | |
| "logits/rejected": -5.862187385559082, | |
| "logps/chosen": -873.1599731445312, | |
| "logps/rejected": -841.780029296875, | |
| "loss": 3.168, | |
| "rewards/accuracies": 0.5099999904632568, | |
| "rewards/chosen": 6.133432388305664, | |
| "rewards/margins": 0.7415136694908142, | |
| "rewards/rejected": 5.392402172088623, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.945273631840796, | |
| "grad_norm": 1352.6234130859375, | |
| "learning_rate": 3.864500616019228e-09, | |
| "logits/chosen": -5.815937519073486, | |
| "logits/rejected": -5.816874980926514, | |
| "logps/chosen": -930.3800048828125, | |
| "logps/rejected": -871.1799926757812, | |
| "loss": 3.6411, | |
| "rewards/accuracies": 0.5568749904632568, | |
| "rewards/chosen": 6.584997653961182, | |
| "rewards/margins": 1.1969140768051147, | |
| "rewards/rejected": 5.385488510131836, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9552238805970149, | |
| "grad_norm": 1797.4112548828125, | |
| "learning_rate": 2.593354470077802e-09, | |
| "logits/chosen": -5.836249828338623, | |
| "logits/rejected": -5.78249979019165, | |
| "logps/chosen": -942.0, | |
| "logps/rejected": -871.239990234375, | |
| "loss": 3.9736, | |
| "rewards/accuracies": 0.5318750143051147, | |
| "rewards/chosen": 5.091171741485596, | |
| "rewards/margins": 0.1407189965248108, | |
| "rewards/rejected": 4.95010232925415, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9651741293532339, | |
| "grad_norm": 1682.3397216796875, | |
| "learning_rate": 1.5738611604260433e-09, | |
| "logits/chosen": -5.798749923706055, | |
| "logits/rejected": -5.760000228881836, | |
| "logps/chosen": -901.0, | |
| "logps/rejected": -837.4600219726562, | |
| "loss": 3.8623, | |
| "rewards/accuracies": 0.5318750143051147, | |
| "rewards/chosen": 6.400158882141113, | |
| "rewards/margins": 0.044941406697034836, | |
| "rewards/rejected": 6.351467132568359, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9751243781094527, | |
| "grad_norm": 1607.166259765625, | |
| "learning_rate": 8.070576777333138e-10, | |
| "logits/chosen": -5.75390625, | |
| "logits/rejected": -5.801562309265137, | |
| "logps/chosen": -863.0599975585938, | |
| "logps/rejected": -844.0399780273438, | |
| "loss": 3.1538, | |
| "rewards/accuracies": 0.5356249809265137, | |
| "rewards/chosen": 7.545395374298096, | |
| "rewards/margins": 1.907900333404541, | |
| "rewards/rejected": 5.632500171661377, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9850746268656716, | |
| "grad_norm": 1843.007568359375, | |
| "learning_rate": 2.937239859770735e-10, | |
| "logits/chosen": -5.721562385559082, | |
| "logits/rejected": -5.678124904632568, | |
| "logps/chosen": -901.3200073242188, | |
| "logps/rejected": -831.760009765625, | |
| "loss": 4.2105, | |
| "rewards/accuracies": 0.5493749976158142, | |
| "rewards/chosen": 8.320673942565918, | |
| "rewards/margins": 0.23246826231479645, | |
| "rewards/rejected": 8.0889253616333, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9950248756218906, | |
| "grad_norm": 2304.98828125, | |
| "learning_rate": 3.4382229092522196e-11, | |
| "logits/chosen": -5.78781270980835, | |
| "logits/rejected": -5.800312519073486, | |
| "logps/chosen": -910.6599731445312, | |
| "logps/rejected": -895.8599853515625, | |
| "loss": 3.3647, | |
| "rewards/accuracies": 0.5425000190734863, | |
| "rewards/chosen": 7.996386528015137, | |
| "rewards/margins": 1.4546045064926147, | |
| "rewards/rejected": 6.5366530418396, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5025, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |