| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9989258861439313, | |
| "eval_steps": 100000, | |
| "global_step": 465, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010741138560687433, | |
| "grad_norm": 94.5147817778946, | |
| "learning_rate": 8.51063829787234e-08, | |
| "logits/chosen": -10.583702087402344, | |
| "logits/rejected": -10.455877304077148, | |
| "logps/chosen": -0.9049979448318481, | |
| "logps/rejected": -0.8784100413322449, | |
| "loss": 6.1451, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -9.04997730255127, | |
| "rewards/margins": -0.2658771872520447, | |
| "rewards/rejected": -8.784101486206055, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.021482277121374866, | |
| "grad_norm": 128.5515421485228, | |
| "learning_rate": 1.702127659574468e-07, | |
| "logits/chosen": -10.710015296936035, | |
| "logits/rejected": -10.85377311706543, | |
| "logps/chosen": -1.0046945810317993, | |
| "logps/rejected": -0.8850045204162598, | |
| "loss": 5.8491, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -10.04694652557373, | |
| "rewards/margins": -1.196901559829712, | |
| "rewards/rejected": -8.850046157836914, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0322234156820623, | |
| "grad_norm": 58.802331595913145, | |
| "learning_rate": 2.553191489361702e-07, | |
| "logits/chosen": -10.312850952148438, | |
| "logits/rejected": -10.239133834838867, | |
| "logps/chosen": -1.0889472961425781, | |
| "logps/rejected": -1.1543949842453003, | |
| "loss": 6.2505, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -10.889472961425781, | |
| "rewards/margins": 0.6544777154922485, | |
| "rewards/rejected": -11.543951034545898, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04296455424274973, | |
| "grad_norm": 151.21348799898024, | |
| "learning_rate": 3.404255319148936e-07, | |
| "logits/chosen": -9.954164505004883, | |
| "logits/rejected": -10.053568840026855, | |
| "logps/chosen": -0.9611791372299194, | |
| "logps/rejected": -1.1332345008850098, | |
| "loss": 5.5619, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -9.611791610717773, | |
| "rewards/margins": 1.7205528020858765, | |
| "rewards/rejected": -11.332345008850098, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05370569280343716, | |
| "grad_norm": 99.92865956439873, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -10.326103210449219, | |
| "logits/rejected": -10.055009841918945, | |
| "logps/chosen": -0.8260948061943054, | |
| "logps/rejected": -1.1549828052520752, | |
| "loss": 5.2635, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -8.260948181152344, | |
| "rewards/margins": 3.2888808250427246, | |
| "rewards/rejected": -11.54982852935791, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0644468313641246, | |
| "grad_norm": 81.6600880763309, | |
| "learning_rate": 5.106382978723404e-07, | |
| "logits/chosen": -9.319940567016602, | |
| "logits/rejected": -9.13192081451416, | |
| "logps/chosen": -0.6618553996086121, | |
| "logps/rejected": -0.6553267240524292, | |
| "loss": 5.0518, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -6.61855411529541, | |
| "rewards/margins": -0.06528709828853607, | |
| "rewards/rejected": -6.553267002105713, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07518796992481203, | |
| "grad_norm": 92.79131753018717, | |
| "learning_rate": 5.957446808510638e-07, | |
| "logits/chosen": -8.877812385559082, | |
| "logits/rejected": -8.929550170898438, | |
| "logps/chosen": -0.717892050743103, | |
| "logps/rejected": -0.6935927867889404, | |
| "loss": 5.224, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -7.178920745849609, | |
| "rewards/margins": -0.24299363791942596, | |
| "rewards/rejected": -6.935927391052246, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08592910848549946, | |
| "grad_norm": 89.40050001716304, | |
| "learning_rate": 6.808510638297872e-07, | |
| "logits/chosen": -7.988096714019775, | |
| "logits/rejected": -7.907191276550293, | |
| "logps/chosen": -0.7402302622795105, | |
| "logps/rejected": -0.7434382438659668, | |
| "loss": 4.8885, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -7.4023027420043945, | |
| "rewards/margins": 0.032080501317977905, | |
| "rewards/rejected": -7.434383392333984, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0966702470461869, | |
| "grad_norm": 59.92644904113339, | |
| "learning_rate": 7.659574468085107e-07, | |
| "logits/chosen": -8.71805477142334, | |
| "logits/rejected": -8.232014656066895, | |
| "logps/chosen": -0.5317873954772949, | |
| "logps/rejected": -0.6050616502761841, | |
| "loss": 4.5879, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -5.317873477935791, | |
| "rewards/margins": 0.7327424883842468, | |
| "rewards/rejected": -6.0506157875061035, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10741138560687433, | |
| "grad_norm": 60.25452206880678, | |
| "learning_rate": 7.998983280184396e-07, | |
| "logits/chosen": -8.83049488067627, | |
| "logits/rejected": -8.585375785827637, | |
| "logps/chosen": -0.5144228339195251, | |
| "logps/rejected": -0.5809676647186279, | |
| "loss": 4.6549, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -5.144228458404541, | |
| "rewards/margins": 0.6654484868049622, | |
| "rewards/rejected": -5.8096771240234375, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11815252416756176, | |
| "grad_norm": 46.997605934809734, | |
| "learning_rate": 7.992771864078597e-07, | |
| "logits/chosen": -8.163946151733398, | |
| "logits/rejected": -8.180994033813477, | |
| "logps/chosen": -0.5956984758377075, | |
| "logps/rejected": -0.7000880837440491, | |
| "loss": 4.6606, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -5.956984519958496, | |
| "rewards/margins": 1.0438958406448364, | |
| "rewards/rejected": -7.000881195068359, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1288936627282492, | |
| "grad_norm": 59.25357465703395, | |
| "learning_rate": 7.980922636120897e-07, | |
| "logits/chosen": -8.718216896057129, | |
| "logits/rejected": -8.35698127746582, | |
| "logps/chosen": -0.5706155896186829, | |
| "logps/rejected": -0.6969493627548218, | |
| "loss": 4.4885, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -5.706155776977539, | |
| "rewards/margins": 1.2633379697799683, | |
| "rewards/rejected": -6.969493865966797, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13963480128893663, | |
| "grad_norm": 50.79780612404402, | |
| "learning_rate": 7.963452327474534e-07, | |
| "logits/chosen": -9.234804153442383, | |
| "logits/rejected": -9.1095609664917, | |
| "logps/chosen": -0.6090906858444214, | |
| "logps/rejected": -0.7208055257797241, | |
| "loss": 4.666, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -6.090908050537109, | |
| "rewards/margins": 1.1171473264694214, | |
| "rewards/rejected": -7.208055019378662, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.15037593984962405, | |
| "grad_norm": 56.36564641791114, | |
| "learning_rate": 7.940385606293987e-07, | |
| "logits/chosen": -8.946883201599121, | |
| "logits/rejected": -8.716778755187988, | |
| "logps/chosen": -0.6818052530288696, | |
| "logps/rejected": -0.7961267828941345, | |
| "loss": 4.577, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -6.818052768707275, | |
| "rewards/margins": 1.1432150602340698, | |
| "rewards/rejected": -7.961267948150635, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1611170784103115, | |
| "grad_norm": 61.76930510948969, | |
| "learning_rate": 7.911755042893434e-07, | |
| "logits/chosen": -9.067525863647461, | |
| "logits/rejected": -8.9346923828125, | |
| "logps/chosen": -0.6832990646362305, | |
| "logps/rejected": -0.7763570547103882, | |
| "loss": 4.4179, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -6.8329901695251465, | |
| "rewards/margins": 0.9305804371833801, | |
| "rewards/rejected": -7.763571262359619, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17185821697099893, | |
| "grad_norm": 63.602633074853536, | |
| "learning_rate": 7.877601063757321e-07, | |
| "logits/chosen": -9.461370468139648, | |
| "logits/rejected": -8.981520652770996, | |
| "logps/chosen": -0.6881433129310608, | |
| "logps/rejected": -0.8508684039115906, | |
| "loss": 4.3763, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -6.881433010101318, | |
| "rewards/margins": 1.6272509098052979, | |
| "rewards/rejected": -8.508684158325195, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18259935553168635, | |
| "grad_norm": 50.67613033462041, | |
| "learning_rate": 7.837971894457989e-07, | |
| "logits/chosen": -9.557887077331543, | |
| "logits/rejected": -9.17081069946289, | |
| "logps/chosen": -0.6830392479896545, | |
| "logps/rejected": -0.799291729927063, | |
| "loss": 4.6499, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -6.830392360687256, | |
| "rewards/margins": 1.1625245809555054, | |
| "rewards/rejected": -7.992917060852051, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1933404940923738, | |
| "grad_norm": 57.58777625940833, | |
| "learning_rate": 7.792923491560942e-07, | |
| "logits/chosen": -8.579484939575195, | |
| "logits/rejected": -8.546136856079102, | |
| "logps/chosen": -0.6667743921279907, | |
| "logps/rejected": -0.7407978177070618, | |
| "loss": 4.4492, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -6.667743682861328, | |
| "rewards/margins": 0.7402342557907104, | |
| "rewards/rejected": -7.407977104187012, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 68.91179001810359, | |
| "learning_rate": 7.742519463613926e-07, | |
| "logits/chosen": -9.336307525634766, | |
| "logits/rejected": -9.128133773803711, | |
| "logps/chosen": -0.706219494342804, | |
| "logps/rejected": -0.7757526636123657, | |
| "loss": 4.2763, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -7.062193870544434, | |
| "rewards/margins": 0.6953321099281311, | |
| "rewards/rejected": -7.7575273513793945, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21482277121374865, | |
| "grad_norm": 140.43504047927686, | |
| "learning_rate": 7.68683098133138e-07, | |
| "logits/chosen": -8.939419746398926, | |
| "logits/rejected": -8.681028366088867, | |
| "logps/chosen": -0.7093919515609741, | |
| "logps/rejected": -0.8932901620864868, | |
| "loss": 4.4002, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -7.093919277191162, | |
| "rewards/margins": 1.838982343673706, | |
| "rewards/rejected": -8.932901382446289, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22556390977443608, | |
| "grad_norm": 105.52949175741638, | |
| "learning_rate": 7.625936677101051e-07, | |
| "logits/chosen": -8.601816177368164, | |
| "logits/rejected": -8.625459671020508, | |
| "logps/chosen": -0.8767679333686829, | |
| "logps/rejected": -0.8515819311141968, | |
| "loss": 4.4644, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -8.767679214477539, | |
| "rewards/margins": -0.25185948610305786, | |
| "rewards/rejected": -8.51581859588623, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23630504833512353, | |
| "grad_norm": 56.44051818244315, | |
| "learning_rate": 7.559922533954731e-07, | |
| "logits/chosen": -9.58240795135498, | |
| "logits/rejected": -9.501542091369629, | |
| "logps/chosen": -0.7655607461929321, | |
| "logps/rejected": -0.8916142582893372, | |
| "loss": 4.2797, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -7.6556077003479, | |
| "rewards/margins": 1.2605348825454712, | |
| "rewards/rejected": -8.916143417358398, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24704618689581095, | |
| "grad_norm": 68.9497539150874, | |
| "learning_rate": 7.488881764159808e-07, | |
| "logits/chosen": -9.756335258483887, | |
| "logits/rejected": -9.543218612670898, | |
| "logps/chosen": -0.7038711309432983, | |
| "logps/rejected": -0.7986757159233093, | |
| "loss": 4.154, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -7.0387115478515625, | |
| "rewards/margins": 0.9480463862419128, | |
| "rewards/rejected": -7.986758232116699, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2577873254564984, | |
| "grad_norm": 90.20504733911939, | |
| "learning_rate": 7.412914677603135e-07, | |
| "logits/chosen": -9.883420944213867, | |
| "logits/rejected": -9.735390663146973, | |
| "logps/chosen": -0.9017173647880554, | |
| "logps/rejected": -0.9996153116226196, | |
| "loss": 4.2168, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -9.017173767089844, | |
| "rewards/margins": 0.9789786338806152, | |
| "rewards/rejected": -9.996152877807617, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.26852846401718583, | |
| "grad_norm": 63.31580116626401, | |
| "learning_rate": 7.332128540153017e-07, | |
| "logits/chosen": -10.71928596496582, | |
| "logits/rejected": -10.555776596069336, | |
| "logps/chosen": -0.788918673992157, | |
| "logps/rejected": -0.9437187910079956, | |
| "loss": 4.1085, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -7.889185905456543, | |
| "rewards/margins": 1.5480016469955444, | |
| "rewards/rejected": -9.437189102172852, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.27926960257787325, | |
| "grad_norm": 81.18347953083857, | |
| "learning_rate": 7.246637422199322e-07, | |
| "logits/chosen": -10.676037788391113, | |
| "logits/rejected": -10.630210876464844, | |
| "logps/chosen": -0.8381876945495605, | |
| "logps/rejected": -1.0221493244171143, | |
| "loss": 4.0857, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -8.381875991821289, | |
| "rewards/margins": 1.8396151065826416, | |
| "rewards/rejected": -10.221491813659668, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2900107411385607, | |
| "grad_norm": 80.90445716094146, | |
| "learning_rate": 7.156562037585574e-07, | |
| "logits/chosen": -11.714326858520508, | |
| "logits/rejected": -11.1636323928833, | |
| "logps/chosen": -0.8452903628349304, | |
| "logps/rejected": -1.1077954769134521, | |
| "loss": 3.9406, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -8.452905654907227, | |
| "rewards/margins": 2.6250510215759277, | |
| "rewards/rejected": -11.07795524597168, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3007518796992481, | |
| "grad_norm": 88.8085364984583, | |
| "learning_rate": 7.062029573160467e-07, | |
| "logits/chosen": -11.935297012329102, | |
| "logits/rejected": -11.792046546936035, | |
| "logps/chosen": -0.9109989404678345, | |
| "logps/rejected": -1.1446388959884644, | |
| "loss": 3.6921, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -9.109989166259766, | |
| "rewards/margins": 2.3363993167877197, | |
| "rewards/rejected": -11.446390151977539, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31149301825993553, | |
| "grad_norm": 110.78828702618637, | |
| "learning_rate": 6.963173509189455e-07, | |
| "logits/chosen": -13.552042007446289, | |
| "logits/rejected": -13.324705123901367, | |
| "logps/chosen": -1.0502710342407227, | |
| "logps/rejected": -1.2693006992340088, | |
| "loss": 3.8236, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -10.502711296081543, | |
| "rewards/margins": 2.190295696258545, | |
| "rewards/rejected": -12.69300651550293, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.322234156820623, | |
| "grad_norm": 111.15417892636452, | |
| "learning_rate": 6.860133430880024e-07, | |
| "logits/chosen": -14.586761474609375, | |
| "logits/rejected": -14.23077392578125, | |
| "logps/chosen": -1.2444875240325928, | |
| "logps/rejected": -1.4712624549865723, | |
| "loss": 3.7527, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -12.444875717163086, | |
| "rewards/margins": 2.2677478790283203, | |
| "rewards/rejected": -14.712623596191406, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33297529538131043, | |
| "grad_norm": 119.28100769496392, | |
| "learning_rate": 6.753054831286747e-07, | |
| "logits/chosen": -14.823234558105469, | |
| "logits/rejected": -14.770757675170898, | |
| "logps/chosen": -1.3944904804229736, | |
| "logps/rejected": -1.6155385971069336, | |
| "loss": 3.893, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -13.944903373718262, | |
| "rewards/margins": 2.2104804515838623, | |
| "rewards/rejected": -16.155384063720703, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.34371643394199786, | |
| "grad_norm": 103.61310969210844, | |
| "learning_rate": 6.642088905874433e-07, | |
| "logits/chosen": -14.195696830749512, | |
| "logits/rejected": -14.07690143585205, | |
| "logps/chosen": -1.3757129907608032, | |
| "logps/rejected": -1.5891997814178467, | |
| "loss": 3.7363, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -13.75713062286377, | |
| "rewards/margins": 2.134868860244751, | |
| "rewards/rejected": -15.891998291015625, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3544575725026853, | |
| "grad_norm": 95.46872943421629, | |
| "learning_rate": 6.527392339029455e-07, | |
| "logits/chosen": -14.401777267456055, | |
| "logits/rejected": -14.322749137878418, | |
| "logps/chosen": -1.2530758380889893, | |
| "logps/rejected": -1.5336121320724487, | |
| "loss": 3.4763, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -12.530759811401367, | |
| "rewards/margins": 2.8053627014160156, | |
| "rewards/rejected": -15.336122512817383, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3651987110633727, | |
| "grad_norm": 119.23452912777815, | |
| "learning_rate": 6.409127082820689e-07, | |
| "logits/chosen": -14.566454887390137, | |
| "logits/rejected": -14.484842300415039, | |
| "logps/chosen": -1.531166434288025, | |
| "logps/rejected": -1.7996248006820679, | |
| "loss": 3.6554, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.311663627624512, | |
| "rewards/margins": 2.6845829486846924, | |
| "rewards/rejected": -17.996248245239258, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37593984962406013, | |
| "grad_norm": 123.93953756068933, | |
| "learning_rate": 6.287460128322457e-07, | |
| "logits/chosen": -14.157377243041992, | |
| "logits/rejected": -14.0371675491333, | |
| "logps/chosen": -1.5080561637878418, | |
| "logps/rejected": -1.9248558282852173, | |
| "loss": 3.4374, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -15.080561637878418, | |
| "rewards/margins": 4.167994022369385, | |
| "rewards/rejected": -19.24855613708496, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3866809881847476, | |
| "grad_norm": 223.710665367037, | |
| "learning_rate": 6.16256326982239e-07, | |
| "logits/chosen": -16.03777313232422, | |
| "logits/rejected": -16.100345611572266, | |
| "logps/chosen": -1.5300877094268799, | |
| "logps/rejected": -1.8575336933135986, | |
| "loss": 3.4606, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.300875663757324, | |
| "rewards/margins": 3.2744598388671875, | |
| "rewards/rejected": -18.575336456298828, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39742212674543503, | |
| "grad_norm": 294.4348098673441, | |
| "learning_rate": 6.034612862247114e-07, | |
| "logits/chosen": -14.142799377441406, | |
| "logits/rejected": -13.796422958374023, | |
| "logps/chosen": -1.5025275945663452, | |
| "logps/rejected": -1.7810484170913696, | |
| "loss": 3.1185, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.025274276733398, | |
| "rewards/margins": 2.7852089405059814, | |
| "rewards/rejected": -17.810483932495117, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 112.37773385751002, | |
| "learning_rate": 5.903789572148295e-07, | |
| "logits/chosen": -14.8009614944458, | |
| "logits/rejected": -14.250249862670898, | |
| "logps/chosen": -1.5931546688079834, | |
| "logps/rejected": -2.022378444671631, | |
| "loss": 3.3847, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.931546211242676, | |
| "rewards/margins": 4.292238712310791, | |
| "rewards/rejected": -20.223783493041992, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4189044038668099, | |
| "grad_norm": 125.77761635765717, | |
| "learning_rate": 5.770278122600662e-07, | |
| "logits/chosen": -14.832977294921875, | |
| "logits/rejected": -14.608530044555664, | |
| "logps/chosen": -1.6177564859390259, | |
| "logps/rejected": -1.9727256298065186, | |
| "loss": 3.6009, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -16.177562713623047, | |
| "rewards/margins": 3.549692153930664, | |
| "rewards/rejected": -19.72725486755371, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4296455424274973, | |
| "grad_norm": 101.70153964682511, | |
| "learning_rate": 5.634267032372192e-07, | |
| "logits/chosen": -14.803668022155762, | |
| "logits/rejected": -14.786203384399414, | |
| "logps/chosen": -1.6423594951629639, | |
| "logps/rejected": -1.9881032705307007, | |
| "loss": 3.3904, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -16.423595428466797, | |
| "rewards/margins": 3.457437515258789, | |
| "rewards/rejected": -19.881032943725586, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44038668098818473, | |
| "grad_norm": 123.22102599092351, | |
| "learning_rate": 5.495948349734758e-07, | |
| "logits/chosen": -14.582061767578125, | |
| "logits/rejected": -14.51270580291748, | |
| "logps/chosen": -1.747982382774353, | |
| "logps/rejected": -2.051506757736206, | |
| "loss": 3.1521, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -17.47982406616211, | |
| "rewards/margins": 3.035243034362793, | |
| "rewards/rejected": -20.51506805419922, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.45112781954887216, | |
| "grad_norm": 106.53206062736002, | |
| "learning_rate": 5.355517381291105e-07, | |
| "logits/chosen": -15.856142044067383, | |
| "logits/rejected": -15.048059463500977, | |
| "logps/chosen": -1.9086406230926514, | |
| "logps/rejected": -2.3940463066101074, | |
| "loss": 3.1856, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -19.086406707763672, | |
| "rewards/margins": 4.854057312011719, | |
| "rewards/rejected": -23.940462112426758, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.46186895810955964, | |
| "grad_norm": 135.01895835142213, | |
| "learning_rate": 5.21317241620105e-07, | |
| "logits/chosen": -17.223520278930664, | |
| "logits/rejected": -16.822795867919922, | |
| "logps/chosen": -1.9180253744125366, | |
| "logps/rejected": -2.2895708084106445, | |
| "loss": 3.4171, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -19.180253982543945, | |
| "rewards/margins": 3.7154533863067627, | |
| "rewards/rejected": -22.895706176757812, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.47261009667024706, | |
| "grad_norm": 116.88786548811646, | |
| "learning_rate": 5.069114446196291e-07, | |
| "logits/chosen": -14.405430793762207, | |
| "logits/rejected": -14.03125, | |
| "logps/chosen": -1.8195463418960571, | |
| "logps/rejected": -2.373373508453369, | |
| "loss": 3.1087, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -18.195463180541992, | |
| "rewards/margins": 5.538268566131592, | |
| "rewards/rejected": -23.73373031616211, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4833512352309345, | |
| "grad_norm": 117.33800643418842, | |
| "learning_rate": 4.923546881779183e-07, | |
| "logits/chosen": -15.34239387512207, | |
| "logits/rejected": -15.118896484375, | |
| "logps/chosen": -1.5427398681640625, | |
| "logps/rejected": -1.9821481704711914, | |
| "loss": 3.0885, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -15.427398681640625, | |
| "rewards/margins": 4.394083023071289, | |
| "rewards/rejected": -19.821481704711914, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4940923737916219, | |
| "grad_norm": 116.49865117484065, | |
| "learning_rate": 4.776675265006186e-07, | |
| "logits/chosen": -14.630195617675781, | |
| "logits/rejected": -14.59937858581543, | |
| "logps/chosen": -1.6563146114349365, | |
| "logps/rejected": -2.0928232669830322, | |
| "loss": 3.2032, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -16.56314468383789, | |
| "rewards/margins": 4.365086555480957, | |
| "rewards/rejected": -20.928232192993164, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5048335123523093, | |
| "grad_norm": 203.29377037587201, | |
| "learning_rate": 4.62870697926156e-07, | |
| "logits/chosen": -14.8600435256958, | |
| "logits/rejected": -15.338279724121094, | |
| "logps/chosen": -1.7831714153289795, | |
| "logps/rejected": -2.145383358001709, | |
| "loss": 3.2633, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -17.831714630126953, | |
| "rewards/margins": 3.622117519378662, | |
| "rewards/rejected": -21.45383071899414, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5155746509129968, | |
| "grad_norm": 116.16395762640381, | |
| "learning_rate": 4.479850956431092e-07, | |
| "logits/chosen": -14.476922988891602, | |
| "logits/rejected": -14.742956161499023, | |
| "logps/chosen": -1.6810489892959595, | |
| "logps/rejected": -2.0232789516448975, | |
| "loss": 3.0869, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -16.810489654541016, | |
| "rewards/margins": 3.4223015308380127, | |
| "rewards/rejected": -20.232791900634766, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 98.59837238595348, | |
| "learning_rate": 4.33031738188933e-07, | |
| "logits/chosen": -15.394210815429688, | |
| "logits/rejected": -14.864255905151367, | |
| "logps/chosen": -1.6588356494903564, | |
| "logps/rejected": -2.0340933799743652, | |
| "loss": 3.2303, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -16.58835792541504, | |
| "rewards/margins": 3.752579927444458, | |
| "rewards/rejected": -20.3409366607666, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5370569280343717, | |
| "grad_norm": 122.58746074195308, | |
| "learning_rate": 4.180317397716889e-07, | |
| "logits/chosen": -15.588345527648926, | |
| "logits/rejected": -15.222723007202148, | |
| "logps/chosen": -1.6328115463256836, | |
| "logps/rejected": -2.2505877017974854, | |
| "loss": 3.0874, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -16.328113555908203, | |
| "rewards/margins": 6.177763938903809, | |
| "rewards/rejected": -22.505878448486328, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.547798066595059, | |
| "grad_norm": 120.49234048427142, | |
| "learning_rate": 4.030062804566888e-07, | |
| "logits/chosen": -15.462881088256836, | |
| "logits/rejected": -15.3878173828125, | |
| "logps/chosen": -1.6617505550384521, | |
| "logps/rejected": -1.9518375396728516, | |
| "loss": 3.0471, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -16.61750602722168, | |
| "rewards/margins": 2.9008688926696777, | |
| "rewards/rejected": -19.518375396728516, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5585392051557465, | |
| "grad_norm": 102.36282696415171, | |
| "learning_rate": 3.8797657626014614e-07, | |
| "logits/chosen": -15.693799018859863, | |
| "logits/rejected": -15.475125312805176, | |
| "logps/chosen": -1.7303409576416016, | |
| "logps/rejected": -2.111295223236084, | |
| "loss": 3.1486, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -17.303409576416016, | |
| "rewards/margins": 3.809544801712036, | |
| "rewards/rejected": -21.112953186035156, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.569280343716434, | |
| "grad_norm": 120.30352416855824, | |
| "learning_rate": 3.729638491920669e-07, | |
| "logits/chosen": -14.018827438354492, | |
| "logits/rejected": -14.128240585327148, | |
| "logps/chosen": -1.5650581121444702, | |
| "logps/rejected": -1.8844165802001953, | |
| "loss": 3.0723, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -15.650581359863281, | |
| "rewards/margins": 3.193586826324463, | |
| "rewards/rejected": -18.84417152404785, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5800214822771214, | |
| "grad_norm": 91.11946128351651, | |
| "learning_rate": 3.5798929729067464e-07, | |
| "logits/chosen": -15.980966567993164, | |
| "logits/rejected": -15.59577465057373, | |
| "logps/chosen": -1.759478211402893, | |
| "logps/rejected": -2.1487960815429688, | |
| "loss": 2.7323, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -17.59478187561035, | |
| "rewards/margins": 3.8931777477264404, | |
| "rewards/rejected": -21.487960815429688, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5907626208378088, | |
| "grad_norm": 111.96074414088407, | |
| "learning_rate": 3.4307406469068595e-07, | |
| "logits/chosen": -15.691810607910156, | |
| "logits/rejected": -15.631698608398438, | |
| "logps/chosen": -1.7383606433868408, | |
| "logps/rejected": -2.2635440826416016, | |
| "loss": 2.9053, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -17.38360595703125, | |
| "rewards/margins": 5.251835823059082, | |
| "rewards/rejected": -22.635440826416016, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6015037593984962, | |
| "grad_norm": 122.95732420434838, | |
| "learning_rate": 3.282392117676968e-07, | |
| "logits/chosen": -15.389913558959961, | |
| "logits/rejected": -15.556841850280762, | |
| "logps/chosen": -1.9042613506317139, | |
| "logps/rejected": -2.476677417755127, | |
| "loss": 2.9112, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -19.04261589050293, | |
| "rewards/margins": 5.72415828704834, | |
| "rewards/rejected": -24.766775131225586, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 104.08928006990811, | |
| "learning_rate": 3.135056854008371e-07, | |
| "logits/chosen": -16.152729034423828, | |
| "logits/rejected": -16.002233505249023, | |
| "logps/chosen": -1.8163830041885376, | |
| "logps/rejected": -2.228738784790039, | |
| "loss": 2.8591, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -18.163829803466797, | |
| "rewards/margins": 4.123559474945068, | |
| "rewards/rejected": -22.28738784790039, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6229860365198711, | |
| "grad_norm": 113.24003069429473, | |
| "learning_rate": 2.988942893956833e-07, | |
| "logits/chosen": -15.338768005371094, | |
| "logits/rejected": -15.249606132507324, | |
| "logps/chosen": -1.8918412923812866, | |
| "logps/rejected": -2.32609224319458, | |
| "loss": 3.0229, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -18.918415069580078, | |
| "rewards/margins": 4.342508792877197, | |
| "rewards/rejected": -23.260921478271484, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6337271750805585, | |
| "grad_norm": 151.8876206935079, | |
| "learning_rate": 2.844256551091911e-07, | |
| "logits/chosen": -16.8232421875, | |
| "logits/rejected": -16.842761993408203, | |
| "logps/chosen": -1.9518108367919922, | |
| "logps/rejected": -2.4939913749694824, | |
| "loss": 2.833, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.518108367919922, | |
| "rewards/margins": 5.421802997589111, | |
| "rewards/rejected": -24.939910888671875, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.644468313641246, | |
| "grad_norm": 273.6603281637076, | |
| "learning_rate": 2.7012021231812664e-07, | |
| "logits/chosen": -16.766956329345703, | |
| "logits/rejected": -16.424999237060547, | |
| "logps/chosen": -1.969435453414917, | |
| "logps/rejected": -2.308202028274536, | |
| "loss": 3.2442, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -19.694355010986328, | |
| "rewards/margins": 3.387664318084717, | |
| "rewards/rejected": -23.082019805908203, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6552094522019334, | |
| "grad_norm": 187.01720889425104, | |
| "learning_rate": 2.5599816037212954e-07, | |
| "logits/chosen": -14.743069648742676, | |
| "logits/rejected": -14.623723983764648, | |
| "logps/chosen": -1.8649146556854248, | |
| "logps/rejected": -2.4030163288116455, | |
| "loss": 2.9136, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -18.64914894104004, | |
| "rewards/margins": 5.381015777587891, | |
| "rewards/rejected": -24.03016471862793, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6659505907626209, | |
| "grad_norm": 142.83830462137144, | |
| "learning_rate": 2.4207943967214064e-07, | |
| "logits/chosen": -16.09463119506836, | |
| "logits/rejected": -15.806689262390137, | |
| "logps/chosen": -2.033447742462158, | |
| "logps/rejected": -2.524364471435547, | |
| "loss": 3.1207, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -20.334476470947266, | |
| "rewards/margins": 4.909164905548096, | |
| "rewards/rejected": -25.24364471435547, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6766917293233082, | |
| "grad_norm": 106.6308691579763, | |
| "learning_rate": 2.2838370351446547e-07, | |
| "logits/chosen": -15.870585441589355, | |
| "logits/rejected": -15.55876350402832, | |
| "logps/chosen": -1.8023881912231445, | |
| "logps/rejected": -2.277968168258667, | |
| "loss": 2.7641, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -18.023881912231445, | |
| "rewards/margins": 4.755801200866699, | |
| "rewards/rejected": -22.779682159423828, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6874328678839957, | |
| "grad_norm": 108.7814729394269, | |
| "learning_rate": 2.1493029034023188e-07, | |
| "logits/chosen": -15.210580825805664, | |
| "logits/rejected": -15.057415962219238, | |
| "logps/chosen": -1.789072036743164, | |
| "logps/rejected": -2.2775633335113525, | |
| "loss": 2.8863, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -17.89072036743164, | |
| "rewards/margins": 4.884912014007568, | |
| "rewards/rejected": -22.775630950927734, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6981740064446831, | |
| "grad_norm": 180.86259023584043, | |
| "learning_rate": 2.0173819642942376e-07, | |
| "logits/chosen": -14.378689765930176, | |
| "logits/rejected": -14.201916694641113, | |
| "logps/chosen": -1.9404065608978271, | |
| "logps/rejected": -2.5850563049316406, | |
| "loss": 2.9848, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -19.404064178466797, | |
| "rewards/margins": 6.446499824523926, | |
| "rewards/rejected": -25.85056495666504, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7089151450053706, | |
| "grad_norm": 132.13798361660807, | |
| "learning_rate": 1.888260490780485e-07, | |
| "logits/chosen": -14.281087875366211, | |
| "logits/rejected": -14.154438972473145, | |
| "logps/chosen": -1.7613548040390015, | |
| "logps/rejected": -2.238495349884033, | |
| "loss": 3.0495, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -17.61355209350586, | |
| "rewards/margins": 4.771404266357422, | |
| "rewards/rejected": -22.384952545166016, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.719656283566058, | |
| "grad_norm": 127.44733826541191, | |
| "learning_rate": 1.7621208029631078e-07, | |
| "logits/chosen": -14.766406059265137, | |
| "logits/rejected": -14.667470932006836, | |
| "logps/chosen": -1.9043552875518799, | |
| "logps/rejected": -2.4970054626464844, | |
| "loss": 2.9109, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -19.04355239868164, | |
| "rewards/margins": 5.9265007972717285, | |
| "rewards/rejected": -24.970054626464844, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7303974221267454, | |
| "grad_norm": 114.16195914051926, | |
| "learning_rate": 1.6391410106493227e-07, | |
| "logits/chosen": -14.881872177124023, | |
| "logits/rejected": -14.627456665039062, | |
| "logps/chosen": -1.9786325693130493, | |
| "logps/rejected": -2.5424978733062744, | |
| "loss": 2.8817, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -19.786325454711914, | |
| "rewards/margins": 5.63865327835083, | |
| "rewards/rejected": -25.424976348876953, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7411385606874329, | |
| "grad_norm": 99.76652109445614, | |
| "learning_rate": 1.5194947618596673e-07, | |
| "logits/chosen": -15.026782035827637, | |
| "logits/rejected": -14.58587646484375, | |
| "logps/chosen": -1.9211170673370361, | |
| "logps/rejected": -2.394774913787842, | |
| "loss": 3.0269, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -19.211170196533203, | |
| "rewards/margins": 4.736577033996582, | |
| "rewards/rejected": -23.9477481842041, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7518796992481203, | |
| "grad_norm": 107.2380444923777, | |
| "learning_rate": 1.4033509976362083e-07, | |
| "logits/chosen": -15.670697212219238, | |
| "logits/rejected": -15.579752922058105, | |
| "logps/chosen": -1.9338979721069336, | |
| "logps/rejected": -2.3520257472991943, | |
| "loss": 2.9892, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -19.338979721069336, | |
| "rewards/margins": 4.181277751922607, | |
| "rewards/rejected": -23.52025604248047, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7626208378088077, | |
| "grad_norm": 108.07049767560719, | |
| "learning_rate": 1.2908737134970363e-07, | |
| "logits/chosen": -14.5513334274292, | |
| "logits/rejected": -14.516873359680176, | |
| "logps/chosen": -1.8658726215362549, | |
| "logps/rejected": -2.476576566696167, | |
| "loss": 3.1196, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -18.65872573852539, | |
| "rewards/margins": 6.1070404052734375, | |
| "rewards/rejected": -24.765766143798828, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7733619763694952, | |
| "grad_norm": 103.96338647941381, | |
| "learning_rate": 1.1822217278738515e-07, | |
| "logits/chosen": -15.559527397155762, | |
| "logits/rejected": -15.545167922973633, | |
| "logps/chosen": -1.9091074466705322, | |
| "logps/rejected": -2.4284536838531494, | |
| "loss": 3.0461, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -19.091075897216797, | |
| "rewards/margins": 5.193462371826172, | |
| "rewards/rejected": -24.284536361694336, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7841031149301826, | |
| "grad_norm": 138.78303459839336, | |
| "learning_rate": 1.0775484578596241e-07, | |
| "logits/chosen": -15.669352531433105, | |
| "logits/rejected": -15.537897109985352, | |
| "logps/chosen": -1.94468092918396, | |
| "logps/rejected": -2.5436809062957764, | |
| "loss": 2.7974, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -19.44681167602539, | |
| "rewards/margins": 5.989997386932373, | |
| "rewards/rejected": -25.436809539794922, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7948442534908701, | |
| "grad_norm": 122.14734643501443, | |
| "learning_rate": 9.770017025829673e-08, | |
| "logits/chosen": -15.961019515991211, | |
| "logits/rejected": -15.95417308807373, | |
| "logps/chosen": -2.1812241077423096, | |
| "logps/rejected": -2.6908135414123535, | |
| "loss": 2.6374, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -21.812240600585938, | |
| "rewards/margins": 5.095890998840332, | |
| "rewards/rejected": -26.908132553100586, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8055853920515574, | |
| "grad_norm": 114.19485857094502, | |
| "learning_rate": 8.807234345151027e-08, | |
| "logits/chosen": -14.920249938964844, | |
| "logits/rejected": -14.890344619750977, | |
| "logps/chosen": -2.0413661003112793, | |
| "logps/rejected": -2.6543760299682617, | |
| "loss": 2.8891, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -20.41366195678711, | |
| "rewards/margins": 6.13009786605835, | |
| "rewards/rejected": -26.543758392333984, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 184.4289644042616, | |
| "learning_rate": 7.888495990040924e-08, | |
| "logits/chosen": -13.656982421875, | |
| "logits/rejected": -13.701431274414062, | |
| "logps/chosen": -2.011772871017456, | |
| "logps/rejected": -2.7216084003448486, | |
| "loss": 2.9801, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -20.117727279663086, | |
| "rewards/margins": 7.098354339599609, | |
| "rewards/rejected": -27.21608543395996, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8270676691729323, | |
| "grad_norm": 132.46593111411627, | |
| "learning_rate": 7.015099223193943e-08, | |
| "logits/chosen": -15.658330917358398, | |
| "logits/rejected": -15.693890571594238, | |
| "logps/chosen": -1.9581212997436523, | |
| "logps/rejected": -2.5001060962677, | |
| "loss": 2.8611, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -19.58121681213379, | |
| "rewards/margins": 5.419846534729004, | |
| "rewards/rejected": -25.001062393188477, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8378088077336198, | |
| "grad_norm": 109.73562851103766, | |
| "learning_rate": 6.188277284777857e-08, | |
| "logits/chosen": -14.48884391784668, | |
| "logits/rejected": -13.800481796264648, | |
| "logps/chosen": -1.9877732992172241, | |
| "logps/rejected": -2.5746169090270996, | |
| "loss": 2.8295, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -19.877731323242188, | |
| "rewards/margins": 5.868436813354492, | |
| "rewards/rejected": -25.746166229248047, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8485499462943072, | |
| "grad_norm": 119.04235166242496, | |
| "learning_rate": 5.409197651092965e-08, | |
| "logits/chosen": -15.729510307312012, | |
| "logits/rejected": -15.620780944824219, | |
| "logps/chosen": -2.1660141944885254, | |
| "logps/rejected": -2.667978286743164, | |
| "loss": 2.7298, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -21.660140991210938, | |
| "rewards/margins": 5.019640922546387, | |
| "rewards/rejected": -26.679784774780273, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8592910848549946, | |
| "grad_norm": 105.88618147362118, | |
| "learning_rate": 4.678960386090298e-08, | |
| "logits/chosen": -15.191770553588867, | |
| "logits/rejected": -15.158576965332031, | |
| "logps/chosen": -1.9015194177627563, | |
| "logps/rejected": -2.5145716667175293, | |
| "loss": 2.7402, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.015193939208984, | |
| "rewards/margins": 6.130521297454834, | |
| "rewards/rejected": -25.145715713500977, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8700322234156821, | |
| "grad_norm": 114.47737417347113, | |
| "learning_rate": 3.998596588076366e-08, | |
| "logits/chosen": -13.7559814453125, | |
| "logits/rejected": -13.483953475952148, | |
| "logps/chosen": -1.9659799337387085, | |
| "logps/rejected": -2.390488862991333, | |
| "loss": 3.1028, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -19.659801483154297, | |
| "rewards/margins": 4.245090484619141, | |
| "rewards/rejected": -23.904891967773438, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8807733619763695, | |
| "grad_norm": 124.82158209754382, | |
| "learning_rate": 3.3690669337976996e-08, | |
| "logits/chosen": -15.061103820800781, | |
| "logits/rejected": -14.891242980957031, | |
| "logps/chosen": -1.8727645874023438, | |
| "logps/rejected": -2.3343753814697266, | |
| "loss": 2.7051, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -18.727645874023438, | |
| "rewards/margins": 4.6161088943481445, | |
| "rewards/rejected": -23.343753814697266, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8915145005370569, | |
| "grad_norm": 127.84134494966216, | |
| "learning_rate": 2.7912603219609798e-08, | |
| "logits/chosen": -15.650156021118164, | |
| "logits/rejected": -15.531412124633789, | |
| "logps/chosen": -2.0558724403381348, | |
| "logps/rejected": -2.456268787384033, | |
| "loss": 2.7054, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -20.558725357055664, | |
| "rewards/margins": 4.003961563110352, | |
| "rewards/rejected": -24.562685012817383, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9022556390977443, | |
| "grad_norm": 121.68417066288369, | |
| "learning_rate": 2.265992618104029e-08, | |
| "logits/chosen": -15.883665084838867, | |
| "logits/rejected": -15.859227180480957, | |
| "logps/chosen": -2.1571857929229736, | |
| "logps/rejected": -2.7032248973846436, | |
| "loss": 2.7706, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -21.571857452392578, | |
| "rewards/margins": 5.460390090942383, | |
| "rewards/rejected": -27.03224754333496, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9129967776584318, | |
| "grad_norm": 205.5451817035243, | |
| "learning_rate": 1.7940055025900304e-08, | |
| "logits/chosen": -14.086555480957031, | |
| "logits/rejected": -13.882139205932617, | |
| "logps/chosen": -2.0481374263763428, | |
| "logps/rejected": -2.428682804107666, | |
| "loss": 3.0577, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -20.481372833251953, | |
| "rewards/margins": 3.8054566383361816, | |
| "rewards/rejected": -24.28683090209961, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9237379162191193, | |
| "grad_norm": 119.46095967048043, | |
| "learning_rate": 1.3759654233514817e-08, | |
| "logits/chosen": -14.88987922668457, | |
| "logits/rejected": -14.742823600769043, | |
| "logps/chosen": -1.941057562828064, | |
| "logps/rejected": -2.4432146549224854, | |
| "loss": 2.8075, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -19.41057586669922, | |
| "rewards/margins": 5.021571636199951, | |
| "rewards/rejected": -24.432147979736328, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9344790547798066, | |
| "grad_norm": 112.52416047730715, | |
| "learning_rate": 1.0124626548627402e-08, | |
| "logits/chosen": -15.55200481414795, | |
| "logits/rejected": -15.57677936553955, | |
| "logps/chosen": -2.0663094520568848, | |
| "logps/rejected": -2.723595380783081, | |
| "loss": 2.7934, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -20.663097381591797, | |
| "rewards/margins": 6.5728559494018555, | |
| "rewards/rejected": -27.235950469970703, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9452201933404941, | |
| "grad_norm": 95.43516446345191, | |
| "learning_rate": 7.040104646698042e-09, | |
| "logits/chosen": -14.149500846862793, | |
| "logits/rejected": -14.121160507202148, | |
| "logps/chosen": -2.1648197174072266, | |
| "logps/rejected": -2.803515911102295, | |
| "loss": 2.7258, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -21.648197174072266, | |
| "rewards/margins": 6.386962890625, | |
| "rewards/rejected": -28.035160064697266, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9559613319011815, | |
| "grad_norm": 105.32519506523218, | |
| "learning_rate": 4.510443886542114e-09, | |
| "logits/chosen": -15.509679794311523, | |
| "logits/rejected": -15.587133407592773, | |
| "logps/chosen": -2.019160509109497, | |
| "logps/rejected": -2.5319721698760986, | |
| "loss": 2.8015, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -20.191600799560547, | |
| "rewards/margins": 5.128118991851807, | |
| "rewards/rejected": -25.319721221923828, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.966702470461869, | |
| "grad_norm": 142.71634790119586, | |
| "learning_rate": 2.539216160544333e-09, | |
| "logits/chosen": -15.480878829956055, | |
| "logits/rejected": -15.102048873901367, | |
| "logps/chosen": -2.1352379322052, | |
| "logps/rejected": -2.589895486831665, | |
| "loss": 2.889, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -21.35237693786621, | |
| "rewards/margins": 4.546576023101807, | |
| "rewards/rejected": -25.898956298828125, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9774436090225563, | |
| "grad_norm": 123.94313579799326, | |
| "learning_rate": 1.1292048511303054e-09, | |
| "logits/chosen": -14.889852523803711, | |
| "logits/rejected": -15.104809761047363, | |
| "logps/chosen": -1.9709218740463257, | |
| "logps/rejected": -2.4727888107299805, | |
| "loss": 2.9813, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -19.709218978881836, | |
| "rewards/margins": 5.018665313720703, | |
| "rewards/rejected": -24.727886199951172, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9881847475832438, | |
| "grad_norm": 100.41632245481235, | |
| "learning_rate": 2.82400900618418e-10, | |
| "logits/chosen": -15.215599060058594, | |
| "logits/rejected": -15.19567584991455, | |
| "logps/chosen": -1.937787652015686, | |
| "logps/rejected": -2.593761920928955, | |
| "loss": 2.5524, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.377878189086914, | |
| "rewards/margins": 6.559741020202637, | |
| "rewards/rejected": -25.937618255615234, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9989258861439313, | |
| "grad_norm": 139.5358860215804, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -15.462666511535645, | |
| "logits/rejected": -15.356382369995117, | |
| "logps/chosen": -2.155303716659546, | |
| "logps/rejected": -2.6153688430786133, | |
| "loss": 2.5621, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -21.55303955078125, | |
| "rewards/margins": 4.600649356842041, | |
| "rewards/rejected": -26.1536865234375, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9989258861439313, | |
| "step": 465, | |
| "total_flos": 0.0, | |
| "train_loss": 3.5518602760889197, | |
| "train_runtime": 6148.2375, | |
| "train_samples_per_second": 9.689, | |
| "train_steps_per_second": 0.076 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 465, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |