| { | |
| "best_metric": 0.6881732940673828, | |
| "best_model_checkpoint": "/mnt/data/shesj/Trained/RL4CoT/DPO/Parallel_13B_numglueCorrect_extend_10lang_v3_iter2.json/checkpoint-1000", | |
| "epoch": 0.2508938091952581, | |
| "eval_steps": 100, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1e-08, | |
| "logits/chosen": -1.5961366891860962, | |
| "logits/rejected": -1.4505422115325928, | |
| "logps/chosen": -5.596881866455078, | |
| "logps/rejected": -9.411199569702148, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.26249998807907104, | |
| "rewards/chosen": -0.0024300559889525175, | |
| "rewards/margins": -0.003987783100455999, | |
| "rewards/rejected": 0.00155772699508816, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2e-08, | |
| "logits/chosen": -1.7575080394744873, | |
| "logits/rejected": -1.5709590911865234, | |
| "logps/chosen": -5.7118635177612305, | |
| "logps/rejected": -8.229207992553711, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.005083759315311909, | |
| "rewards/margins": 0.007960619404911995, | |
| "rewards/rejected": -0.0028768605552613735, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3e-08, | |
| "logits/chosen": -1.3776355981826782, | |
| "logits/rejected": -1.1297904253005981, | |
| "logps/chosen": -5.6487884521484375, | |
| "logps/rejected": -6.3460259437561035, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0028433147817850113, | |
| "rewards/margins": 0.004021753557026386, | |
| "rewards/rejected": -0.0011784390080720186, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -1.3416035175323486, | |
| "logits/rejected": -1.4808504581451416, | |
| "logps/chosen": -6.05244255065918, | |
| "logps/rejected": -6.185896873474121, | |
| "loss": 0.6939, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.002370675327256322, | |
| "rewards/margins": -0.003694503800943494, | |
| "rewards/rejected": 0.0013238281244412065, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5e-08, | |
| "logits/chosen": -1.5403258800506592, | |
| "logits/rejected": -1.6286535263061523, | |
| "logps/chosen": -5.387463569641113, | |
| "logps/rejected": -6.760479927062988, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.002958612982183695, | |
| "rewards/margins": -0.0027186197694391012, | |
| "rewards/rejected": -0.0002399933582637459, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -1.3823367357254028, | |
| "logits/rejected": -1.3627607822418213, | |
| "logps/chosen": -5.9833083152771, | |
| "logps/rejected": -5.850650787353516, | |
| "loss": 0.6936, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.002619728446006775, | |
| "rewards/margins": 0.004939082078635693, | |
| "rewards/rejected": -0.002319354098290205, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.999999999999999e-08, | |
| "logits/chosen": -1.4706413745880127, | |
| "logits/rejected": -1.4250898361206055, | |
| "logps/chosen": -5.871586799621582, | |
| "logps/rejected": -7.329322814941406, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0053069936111569405, | |
| "rewards/margins": 0.006646360270678997, | |
| "rewards/rejected": -0.001339366426691413, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -1.3677151203155518, | |
| "logits/rejected": -1.4711055755615234, | |
| "logps/chosen": -5.220858573913574, | |
| "logps/rejected": -6.019894599914551, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.0007630128529854119, | |
| "rewards/margins": 0.0009836136596277356, | |
| "rewards/rejected": -0.0002206008939538151, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9e-08, | |
| "logits/chosen": -1.5524793863296509, | |
| "logits/rejected": -1.5295554399490356, | |
| "logps/chosen": -5.439484596252441, | |
| "logps/rejected": -6.122335433959961, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.0029087450820952654, | |
| "rewards/margins": -0.0008771896245889366, | |
| "rewards/rejected": -0.002031555399298668, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -1.5465644598007202, | |
| "logits/rejected": -1.5324052572250366, | |
| "logps/chosen": -5.974602699279785, | |
| "logps/rejected": -6.071439743041992, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.00048004291602410376, | |
| "rewards/margins": -0.0023091284092515707, | |
| "rewards/rejected": 0.0027891716454178095, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.1e-07, | |
| "logits/chosen": -1.4663435220718384, | |
| "logits/rejected": -1.4614862203598022, | |
| "logps/chosen": -5.4424333572387695, | |
| "logps/rejected": -6.563809394836426, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.0017671022797003388, | |
| "rewards/margins": 0.00013901680358685553, | |
| "rewards/rejected": 0.0016280856216326356, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2e-07, | |
| "logits/chosen": -1.3572139739990234, | |
| "logits/rejected": -1.5018689632415771, | |
| "logps/chosen": -5.387210845947266, | |
| "logps/rejected": -5.628620147705078, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0009455516701564193, | |
| "rewards/margins": -0.0012193130096420646, | |
| "rewards/rejected": 0.0002737622708082199, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3e-07, | |
| "logits/chosen": -1.4500153064727783, | |
| "logits/rejected": -1.5059070587158203, | |
| "logps/chosen": -5.2743024826049805, | |
| "logps/rejected": -5.707627773284912, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -8.441717363893986e-05, | |
| "rewards/margins": -0.0039388397708535194, | |
| "rewards/rejected": 0.0038544225972145796, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3999999999999998e-07, | |
| "logits/chosen": -1.400269627571106, | |
| "logits/rejected": -1.4793713092803955, | |
| "logps/chosen": -5.212708473205566, | |
| "logps/rejected": -7.475738525390625, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.0009079872397705913, | |
| "rewards/margins": -0.004652161151170731, | |
| "rewards/rejected": 0.0037441744934767485, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.5e-07, | |
| "logits/chosen": -1.6910864114761353, | |
| "logits/rejected": -1.6056768894195557, | |
| "logps/chosen": -5.710541725158691, | |
| "logps/rejected": -6.734696388244629, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.0002835780323948711, | |
| "rewards/margins": 0.001549507724121213, | |
| "rewards/rejected": -0.0018330859020352364, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6e-07, | |
| "logits/chosen": -1.348565936088562, | |
| "logits/rejected": -1.3284913301467896, | |
| "logps/chosen": -5.4736833572387695, | |
| "logps/rejected": -6.32749080657959, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.004232033155858517, | |
| "rewards/margins": 0.008012665435671806, | |
| "rewards/rejected": -0.003780632745474577, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.7e-07, | |
| "logits/chosen": -1.4623692035675049, | |
| "logits/rejected": -1.3986611366271973, | |
| "logps/chosen": -5.681046485900879, | |
| "logps/rejected": -6.193971157073975, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.003290227148681879, | |
| "rewards/margins": 0.00016479431360494345, | |
| "rewards/rejected": -0.003455021884292364, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.8e-07, | |
| "logits/chosen": -1.6442101001739502, | |
| "logits/rejected": -1.7291923761367798, | |
| "logps/chosen": -6.625936985015869, | |
| "logps/rejected": -8.430340766906738, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004968828056007624, | |
| "rewards/margins": 0.004905154462903738, | |
| "rewards/rejected": 6.367354944813997e-05, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.8999999999999998e-07, | |
| "logits/chosen": -1.3575623035430908, | |
| "logits/rejected": -1.3733503818511963, | |
| "logps/chosen": -5.06931209564209, | |
| "logps/rejected": -5.564260005950928, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0030343024991452694, | |
| "rewards/margins": 0.004197937436401844, | |
| "rewards/rejected": -0.0011636342387646437, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -1.4199776649475098, | |
| "logits/rejected": -1.3951759338378906, | |
| "logps/chosen": -4.820796012878418, | |
| "logps/rejected": -5.737803936004639, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.0034723032731562853, | |
| "rewards/margins": -0.003412929829210043, | |
| "rewards/rejected": -5.9372840041760355e-05, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_logits/chosen": -3.167680501937866, | |
| "eval_logits/rejected": -3.1470484733581543, | |
| "eval_logps/chosen": -6.138812065124512, | |
| "eval_logps/rejected": -6.94625997543335, | |
| "eval_loss": 0.6930756568908691, | |
| "eval_rewards/accuracies": 0.5112179517745972, | |
| "eval_rewards/chosen": 0.00018124215421266854, | |
| "eval_rewards/margins": 0.00026574215735308826, | |
| "eval_rewards/rejected": -8.450019231531769e-05, | |
| "eval_runtime": 615.2356, | |
| "eval_samples_per_second": 32.401, | |
| "eval_steps_per_second": 0.507, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9999658256641745e-07, | |
| "logits/chosen": -1.5726451873779297, | |
| "logits/rejected": -1.4517858028411865, | |
| "logps/chosen": -6.400355339050293, | |
| "logps/rejected": -8.746145248413086, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.0017850773874670267, | |
| "rewards/margins": 0.003671336220577359, | |
| "rewards/rejected": -0.001886258483864367, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.999863304992469e-07, | |
| "logits/chosen": -1.4806731939315796, | |
| "logits/rejected": -1.4488584995269775, | |
| "logps/chosen": -4.718806266784668, | |
| "logps/rejected": -6.621995449066162, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.003079561050981283, | |
| "rewards/margins": -0.0010764991166070104, | |
| "rewards/rejected": -0.002003061817958951, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9996924449920347e-07, | |
| "logits/chosen": -1.3951839208602905, | |
| "logits/rejected": -1.5014991760253906, | |
| "logps/chosen": -5.849172115325928, | |
| "logps/rejected": -6.110888957977295, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.002346306573599577, | |
| "rewards/margins": -0.0002563331217970699, | |
| "rewards/rejected": -0.0020899735391139984, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.999453257340926e-07, | |
| "logits/chosen": -1.709111213684082, | |
| "logits/rejected": -1.6502996683120728, | |
| "logps/chosen": -5.375964164733887, | |
| "logps/rejected": -6.7974419593811035, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0006786620942875743, | |
| "rewards/margins": 0.002161815296858549, | |
| "rewards/rejected": -0.0014831533189862967, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9991457583873009e-07, | |
| "logits/chosen": -1.6699622869491577, | |
| "logits/rejected": -1.6052825450897217, | |
| "logps/chosen": -5.923104286193848, | |
| "logps/rejected": -6.845878601074219, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.002157662995159626, | |
| "rewards/margins": 0.0037304076831787825, | |
| "rewards/rejected": -0.0015727445716038346, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9987699691483047e-07, | |
| "logits/chosen": -1.5257129669189453, | |
| "logits/rejected": -1.5561866760253906, | |
| "logps/chosen": -4.64754581451416, | |
| "logps/rejected": -6.103314399719238, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.00055070681264624, | |
| "rewards/margins": 0.008159702643752098, | |
| "rewards/rejected": -0.0076089962385594845, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9983259153086325e-07, | |
| "logits/chosen": -1.5833688974380493, | |
| "logits/rejected": -1.5711325407028198, | |
| "logps/chosen": -5.653990745544434, | |
| "logps/rejected": -7.226287841796875, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0021617007441818714, | |
| "rewards/margins": 0.005132616497576237, | |
| "rewards/rejected": -0.007294316776096821, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9978136272187745e-07, | |
| "logits/chosen": -1.3831666707992554, | |
| "logits/rejected": -1.3798400163650513, | |
| "logps/chosen": -4.3581929206848145, | |
| "logps/rejected": -5.750641345977783, | |
| "loss": 0.6905, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0044810837134718895, | |
| "rewards/margins": 0.002624013228341937, | |
| "rewards/rejected": 0.0018570702522993088, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.997233139892941e-07, | |
| "logits/chosen": -1.4784475564956665, | |
| "logits/rejected": -1.453616976737976, | |
| "logps/chosen": -5.93430757522583, | |
| "logps/rejected": -6.510165214538574, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0023500092793256044, | |
| "rewards/margins": 0.007515914738178253, | |
| "rewards/rejected": -0.005165906623005867, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9965844930066698e-07, | |
| "logits/chosen": -1.4583405256271362, | |
| "logits/rejected": -1.611533761024475, | |
| "logps/chosen": -5.597588539123535, | |
| "logps/rejected": -7.059754848480225, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0004460577911231667, | |
| "rewards/margins": 0.008472367189824581, | |
| "rewards/rejected": -0.00891842506825924, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9958677308941136e-07, | |
| "logits/chosen": -1.590573787689209, | |
| "logits/rejected": -1.5703933238983154, | |
| "logps/chosen": -8.305377006530762, | |
| "logps/rejected": -6.981376647949219, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0003787805908359587, | |
| "rewards/margins": 0.006806156598031521, | |
| "rewards/rejected": -0.006427376065403223, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9950829025450114e-07, | |
| "logits/chosen": -1.6168692111968994, | |
| "logits/rejected": -1.3941491842269897, | |
| "logps/chosen": -4.724443435668945, | |
| "logps/rejected": -7.058165073394775, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.002109301509335637, | |
| "rewards/margins": 0.008472011424601078, | |
| "rewards/rejected": -0.00636270921677351, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9942300616013377e-07, | |
| "logits/chosen": -1.7095611095428467, | |
| "logits/rejected": -1.7674989700317383, | |
| "logps/chosen": -5.134617328643799, | |
| "logps/rejected": -5.987712383270264, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.0022057683672755957, | |
| "rewards/margins": 0.0033194683492183685, | |
| "rewards/rejected": -0.0055252364836633205, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.993309266353638e-07, | |
| "logits/chosen": -1.3613307476043701, | |
| "logits/rejected": -1.4475274085998535, | |
| "logps/chosen": -6.0252180099487305, | |
| "logps/rejected": -6.769097328186035, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.00019303560839034617, | |
| "rewards/margins": 0.0034273392520844936, | |
| "rewards/rejected": -0.0036203742492944, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.992320579737045e-07, | |
| "logits/chosen": -1.2607237100601196, | |
| "logits/rejected": -1.2645083665847778, | |
| "logps/chosen": -5.982339859008789, | |
| "logps/rejected": -5.792677879333496, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.006508027669042349, | |
| "rewards/margins": 0.01678382232785225, | |
| "rewards/rejected": -0.010275794193148613, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9912640693269751e-07, | |
| "logits/chosen": -1.4913660287857056, | |
| "logits/rejected": -1.467761516571045, | |
| "logps/chosen": -4.889002799987793, | |
| "logps/rejected": -6.567469120025635, | |
| "loss": 0.6895, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.007032574620097876, | |
| "rewards/margins": 0.015939272940158844, | |
| "rewards/rejected": -0.008906697854399681, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9901398073345117e-07, | |
| "logits/chosen": -1.6264896392822266, | |
| "logits/rejected": -1.787503957748413, | |
| "logps/chosen": -5.5176873207092285, | |
| "logps/rejected": -7.238592624664307, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.0024650241248309612, | |
| "rewards/margins": 0.014559340663254261, | |
| "rewards/rejected": -0.012094316072762012, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9889478706014683e-07, | |
| "logits/chosen": -1.5220094919204712, | |
| "logits/rejected": -1.3731660842895508, | |
| "logps/chosen": -4.48035192489624, | |
| "logps/rejected": -5.973559856414795, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.002238192595541477, | |
| "rewards/margins": 0.0097486088052392, | |
| "rewards/rejected": -0.007510416209697723, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9876883405951376e-07, | |
| "logits/chosen": -1.3608970642089844, | |
| "logits/rejected": -1.2253252267837524, | |
| "logps/chosen": -6.359659194946289, | |
| "logps/rejected": -6.143754005432129, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0014768632827326655, | |
| "rewards/margins": 0.004477277398109436, | |
| "rewards/rejected": -0.0030004139989614487, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9863613034027222e-07, | |
| "logits/chosen": -1.3653035163879395, | |
| "logits/rejected": -1.3042250871658325, | |
| "logps/chosen": -5.9716901779174805, | |
| "logps/rejected": -6.985511779785156, | |
| "loss": 0.6853, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.003856885712593794, | |
| "rewards/margins": 0.011315730400383472, | |
| "rewards/rejected": -0.0074588460847735405, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_logits/chosen": -3.174004077911377, | |
| "eval_logits/rejected": -3.153486490249634, | |
| "eval_logps/chosen": -6.161444664001465, | |
| "eval_logps/rejected": -6.977956771850586, | |
| "eval_loss": 0.6930607557296753, | |
| "eval_rewards/accuracies": 0.5140224099159241, | |
| "eval_rewards/chosen": -0.0020819876808673143, | |
| "eval_rewards/margins": 0.0011722741182893515, | |
| "eval_rewards/rejected": -0.0032542620319873095, | |
| "eval_runtime": 618.1553, | |
| "eval_samples_per_second": 32.248, | |
| "eval_steps_per_second": 0.505, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9849668497254518e-07, | |
| "logits/chosen": -1.366420030593872, | |
| "logits/rejected": -1.418501377105713, | |
| "logps/chosen": -4.841452121734619, | |
| "logps/rejected": -7.0436296463012695, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0067129782401025295, | |
| "rewards/margins": 0.013699628412723541, | |
| "rewards/rejected": -0.006986652500927448, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9835050748723822e-07, | |
| "logits/chosen": -1.5236493349075317, | |
| "logits/rejected": -1.4237031936645508, | |
| "logps/chosen": -5.028156280517578, | |
| "logps/rejected": -6.966952323913574, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.004889342468231916, | |
| "rewards/margins": 0.01445357222110033, | |
| "rewards/rejected": -0.009564228355884552, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9819760787538837e-07, | |
| "logits/chosen": -1.7881724834442139, | |
| "logits/rejected": -1.702300786972046, | |
| "logps/chosen": -5.008362293243408, | |
| "logps/rejected": -7.346070766448975, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.003184701083227992, | |
| "rewards/margins": 0.009327715262770653, | |
| "rewards/rejected": -0.012512415647506714, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9803799658748093e-07, | |
| "logits/chosen": -1.7177143096923828, | |
| "logits/rejected": -1.579524278640747, | |
| "logps/chosen": -5.551783561706543, | |
| "logps/rejected": -8.588147163391113, | |
| "loss": 0.6859, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00936056487262249, | |
| "rewards/margins": -0.0009745029965415597, | |
| "rewards/rejected": -0.00838606245815754, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9787168453273545e-07, | |
| "logits/chosen": -1.4372203350067139, | |
| "logits/rejected": -1.4148085117340088, | |
| "logps/chosen": -5.2672553062438965, | |
| "logps/rejected": -5.7078680992126465, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.004485909827053547, | |
| "rewards/margins": 0.01839793473482132, | |
| "rewards/rejected": -0.013912022113800049, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9769868307835993e-07, | |
| "logits/chosen": -1.6145379543304443, | |
| "logits/rejected": -1.536407709121704, | |
| "logps/chosen": -5.295413970947266, | |
| "logps/rejected": -7.089319705963135, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.00589085603132844, | |
| "rewards/margins": 0.01651725545525551, | |
| "rewards/rejected": -0.010626398026943207, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9751900404877398e-07, | |
| "logits/chosen": -1.5907440185546875, | |
| "logits/rejected": -1.5271718502044678, | |
| "logps/chosen": -4.938467979431152, | |
| "logps/rejected": -5.892498970031738, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0029959643725305796, | |
| "rewards/margins": 0.021143438294529915, | |
| "rewards/rejected": -0.01814747229218483, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9733265972480058e-07, | |
| "logits/chosen": -1.6651685237884521, | |
| "logits/rejected": -1.5356318950653076, | |
| "logps/chosen": -7.418261528015137, | |
| "logps/rejected": -8.457880973815918, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0033951636869460344, | |
| "rewards/margins": 0.026372741907835007, | |
| "rewards/rejected": -0.02297757938504219, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9713966284282674e-07, | |
| "logits/chosen": -1.4138095378875732, | |
| "logits/rejected": -1.3163411617279053, | |
| "logps/chosen": -6.462838172912598, | |
| "logps/rejected": -7.8138837814331055, | |
| "loss": 0.6839, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.007619897834956646, | |
| "rewards/margins": 0.0050119319930672646, | |
| "rewards/rejected": -0.012631828896701336, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9694002659393302e-07, | |
| "logits/chosen": -1.5196382999420166, | |
| "logits/rejected": -1.477994680404663, | |
| "logps/chosen": -5.463156700134277, | |
| "logps/rejected": -6.823214530944824, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0001128343865275383, | |
| "rewards/margins": 0.022026551887392998, | |
| "rewards/rejected": -0.02213938534259796, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9673376462299182e-07, | |
| "logits/chosen": -1.522629976272583, | |
| "logits/rejected": -1.4759032726287842, | |
| "logps/chosen": -6.2718963623046875, | |
| "logps/rejected": -7.001539707183838, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.00030401311232708395, | |
| "rewards/margins": 0.021813327446579933, | |
| "rewards/rejected": -0.021509312093257904, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9652089102773487e-07, | |
| "logits/chosen": -1.815498948097229, | |
| "logits/rejected": -1.9041208028793335, | |
| "logps/chosen": -4.9399566650390625, | |
| "logps/rejected": -6.410666465759277, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0037350181955844164, | |
| "rewards/margins": 0.023170799016952515, | |
| "rewards/rejected": -0.01943577639758587, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.963014203577896e-07, | |
| "logits/chosen": -1.5250468254089355, | |
| "logits/rejected": -1.5195215940475464, | |
| "logps/chosen": -4.857203483581543, | |
| "logps/rejected": -6.858325958251953, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.007676561363041401, | |
| "rewards/margins": 0.008902650326490402, | |
| "rewards/rejected": -0.016579212620854378, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9607536761368482e-07, | |
| "logits/chosen": -1.398611068725586, | |
| "logits/rejected": -1.4291143417358398, | |
| "logps/chosen": -5.944151878356934, | |
| "logps/rejected": -6.5233049392700195, | |
| "loss": 0.682, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.004370951093733311, | |
| "rewards/margins": 0.02115224488079548, | |
| "rewards/rejected": -0.016781292855739594, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9584274824582527e-07, | |
| "logits/chosen": -1.3120192289352417, | |
| "logits/rejected": -1.3574376106262207, | |
| "logps/chosen": -4.558770179748535, | |
| "logps/rejected": -5.6866583824157715, | |
| "loss": 0.6821, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004323200322687626, | |
| "rewards/margins": 0.01929726079106331, | |
| "rewards/rejected": -0.014974060468375683, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9560357815343574e-07, | |
| "logits/chosen": -1.5410289764404297, | |
| "logits/rejected": -1.5422722101211548, | |
| "logps/chosen": -7.481714725494385, | |
| "logps/rejected": -6.8584442138671875, | |
| "loss": 0.6795, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.004259726498275995, | |
| "rewards/margins": 0.026613563299179077, | |
| "rewards/rejected": -0.02235383726656437, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9535787368347442e-07, | |
| "logits/chosen": -1.6866031885147095, | |
| "logits/rejected": -1.573209524154663, | |
| "logps/chosen": -6.985440731048584, | |
| "logps/rejected": -6.92074728012085, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.003224983811378479, | |
| "rewards/margins": 0.03093186393380165, | |
| "rewards/rejected": -0.03415685147047043, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9510565162951537e-07, | |
| "logits/chosen": -1.5131175518035889, | |
| "logits/rejected": -1.5239301919937134, | |
| "logps/chosen": -5.186364650726318, | |
| "logps/rejected": -6.2596564292907715, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.012961247935891151, | |
| "rewards/margins": 0.05755941942334175, | |
| "rewards/rejected": -0.04459817335009575, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9484692923060094e-07, | |
| "logits/chosen": -1.540378212928772, | |
| "logits/rejected": -1.5740854740142822, | |
| "logps/chosen": -6.078642845153809, | |
| "logps/rejected": -6.573060512542725, | |
| "loss": 0.6809, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.011097406968474388, | |
| "rewards/margins": 0.030860627070069313, | |
| "rewards/rejected": -0.041958026587963104, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9458172417006346e-07, | |
| "logits/chosen": -1.5138806104660034, | |
| "logits/rejected": -1.3892674446105957, | |
| "logps/chosen": -5.267967224121094, | |
| "logps/rejected": -6.3208441734313965, | |
| "loss": 0.6801, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0035496647469699383, | |
| "rewards/margins": 0.038710370659828186, | |
| "rewards/rejected": -0.03516070172190666, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_logits/chosen": -3.184242010116577, | |
| "eval_logits/rejected": -3.1636037826538086, | |
| "eval_logps/chosen": -6.222862720489502, | |
| "eval_logps/rejected": -7.056950569152832, | |
| "eval_loss": 0.6925042867660522, | |
| "eval_rewards/accuracies": 0.5252403616905212, | |
| "eval_rewards/chosen": -0.008223854936659336, | |
| "eval_rewards/margins": 0.0029297315049916506, | |
| "eval_rewards/rejected": -0.011153585277497768, | |
| "eval_runtime": 620.1964, | |
| "eval_samples_per_second": 32.141, | |
| "eval_steps_per_second": 0.503, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.943100545743165e-07, | |
| "logits/chosen": -1.6512733697891235, | |
| "logits/rejected": -1.721056342124939, | |
| "logps/chosen": -5.9783034324646, | |
| "logps/rejected": -5.989422798156738, | |
| "loss": 0.6787, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.009316826239228249, | |
| "rewards/margins": 0.029956454411149025, | |
| "rewards/rejected": -0.039273280650377274, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9403193901161612e-07, | |
| "logits/chosen": -1.4372011423110962, | |
| "logits/rejected": -1.2608239650726318, | |
| "logps/chosen": -5.216915130615234, | |
| "logps/rejected": -7.368325233459473, | |
| "loss": 0.6783, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00843791477382183, | |
| "rewards/margins": 0.019914906471967697, | |
| "rewards/rejected": -0.028352823108434677, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9374739649079154e-07, | |
| "logits/chosen": -1.4851138591766357, | |
| "logits/rejected": -1.5565675497055054, | |
| "logps/chosen": -6.530230522155762, | |
| "logps/rejected": -5.977784156799316, | |
| "loss": 0.6795, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.005143959075212479, | |
| "rewards/margins": 0.03382311016321182, | |
| "rewards/rejected": -0.028679147362709045, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9345644645994608e-07, | |
| "logits/chosen": -1.7324178218841553, | |
| "logits/rejected": -1.6093097925186157, | |
| "logps/chosen": -5.668793201446533, | |
| "logps/rejected": -8.58001708984375, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.007929663173854351, | |
| "rewards/margins": 0.016542982310056686, | |
| "rewards/rejected": -0.02447264827787876, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9315910880512788e-07, | |
| "logits/chosen": -1.664690613746643, | |
| "logits/rejected": -1.6959164142608643, | |
| "logps/chosen": -4.798338413238525, | |
| "logps/rejected": -6.650571346282959, | |
| "loss": 0.6793, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.002946319757029414, | |
| "rewards/margins": 0.03725341707468033, | |
| "rewards/rejected": -0.04019974544644356, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.928554038489707e-07, | |
| "logits/chosen": -1.6366933584213257, | |
| "logits/rejected": -1.6312482357025146, | |
| "logps/chosen": -5.757204532623291, | |
| "logps/rejected": -6.9877214431762695, | |
| "loss": 0.6771, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.004410495515912771, | |
| "rewards/margins": 0.03300872817635536, | |
| "rewards/rejected": -0.03741922602057457, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9254535234930483e-07, | |
| "logits/chosen": -1.383345365524292, | |
| "logits/rejected": -1.4103121757507324, | |
| "logps/chosen": -7.0129594802856445, | |
| "logps/rejected": -6.204277038574219, | |
| "loss": 0.6795, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0018229834968224168, | |
| "rewards/margins": 0.018833067268133163, | |
| "rewards/rejected": -0.020656052976846695, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9222897549773846e-07, | |
| "logits/chosen": -1.3323512077331543, | |
| "logits/rejected": -1.3201320171356201, | |
| "logps/chosen": -5.118114471435547, | |
| "logps/rejected": -7.212060451507568, | |
| "loss": 0.6776, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.009920697659254074, | |
| "rewards/margins": 0.029569601640105247, | |
| "rewards/rejected": -0.03949030116200447, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9190629491820908e-07, | |
| "logits/chosen": -1.4638566970825195, | |
| "logits/rejected": -1.4451267719268799, | |
| "logps/chosen": -5.2605671882629395, | |
| "logps/rejected": -6.085940837860107, | |
| "loss": 0.6739, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.005447469651699066, | |
| "rewards/margins": 0.034440845251083374, | |
| "rewards/rejected": -0.03988831490278244, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9157733266550572e-07, | |
| "logits/chosen": -1.5962337255477905, | |
| "logits/rejected": -1.6220163106918335, | |
| "logps/chosen": -6.018959045410156, | |
| "logps/rejected": -6.889263153076172, | |
| "loss": 0.674, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.0057447971776127815, | |
| "rewards/margins": 0.07764319330453873, | |
| "rewards/rejected": -0.08338797837495804, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9124211122376135e-07, | |
| "logits/chosen": -1.4028089046478271, | |
| "logits/rejected": -1.3758150339126587, | |
| "logps/chosen": -5.279055595397949, | |
| "logps/rejected": -8.15546989440918, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.004057350568473339, | |
| "rewards/margins": 0.041755061596632004, | |
| "rewards/rejected": -0.03769771382212639, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9090065350491624e-07, | |
| "logits/chosen": -1.466732144355774, | |
| "logits/rejected": -1.505338430404663, | |
| "logps/chosen": -6.279904365539551, | |
| "logps/rejected": -6.962622165679932, | |
| "loss": 0.6722, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0070061227306723595, | |
| "rewards/margins": 0.04479537159204483, | |
| "rewards/rejected": -0.037789251655340195, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.905529828471519e-07, | |
| "logits/chosen": -1.5488044023513794, | |
| "logits/rejected": -1.4669849872589111, | |
| "logps/chosen": -6.46566104888916, | |
| "logps/rejected": -6.615389823913574, | |
| "loss": 0.6752, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.009375613182783127, | |
| "rewards/margins": 0.0315098911523819, | |
| "rewards/rejected": -0.04088550806045532, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.901991230132959e-07, | |
| "logits/chosen": -1.5748860836029053, | |
| "logits/rejected": -1.5498300790786743, | |
| "logps/chosen": -5.943437099456787, | |
| "logps/rejected": -7.429045677185059, | |
| "loss": 0.6752, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.012559416703879833, | |
| "rewards/margins": 0.03320794180035591, | |
| "rewards/rejected": -0.04576735943555832, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.8983909818919788e-07, | |
| "logits/chosen": -1.4835641384124756, | |
| "logits/rejected": -1.4734997749328613, | |
| "logps/chosen": -5.776877403259277, | |
| "logps/rejected": -6.821134090423584, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 8.074291690718383e-05, | |
| "rewards/margins": 0.041906945407390594, | |
| "rewards/rejected": -0.04182619974017143, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8947293298207635e-07, | |
| "logits/chosen": -1.590511441230774, | |
| "logits/rejected": -1.5636823177337646, | |
| "logps/chosen": -6.252840042114258, | |
| "logps/rejected": -7.568482398986816, | |
| "loss": 0.6773, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.009611086919903755, | |
| "rewards/margins": 0.04731304943561554, | |
| "rewards/rejected": -0.056924134492874146, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8910065241883678e-07, | |
| "logits/chosen": -1.7463115453720093, | |
| "logits/rejected": -1.552099347114563, | |
| "logps/chosen": -5.755384922027588, | |
| "logps/rejected": -9.108583450317383, | |
| "loss": 0.6762, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.014299096539616585, | |
| "rewards/margins": 0.04487228766083717, | |
| "rewards/rejected": -0.05917138606309891, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8872228194436116e-07, | |
| "logits/chosen": -1.4452903270721436, | |
| "logits/rejected": -1.4187732934951782, | |
| "logps/chosen": -5.313692092895508, | |
| "logps/rejected": -6.416788578033447, | |
| "loss": 0.6713, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.017885476350784302, | |
| "rewards/margins": 0.03439543396234512, | |
| "rewards/rejected": -0.052280914038419724, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8833784741976886e-07, | |
| "logits/chosen": -1.5387766361236572, | |
| "logits/rejected": -1.5287269353866577, | |
| "logps/chosen": -5.880291938781738, | |
| "logps/rejected": -6.970144748687744, | |
| "loss": 0.6682, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.018293332308530807, | |
| "rewards/margins": 0.044221844524145126, | |
| "rewards/rejected": -0.06251517683267593, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8794737512064888e-07, | |
| "logits/chosen": -1.640610933303833, | |
| "logits/rejected": -1.539394736289978, | |
| "logps/chosen": -5.029040336608887, | |
| "logps/rejected": -6.50949764251709, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.008673004806041718, | |
| "rewards/margins": 0.04148182272911072, | |
| "rewards/rejected": -0.050154827535152435, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_logits/chosen": -3.1871755123138428, | |
| "eval_logits/rejected": -3.1664505004882812, | |
| "eval_logps/chosen": -6.327807903289795, | |
| "eval_logps/rejected": -7.203298091888428, | |
| "eval_loss": 0.6918210983276367, | |
| "eval_rewards/accuracies": 0.5324519276618958, | |
| "eval_rewards/chosen": -0.018718333914875984, | |
| "eval_rewards/margins": 0.007069970481097698, | |
| "eval_rewards/rejected": -0.025788303464651108, | |
| "eval_runtime": 624.3628, | |
| "eval_samples_per_second": 31.927, | |
| "eval_steps_per_second": 0.5, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.875508917352643e-07, | |
| "logits/chosen": -1.3193857669830322, | |
| "logits/rejected": -1.3139396905899048, | |
| "logps/chosen": -5.116377830505371, | |
| "logps/rejected": -7.48989200592041, | |
| "loss": 0.6702, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.00014799665950704366, | |
| "rewards/margins": 0.06528617441654205, | |
| "rewards/rejected": -0.06513817608356476, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.871484243627277e-07, | |
| "logits/chosen": -1.6524467468261719, | |
| "logits/rejected": -1.6226232051849365, | |
| "logps/chosen": -5.752681732177734, | |
| "logps/rejected": -7.157814979553223, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02660415694117546, | |
| "rewards/margins": 0.04491695761680603, | |
| "rewards/rejected": -0.07152111828327179, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.867400005111495e-07, | |
| "logits/chosen": -1.4393689632415771, | |
| "logits/rejected": -1.369827151298523, | |
| "logps/chosen": -6.169909477233887, | |
| "logps/rejected": -7.438270568847656, | |
| "loss": 0.6732, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.005500434432178736, | |
| "rewards/margins": 0.06915347278118134, | |
| "rewards/rejected": -0.07465390115976334, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8632564809575738e-07, | |
| "logits/chosen": -1.622018814086914, | |
| "logits/rejected": -1.367488145828247, | |
| "logps/chosen": -5.794252872467041, | |
| "logps/rejected": -7.3042449951171875, | |
| "loss": 0.6702, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.016323495656251907, | |
| "rewards/margins": 0.07359044253826141, | |
| "rewards/rejected": -0.057266950607299805, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.859053954369885e-07, | |
| "logits/chosen": -1.5450842380523682, | |
| "logits/rejected": -1.5149590969085693, | |
| "logps/chosen": -4.754348278045654, | |
| "logps/rejected": -6.53006649017334, | |
| "loss": 0.6752, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.005179319530725479, | |
| "rewards/margins": 0.04943736642599106, | |
| "rewards/rejected": -0.05461668223142624, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.854792712585539e-07, | |
| "logits/chosen": -1.6106449365615845, | |
| "logits/rejected": -1.472044587135315, | |
| "logps/chosen": -5.688014030456543, | |
| "logps/rejected": -7.954685211181641, | |
| "loss": 0.6644, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.01195032149553299, | |
| "rewards/margins": 0.08017265051603317, | |
| "rewards/rejected": -0.09212296456098557, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8504730468547506e-07, | |
| "logits/chosen": -1.7078673839569092, | |
| "logits/rejected": -1.6030772924423218, | |
| "logps/chosen": -5.776875972747803, | |
| "logps/rejected": -8.993513107299805, | |
| "loss": 0.6652, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03464338928461075, | |
| "rewards/margins": 0.04098203405737877, | |
| "rewards/rejected": -0.07562542706727982, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.846095252420935e-07, | |
| "logits/chosen": -1.5690264701843262, | |
| "logits/rejected": -1.466038465499878, | |
| "logps/chosen": -5.061371326446533, | |
| "logps/rejected": -7.400214195251465, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.0034414425026625395, | |
| "rewards/margins": 0.06261752545833588, | |
| "rewards/rejected": -0.06605897843837738, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.841659628500527e-07, | |
| "logits/chosen": -1.5850152969360352, | |
| "logits/rejected": -1.626733422279358, | |
| "logps/chosen": -5.730135440826416, | |
| "logps/rejected": -7.101342678070068, | |
| "loss": 0.6687, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0294067170470953, | |
| "rewards/margins": 0.06425820291042328, | |
| "rewards/rejected": -0.09366491436958313, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8371664782625284e-07, | |
| "logits/chosen": -1.4130651950836182, | |
| "logits/rejected": -1.4390531778335571, | |
| "logps/chosen": -6.015176296234131, | |
| "logps/rejected": -6.557525634765625, | |
| "loss": 0.6697, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.019521493464708328, | |
| "rewards/margins": 0.048861414194107056, | |
| "rewards/rejected": -0.06838290393352509, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8326161088077904e-07, | |
| "logits/chosen": -1.5105092525482178, | |
| "logits/rejected": -1.5329506397247314, | |
| "logps/chosen": -5.465790748596191, | |
| "logps/rejected": -6.999060153961182, | |
| "loss": 0.6671, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.01001508068293333, | |
| "rewards/margins": 0.05736090615391731, | |
| "rewards/rejected": -0.06737598031759262, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.82800883114802e-07, | |
| "logits/chosen": -1.3845876455307007, | |
| "logits/rejected": -1.4282658100128174, | |
| "logps/chosen": -6.4336256980896, | |
| "logps/rejected": -6.6461639404296875, | |
| "loss": 0.6661, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.023983094841241837, | |
| "rewards/margins": 0.0474400632083416, | |
| "rewards/rejected": -0.07142315804958344, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.8233449601845256e-07, | |
| "logits/chosen": -1.5965187549591064, | |
| "logits/rejected": -1.6954765319824219, | |
| "logps/chosen": -7.093475341796875, | |
| "logps/rejected": -8.442846298217773, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.0007729934295639396, | |
| "rewards/margins": 0.09539145231246948, | |
| "rewards/rejected": -0.09616444259881973, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.8186248146866925e-07, | |
| "logits/chosen": -1.4333058595657349, | |
| "logits/rejected": -1.3761308193206787, | |
| "logps/chosen": -6.583089351654053, | |
| "logps/rejected": -8.258574485778809, | |
| "loss": 0.6664, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0373983308672905, | |
| "rewards/margins": 0.028671253472566605, | |
| "rewards/rejected": -0.0660695806145668, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.8138487172701948e-07, | |
| "logits/chosen": -1.4833250045776367, | |
| "logits/rejected": -1.5289559364318848, | |
| "logps/chosen": -5.485930442810059, | |
| "logps/rejected": -6.312931060791016, | |
| "loss": 0.6665, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.00883159227669239, | |
| "rewards/margins": 0.05059976503252983, | |
| "rewards/rejected": -0.059431351721286774, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.8090169943749475e-07, | |
| "logits/chosen": -1.3064377307891846, | |
| "logits/rejected": -1.246734619140625, | |
| "logps/chosen": -4.533870220184326, | |
| "logps/rejected": -6.509539604187012, | |
| "loss": 0.6642, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.005517001263797283, | |
| "rewards/margins": 0.06515659391880035, | |
| "rewards/rejected": -0.07067359238862991, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.8041299762427914e-07, | |
| "logits/chosen": -1.605373740196228, | |
| "logits/rejected": -1.5201643705368042, | |
| "logps/chosen": -6.204909801483154, | |
| "logps/rejected": -8.210671424865723, | |
| "loss": 0.6623, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.024663742631673813, | |
| "rewards/margins": 0.06276446580886841, | |
| "rewards/rejected": -0.08742821216583252, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.7991879968949247e-07, | |
| "logits/chosen": -1.2218480110168457, | |
| "logits/rejected": -1.1649186611175537, | |
| "logps/chosen": -4.534018516540527, | |
| "logps/rejected": -5.6343183517456055, | |
| "loss": 0.6628, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.007990372367203236, | |
| "rewards/margins": 0.07446859031915665, | |
| "rewards/rejected": -0.0824589654803276, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.794191394109071e-07, | |
| "logits/chosen": -1.4611269235610962, | |
| "logits/rejected": -1.4972448348999023, | |
| "logps/chosen": -6.087195873260498, | |
| "logps/rejected": -7.726794242858887, | |
| "loss": 0.6659, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02641758695244789, | |
| "rewards/margins": 0.03801130875945091, | |
| "rewards/rejected": -0.0644288882613182, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7891405093963936e-07, | |
| "logits/chosen": -1.6527025699615479, | |
| "logits/rejected": -1.5353628396987915, | |
| "logps/chosen": -4.67386531829834, | |
| "logps/rejected": -6.836350917816162, | |
| "loss": 0.6582, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.008362488821148872, | |
| "rewards/margins": 0.07853694260120392, | |
| "rewards/rejected": -0.08689942955970764, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_logits/chosen": -3.196692705154419, | |
| "eval_logits/rejected": -3.1757330894470215, | |
| "eval_logps/chosen": -6.479369640350342, | |
| "eval_logps/rejected": -7.3778076171875, | |
| "eval_loss": 0.691510021686554, | |
| "eval_rewards/accuracies": 0.5356570482254028, | |
| "eval_rewards/chosen": -0.03387455642223358, | |
| "eval_rewards/margins": 0.0093647176399827, | |
| "eval_rewards/rejected": -0.043239280581474304, | |
| "eval_runtime": 630.1142, | |
| "eval_samples_per_second": 31.636, | |
| "eval_steps_per_second": 0.495, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7840356879781529e-07, | |
| "logits/chosen": -1.5877869129180908, | |
| "logits/rejected": -1.5104506015777588, | |
| "logps/chosen": -6.052631378173828, | |
| "logps/rejected": -9.69563102722168, | |
| "loss": 0.6607, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.04113469645380974, | |
| "rewards/margins": 0.08254175633192062, | |
| "rewards/rejected": -0.12367645651102066, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7788772787621125e-07, | |
| "logits/chosen": -1.5799081325531006, | |
| "logits/rejected": -1.6663821935653687, | |
| "logps/chosen": -6.070633888244629, | |
| "logps/rejected": -6.397655487060547, | |
| "loss": 0.6615, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03644927218556404, | |
| "rewards/margins": 0.06373059004545212, | |
| "rewards/rejected": -0.10017986595630646, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7736656343186894e-07, | |
| "logits/chosen": -1.5198477506637573, | |
| "logits/rejected": -1.5320686101913452, | |
| "logps/chosen": -4.8720879554748535, | |
| "logps/rejected": -6.920307159423828, | |
| "loss": 0.6546, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.011885623447597027, | |
| "rewards/margins": 0.12990930676460266, | |
| "rewards/rejected": -0.11802370846271515, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.768401110856859e-07, | |
| "logits/chosen": -1.4357372522354126, | |
| "logits/rejected": -1.3116753101348877, | |
| "logps/chosen": -5.608946323394775, | |
| "logps/rejected": -7.910216331481934, | |
| "loss": 0.6588, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03163328021764755, | |
| "rewards/margins": 0.06858251243829727, | |
| "rewards/rejected": -0.10021580755710602, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7630840681998066e-07, | |
| "logits/chosen": -1.5340244770050049, | |
| "logits/rejected": -1.417262315750122, | |
| "logps/chosen": -5.505238056182861, | |
| "logps/rejected": -9.424348831176758, | |
| "loss": 0.657, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.043297283351421356, | |
| "rewards/margins": 0.06860554218292236, | |
| "rewards/rejected": -0.11190283298492432, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7577148697603348e-07, | |
| "logits/chosen": -1.7233011722564697, | |
| "logits/rejected": -1.6292282342910767, | |
| "logps/chosen": -5.7183942794799805, | |
| "logps/rejected": -6.829588890075684, | |
| "loss": 0.6596, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.0456002876162529, | |
| "rewards/margins": 0.06835009157657623, | |
| "rewards/rejected": -0.11395038664340973, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7522938825160247e-07, | |
| "logits/chosen": -1.3610880374908447, | |
| "logits/rejected": -1.3855262994766235, | |
| "logps/chosen": -5.5726470947265625, | |
| "logps/rejected": -7.635517120361328, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03400539606809616, | |
| "rewards/margins": 0.03809204697608948, | |
| "rewards/rejected": -0.07209744304418564, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7468214769841538e-07, | |
| "logits/chosen": -1.5978724956512451, | |
| "logits/rejected": -1.4684964418411255, | |
| "logps/chosen": -5.9864959716796875, | |
| "logps/rejected": -7.737608432769775, | |
| "loss": 0.6572, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.044120945036411285, | |
| "rewards/margins": 0.06804494559764862, | |
| "rewards/rejected": -0.11216588318347931, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7412980271963708e-07, | |
| "logits/chosen": -1.567631721496582, | |
| "logits/rejected": -1.54317045211792, | |
| "logps/chosen": -6.259519100189209, | |
| "logps/rejected": -7.840844631195068, | |
| "loss": 0.6614, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.006099678575992584, | |
| "rewards/margins": 0.1069895401597023, | |
| "rewards/rejected": -0.11308921873569489, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7357239106731316e-07, | |
| "logits/chosen": -1.6185524463653564, | |
| "logits/rejected": -1.6444612741470337, | |
| "logps/chosen": -7.109327793121338, | |
| "logps/rejected": -8.976202964782715, | |
| "loss": 0.6555, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.04382595047354698, | |
| "rewards/margins": 0.09603992104530334, | |
| "rewards/rejected": -0.13986587524414062, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7300995083978961e-07, | |
| "logits/chosen": -1.7173988819122314, | |
| "logits/rejected": -1.5636038780212402, | |
| "logps/chosen": -5.074532508850098, | |
| "logps/rejected": -7.6101531982421875, | |
| "loss": 0.6536, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.04033565893769264, | |
| "rewards/margins": 0.08647169172763824, | |
| "rewards/rejected": -0.1268073469400406, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.724425204791089e-07, | |
| "logits/chosen": -1.3909645080566406, | |
| "logits/rejected": -1.4084515571594238, | |
| "logps/chosen": -5.7829999923706055, | |
| "logps/rejected": -6.748002529144287, | |
| "loss": 0.6638, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03097168169915676, | |
| "rewards/margins": 0.04933810234069824, | |
| "rewards/rejected": -0.08030977845191956, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7187013876838238e-07, | |
| "logits/chosen": -1.8116487264633179, | |
| "logits/rejected": -1.7086451053619385, | |
| "logps/chosen": -6.233320236206055, | |
| "logps/rejected": -8.50374698638916, | |
| "loss": 0.6505, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.014792789705097675, | |
| "rewards/margins": 0.09041319042444229, | |
| "rewards/rejected": -0.1052059754729271, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.712928448291397e-07, | |
| "logits/chosen": -1.4688892364501953, | |
| "logits/rejected": -1.531427025794983, | |
| "logps/chosen": -6.673917293548584, | |
| "logps/rejected": -6.955280303955078, | |
| "loss": 0.6468, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.013536572456359863, | |
| "rewards/margins": 0.09923507273197174, | |
| "rewards/rejected": -0.11277163028717041, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7071067811865473e-07, | |
| "logits/chosen": -1.7241573333740234, | |
| "logits/rejected": -1.6227924823760986, | |
| "logps/chosen": -6.4433488845825195, | |
| "logps/rejected": -9.645414352416992, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.054280836135149, | |
| "rewards/margins": 0.09889966994524002, | |
| "rewards/rejected": -0.15318050980567932, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.7012367842724884e-07, | |
| "logits/chosen": -1.4540927410125732, | |
| "logits/rejected": -1.448845624923706, | |
| "logps/chosen": -7.165667533874512, | |
| "logps/rejected": -9.222039222717285, | |
| "loss": 0.6556, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03299577161669731, | |
| "rewards/margins": 0.09829618781805038, | |
| "rewards/rejected": -0.1312919557094574, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.695318858755712e-07, | |
| "logits/chosen": -1.6242682933807373, | |
| "logits/rejected": -1.507509708404541, | |
| "logps/chosen": -4.892175197601318, | |
| "logps/rejected": -6.663909912109375, | |
| "loss": 0.6587, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.05031831935048103, | |
| "rewards/margins": 0.06618543714284897, | |
| "rewards/rejected": -0.1165037602186203, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.6893534091185658e-07, | |
| "logits/chosen": -1.655474305152893, | |
| "logits/rejected": -1.6360971927642822, | |
| "logps/chosen": -6.940272331237793, | |
| "logps/rejected": -10.048660278320312, | |
| "loss": 0.6463, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.006456449627876282, | |
| "rewards/margins": 0.1727210134267807, | |
| "rewards/rejected": -0.17917747795581818, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.6833408430916082e-07, | |
| "logits/chosen": -1.3439010381698608, | |
| "logits/rejected": -1.429510235786438, | |
| "logps/chosen": -5.466313362121582, | |
| "logps/rejected": -7.18454647064209, | |
| "loss": 0.6487, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.05292614549398422, | |
| "rewards/margins": 0.12489708513021469, | |
| "rewards/rejected": -0.17782321572303772, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.6772815716257412e-07, | |
| "logits/chosen": -1.51864492893219, | |
| "logits/rejected": -1.455951452255249, | |
| "logps/chosen": -6.4935503005981445, | |
| "logps/rejected": -8.119839668273926, | |
| "loss": 0.6537, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0410698801279068, | |
| "rewards/margins": 0.07796063274145126, | |
| "rewards/rejected": -0.11903052031993866, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_logits/chosen": -3.1874260902404785, | |
| "eval_logits/rejected": -3.1666018962860107, | |
| "eval_logps/chosen": -6.68336296081543, | |
| "eval_logps/rejected": -7.635651588439941, | |
| "eval_loss": 0.6910278797149658, | |
| "eval_rewards/accuracies": 0.5352563858032227, | |
| "eval_rewards/chosen": -0.05427387356758118, | |
| "eval_rewards/margins": 0.014749797061085701, | |
| "eval_rewards/rejected": -0.06902367621660233, | |
| "eval_runtime": 633.9901, | |
| "eval_samples_per_second": 31.442, | |
| "eval_steps_per_second": 0.492, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.6711760088641197e-07, | |
| "logits/chosen": -1.707196593284607, | |
| "logits/rejected": -1.7934414148330688, | |
| "logps/chosen": -6.166356086730957, | |
| "logps/rejected": -8.222631454467773, | |
| "loss": 0.6509, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.04317507520318031, | |
| "rewards/margins": 0.09725725650787354, | |
| "rewards/rejected": -0.14043232798576355, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.665024572113848e-07, | |
| "logits/chosen": -1.545772910118103, | |
| "logits/rejected": -1.6466634273529053, | |
| "logps/chosen": -6.318427085876465, | |
| "logps/rejected": -8.524378776550293, | |
| "loss": 0.6514, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0410161130130291, | |
| "rewards/margins": 0.10211832821369171, | |
| "rewards/rejected": -0.14313443005084991, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.6588276818174578e-07, | |
| "logits/chosen": -1.8286006450653076, | |
| "logits/rejected": -1.8894586563110352, | |
| "logps/chosen": -6.069952011108398, | |
| "logps/rejected": -9.206938743591309, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.020980229601264, | |
| "rewards/margins": 0.11290951818227768, | |
| "rewards/rejected": -0.13388976454734802, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6525857615241686e-07, | |
| "logits/chosen": -1.47043776512146, | |
| "logits/rejected": -1.5266094207763672, | |
| "logps/chosen": -6.51092529296875, | |
| "logps/rejected": -7.585784912109375, | |
| "loss": 0.6509, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.04623446241021156, | |
| "rewards/margins": 0.08200391381978989, | |
| "rewards/rejected": -0.12823837995529175, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6462992378609406e-07, | |
| "logits/chosen": -1.3878867626190186, | |
| "logits/rejected": -1.427337884902954, | |
| "logps/chosen": -5.593390464782715, | |
| "logps/rejected": -8.398979187011719, | |
| "loss": 0.6468, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.024669837206602097, | |
| "rewards/margins": 0.16932611167430878, | |
| "rewards/rejected": -0.19399593770503998, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6399685405033166e-07, | |
| "logits/chosen": -1.4157261848449707, | |
| "logits/rejected": -1.3857667446136475, | |
| "logps/chosen": -7.09283447265625, | |
| "logps/rejected": -7.571598052978516, | |
| "loss": 0.6464, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.06203197315335274, | |
| "rewards/margins": 0.08331136405467987, | |
| "rewards/rejected": -0.1453433334827423, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6335941021460504e-07, | |
| "logits/chosen": -1.4545994997024536, | |
| "logits/rejected": -1.5172913074493408, | |
| "logps/chosen": -6.2775068283081055, | |
| "logps/rejected": -9.521524429321289, | |
| "loss": 0.6483, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.045348554849624634, | |
| "rewards/margins": 0.10597596317529678, | |
| "rewards/rejected": -0.15132452547550201, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.627176358473537e-07, | |
| "logits/chosen": -1.5027107000350952, | |
| "logits/rejected": -1.3006885051727295, | |
| "logps/chosen": -6.991432189941406, | |
| "logps/rejected": -8.119719505310059, | |
| "loss": 0.6497, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.07398322969675064, | |
| "rewards/margins": 0.03964931517839432, | |
| "rewards/rejected": -0.11363253742456436, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6207157481300312e-07, | |
| "logits/chosen": -1.5265876054763794, | |
| "logits/rejected": -1.4646865129470825, | |
| "logps/chosen": -6.282595634460449, | |
| "logps/rejected": -7.717820167541504, | |
| "loss": 0.6474, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.07993953675031662, | |
| "rewards/margins": 0.09873492270708084, | |
| "rewards/rejected": -0.17867444455623627, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.614212712689668e-07, | |
| "logits/chosen": -1.3973186016082764, | |
| "logits/rejected": -1.3966352939605713, | |
| "logps/chosen": -6.049260139465332, | |
| "logps/rejected": -6.803788185119629, | |
| "loss": 0.6457, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.053327836096286774, | |
| "rewards/margins": 0.13262109458446503, | |
| "rewards/rejected": -0.1859489530324936, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.607667696626281e-07, | |
| "logits/chosen": -1.604760766029358, | |
| "logits/rejected": -1.6347767114639282, | |
| "logps/chosen": -5.876626014709473, | |
| "logps/rejected": -9.119338989257812, | |
| "loss": 0.6534, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.058147139847278595, | |
| "rewards/margins": 0.09601394087076187, | |
| "rewards/rejected": -0.15416109561920166, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.601081147283025e-07, | |
| "logits/chosen": -1.517514944076538, | |
| "logits/rejected": -1.4594268798828125, | |
| "logps/chosen": -5.623353004455566, | |
| "logps/rejected": -8.838793754577637, | |
| "loss": 0.6423, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.034189894795417786, | |
| "rewards/margins": 0.120149627327919, | |
| "rewards/rejected": -0.1543395221233368, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.594453514841798e-07, | |
| "logits/chosen": -1.4519226551055908, | |
| "logits/rejected": -1.4916832447052002, | |
| "logps/chosen": -6.024622917175293, | |
| "logps/rejected": -8.156673431396484, | |
| "loss": 0.6421, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.03883025795221329, | |
| "rewards/margins": 0.14476829767227173, | |
| "rewards/rejected": -0.18359854817390442, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5877852522924732e-07, | |
| "logits/chosen": -1.4973927736282349, | |
| "logits/rejected": -1.5078349113464355, | |
| "logps/chosen": -5.639887809753418, | |
| "logps/rejected": -6.538842678070068, | |
| "loss": 0.6519, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.046415191143751144, | |
| "rewards/margins": 0.08505970239639282, | |
| "rewards/rejected": -0.13147488236427307, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5810768154019382e-07, | |
| "logits/chosen": -1.6372696161270142, | |
| "logits/rejected": -1.604660987854004, | |
| "logps/chosen": -6.092626571655273, | |
| "logps/rejected": -7.028026580810547, | |
| "loss": 0.6493, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.07696966081857681, | |
| "rewards/margins": 0.10025089979171753, | |
| "rewards/rejected": -0.17722055315971375, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5743286626829435e-07, | |
| "logits/chosen": -1.3090213537216187, | |
| "logits/rejected": -1.3185111284255981, | |
| "logps/chosen": -4.761218070983887, | |
| "logps/rejected": -6.58809757232666, | |
| "loss": 0.6395, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.03409787639975548, | |
| "rewards/margins": 0.1063770055770874, | |
| "rewards/rejected": -0.14047487080097198, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5675412553627636e-07, | |
| "logits/chosen": -1.4627618789672852, | |
| "logits/rejected": -1.4510043859481812, | |
| "logps/chosen": -5.731337547302246, | |
| "logps/rejected": -10.051878929138184, | |
| "loss": 0.6498, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03276892006397247, | |
| "rewards/margins": 0.13961870968341827, | |
| "rewards/rejected": -0.17238759994506836, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5607150573516727e-07, | |
| "logits/chosen": -1.450136423110962, | |
| "logits/rejected": -1.4004848003387451, | |
| "logps/chosen": -6.355518341064453, | |
| "logps/rejected": -7.548236846923828, | |
| "loss": 0.6462, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.07692820578813553, | |
| "rewards/margins": 0.07390854507684708, | |
| "rewards/rejected": -0.1508367359638214, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.5538505352112372e-07, | |
| "logits/chosen": -1.7112722396850586, | |
| "logits/rejected": -1.4571855068206787, | |
| "logps/chosen": -6.127640724182129, | |
| "logps/rejected": -7.857392311096191, | |
| "loss": 0.6451, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.07270301878452301, | |
| "rewards/margins": 0.13500170409679413, | |
| "rewards/rejected": -0.20770475268363953, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.546948158122427e-07, | |
| "logits/chosen": -1.7370418310165405, | |
| "logits/rejected": -1.7001125812530518, | |
| "logps/chosen": -5.915326118469238, | |
| "logps/rejected": -8.323230743408203, | |
| "loss": 0.6435, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.07308584451675415, | |
| "rewards/margins": 0.15741673111915588, | |
| "rewards/rejected": -0.23050260543823242, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_logits/chosen": -3.186901092529297, | |
| "eval_logits/rejected": -3.1663575172424316, | |
| "eval_logps/chosen": -6.852160930633545, | |
| "eval_logps/rejected": -7.856788158416748, | |
| "eval_loss": 0.6899130344390869, | |
| "eval_rewards/accuracies": 0.5376602411270142, | |
| "eval_rewards/chosen": -0.0711536630988121, | |
| "eval_rewards/margins": 0.019983632490038872, | |
| "eval_rewards/rejected": -0.09113729745149612, | |
| "eval_runtime": 640.2435, | |
| "eval_samples_per_second": 31.135, | |
| "eval_steps_per_second": 0.487, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.540008397853547e-07, | |
| "logits/chosen": -1.5285913944244385, | |
| "logits/rejected": -1.506296992301941, | |
| "logps/chosen": -4.79876184463501, | |
| "logps/rejected": -7.281890869140625, | |
| "loss": 0.6362, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.01859206147491932, | |
| "rewards/margins": 0.11602876335382462, | |
| "rewards/rejected": -0.1346208155155182, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.5330317287279937e-07, | |
| "logits/chosen": -1.489984393119812, | |
| "logits/rejected": -1.384037733078003, | |
| "logps/chosen": -6.871964931488037, | |
| "logps/rejected": -9.540796279907227, | |
| "loss": 0.6343, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.09499697387218475, | |
| "rewards/margins": 0.11305873095989227, | |
| "rewards/rejected": -0.20805568993091583, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.526018627591834e-07, | |
| "logits/chosen": -1.4063003063201904, | |
| "logits/rejected": -1.392664909362793, | |
| "logps/chosen": -7.688409328460693, | |
| "logps/rejected": -8.560149192810059, | |
| "loss": 0.6462, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.08843918889760971, | |
| "rewards/margins": 0.12001106888055801, | |
| "rewards/rejected": -0.20845024287700653, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.5189695737812152e-07, | |
| "logits/chosen": -1.4897139072418213, | |
| "logits/rejected": -1.437138319015503, | |
| "logps/chosen": -5.6488165855407715, | |
| "logps/rejected": -8.617653846740723, | |
| "loss": 0.637, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.08085988461971283, | |
| "rewards/margins": 0.1466083824634552, | |
| "rewards/rejected": -0.22746825218200684, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.511885049089601e-07, | |
| "logits/chosen": -1.4502151012420654, | |
| "logits/rejected": -1.558260440826416, | |
| "logps/chosen": -6.4499664306640625, | |
| "logps/rejected": -8.321115493774414, | |
| "loss": 0.6455, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.08257200568914413, | |
| "rewards/margins": 0.10597743839025497, | |
| "rewards/rejected": -0.1885494440793991, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.5047655377348439e-07, | |
| "logits/chosen": -1.3676706552505493, | |
| "logits/rejected": -1.3527071475982666, | |
| "logps/chosen": -6.454686641693115, | |
| "logps/rejected": -7.9275712966918945, | |
| "loss": 0.6392, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.07613205164670944, | |
| "rewards/margins": 0.11505208909511566, | |
| "rewards/rejected": -0.1911841332912445, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.4976115263260874e-07, | |
| "logits/chosen": -1.4929918050765991, | |
| "logits/rejected": -1.4645825624465942, | |
| "logps/chosen": -5.613523006439209, | |
| "logps/rejected": -8.017206192016602, | |
| "loss": 0.6462, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.043209511786699295, | |
| "rewards/margins": 0.09339593350887299, | |
| "rewards/rejected": -0.13660545647144318, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4904235038305082e-07, | |
| "logits/chosen": -1.430387258529663, | |
| "logits/rejected": -1.5527921915054321, | |
| "logps/chosen": -7.354534149169922, | |
| "logps/rejected": -8.235260963439941, | |
| "loss": 0.6458, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.07374344021081924, | |
| "rewards/margins": 0.14879578351974487, | |
| "rewards/rejected": -0.22253921627998352, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.483201961539896e-07, | |
| "logits/chosen": -1.4932714700698853, | |
| "logits/rejected": -1.4173743724822998, | |
| "logps/chosen": -5.170498847961426, | |
| "logps/rejected": -7.974796295166016, | |
| "loss": 0.6307, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.04303674027323723, | |
| "rewards/margins": 0.15367302298545837, | |
| "rewards/rejected": -0.1967097669839859, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4759473930370737e-07, | |
| "logits/chosen": -1.7062429189682007, | |
| "logits/rejected": -1.6499900817871094, | |
| "logps/chosen": -6.257295608520508, | |
| "logps/rejected": -7.387567043304443, | |
| "loss": 0.6434, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.10747422277927399, | |
| "rewards/margins": 0.08016838133335114, | |
| "rewards/rejected": -0.18764260411262512, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4686602941621615e-07, | |
| "logits/chosen": -1.244971752166748, | |
| "logits/rejected": -1.2606076002120972, | |
| "logps/chosen": -6.677346706390381, | |
| "logps/rejected": -9.851409912109375, | |
| "loss": 0.6353, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.08402319997549057, | |
| "rewards/margins": 0.10642798244953156, | |
| "rewards/rejected": -0.19045117497444153, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4613411629786877e-07, | |
| "logits/chosen": -1.4713022708892822, | |
| "logits/rejected": -1.5048084259033203, | |
| "logps/chosen": -6.147872447967529, | |
| "logps/rejected": -7.910445213317871, | |
| "loss": 0.6417, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.07068430632352829, | |
| "rewards/margins": 0.16299991309642792, | |
| "rewards/rejected": -0.2336842119693756, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4539904997395468e-07, | |
| "logits/chosen": -1.3301279544830322, | |
| "logits/rejected": -1.3574144840240479, | |
| "logps/chosen": -6.699339389801025, | |
| "logps/rejected": -9.695663452148438, | |
| "loss": 0.6279, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.06842740625143051, | |
| "rewards/margins": 0.13677598536014557, | |
| "rewards/rejected": -0.20520341396331787, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4466088068528067e-07, | |
| "logits/chosen": -1.51852548122406, | |
| "logits/rejected": -1.4426764249801636, | |
| "logps/chosen": -5.722136497497559, | |
| "logps/rejected": -8.637394905090332, | |
| "loss": 0.6389, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0995694026350975, | |
| "rewards/margins": 0.11200227588415146, | |
| "rewards/rejected": -0.21157169342041016, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.4391965888473702e-07, | |
| "logits/chosen": -1.5839837789535522, | |
| "logits/rejected": -1.5870234966278076, | |
| "logps/chosen": -6.29650354385376, | |
| "logps/rejected": -9.936391830444336, | |
| "loss": 0.63, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.08398771286010742, | |
| "rewards/margins": 0.11872847378253937, | |
| "rewards/rejected": -0.20271620154380798, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.4317543523384928e-07, | |
| "logits/chosen": -1.694641351699829, | |
| "logits/rejected": -1.6382360458374023, | |
| "logps/chosen": -6.944577217102051, | |
| "logps/rejected": -8.599451065063477, | |
| "loss": 0.6283, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.08465109765529633, | |
| "rewards/margins": 0.16851410269737244, | |
| "rewards/rejected": -0.25316524505615234, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.4242826059931536e-07, | |
| "logits/chosen": -1.4301807880401611, | |
| "logits/rejected": -1.3396793603897095, | |
| "logps/chosen": -5.984529972076416, | |
| "logps/rejected": -8.90467643737793, | |
| "loss": 0.6422, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.11020038276910782, | |
| "rewards/margins": 0.13162291049957275, | |
| "rewards/rejected": -0.24182331562042236, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.4167818604952903e-07, | |
| "logits/chosen": -1.8726218938827515, | |
| "logits/rejected": -1.8315349817276, | |
| "logps/chosen": -6.4224066734313965, | |
| "logps/rejected": -9.21501350402832, | |
| "loss": 0.6316, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.08100078999996185, | |
| "rewards/margins": 0.16276691854000092, | |
| "rewards/rejected": -0.24376770853996277, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.4092526285108939e-07, | |
| "logits/chosen": -1.6488440036773682, | |
| "logits/rejected": -1.6959072351455688, | |
| "logps/chosen": -5.05312442779541, | |
| "logps/rejected": -8.503290176391602, | |
| "loss": 0.6354, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.025250781327486038, | |
| "rewards/margins": 0.17517754435539246, | |
| "rewards/rejected": -0.2004283368587494, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.4016954246529695e-07, | |
| "logits/chosen": -1.5666801929473877, | |
| "logits/rejected": -1.6438080072402954, | |
| "logps/chosen": -6.046337127685547, | |
| "logps/rejected": -8.245607376098633, | |
| "loss": 0.6332, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.08281711488962173, | |
| "rewards/margins": 0.1388954371213913, | |
| "rewards/rejected": -0.22171254456043243, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_logits/chosen": -3.1937944889068604, | |
| "eval_logits/rejected": -3.173306941986084, | |
| "eval_logps/chosen": -7.060844421386719, | |
| "eval_logps/rejected": -8.098358154296875, | |
| "eval_loss": 0.6894627213478088, | |
| "eval_rewards/accuracies": 0.5336538553237915, | |
| "eval_rewards/chosen": -0.09202194213867188, | |
| "eval_rewards/margins": 0.023272372782230377, | |
| "eval_rewards/rejected": -0.11529432237148285, | |
| "eval_runtime": 642.2741, | |
| "eval_samples_per_second": 31.037, | |
| "eval_steps_per_second": 0.486, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3941107654463616e-07, | |
| "logits/chosen": -1.6347227096557617, | |
| "logits/rejected": -1.5565736293792725, | |
| "logps/chosen": -6.210860252380371, | |
| "logps/rejected": -8.191205978393555, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.0968286544084549, | |
| "rewards/margins": 0.13983137905597687, | |
| "rewards/rejected": -0.23666004836559296, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3864991692924522e-07, | |
| "logits/chosen": -1.6105167865753174, | |
| "logits/rejected": -1.5960274934768677, | |
| "logps/chosen": -6.203638553619385, | |
| "logps/rejected": -9.020109176635742, | |
| "loss": 0.6296, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.09015347063541412, | |
| "rewards/margins": 0.13406077027320862, | |
| "rewards/rejected": -0.22421424090862274, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3788611564337276e-07, | |
| "logits/chosen": -1.5526584386825562, | |
| "logits/rejected": -1.5093594789505005, | |
| "logps/chosen": -6.796191215515137, | |
| "logps/rejected": -8.405149459838867, | |
| "loss": 0.6223, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.08056505024433136, | |
| "rewards/margins": 0.13686737418174744, | |
| "rewards/rejected": -0.21743245422840118, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3711972489182207e-07, | |
| "logits/chosen": -1.311798334121704, | |
| "logits/rejected": -1.2505333423614502, | |
| "logps/chosen": -6.062074184417725, | |
| "logps/rejected": -8.897860527038574, | |
| "loss": 0.6306, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.030773481354117393, | |
| "rewards/margins": 0.19692835211753845, | |
| "rewards/rejected": -0.22770185768604279, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3635079705638297e-07, | |
| "logits/chosen": -1.5671780109405518, | |
| "logits/rejected": -1.5879342555999756, | |
| "logps/chosen": -6.3402533531188965, | |
| "logps/rejected": -9.183328628540039, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.06746585667133331, | |
| "rewards/margins": 0.13011819124221802, | |
| "rewards/rejected": -0.19758403301239014, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3557938469225164e-07, | |
| "logits/chosen": -1.423302412033081, | |
| "logits/rejected": -1.4359217882156372, | |
| "logps/chosen": -6.0072126388549805, | |
| "logps/rejected": -9.093564987182617, | |
| "loss": 0.6206, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.09784204512834549, | |
| "rewards/margins": 0.13212862610816956, | |
| "rewards/rejected": -0.22997066378593445, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3480554052443843e-07, | |
| "logits/chosen": -1.7213542461395264, | |
| "logits/rejected": -1.6981548070907593, | |
| "logps/chosen": -7.577986240386963, | |
| "logps/rejected": -10.498396873474121, | |
| "loss": 0.6315, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.10542915016412735, | |
| "rewards/margins": 0.1352270096540451, | |
| "rewards/rejected": -0.24065613746643066, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.340293174441643e-07, | |
| "logits/chosen": -1.6847597360610962, | |
| "logits/rejected": -1.6527271270751953, | |
| "logps/chosen": -8.06293773651123, | |
| "logps/rejected": -9.775976181030273, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11751914024353027, | |
| "rewards/margins": 0.1262793093919754, | |
| "rewards/rejected": -0.24379844963550568, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.332507685052457e-07, | |
| "logits/chosen": -1.3307305574417114, | |
| "logits/rejected": -1.296414852142334, | |
| "logps/chosen": -5.506723880767822, | |
| "logps/rejected": -9.845108032226562, | |
| "loss": 0.6284, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.05931394174695015, | |
| "rewards/margins": 0.17723853886127472, | |
| "rewards/rejected": -0.23655244708061218, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3246994692046836e-07, | |
| "logits/chosen": -1.4784770011901855, | |
| "logits/rejected": -1.4177029132843018, | |
| "logps/chosen": -6.148890495300293, | |
| "logps/rejected": -9.090556144714355, | |
| "loss": 0.6283, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.07422871887683868, | |
| "rewards/margins": 0.16514147818088531, | |
| "rewards/rejected": -0.239370197057724, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3168690605795043e-07, | |
| "logits/chosen": -1.4130356311798096, | |
| "logits/rejected": -1.453981637954712, | |
| "logps/chosen": -5.568487167358398, | |
| "logps/rejected": -9.608223915100098, | |
| "loss": 0.6299, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.087576724588871, | |
| "rewards/margins": 0.2175295650959015, | |
| "rewards/rejected": -0.3051062524318695, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.3090169943749475e-07, | |
| "logits/chosen": -1.4787486791610718, | |
| "logits/rejected": -1.437145471572876, | |
| "logps/chosen": -5.853695869445801, | |
| "logps/rejected": -8.345501899719238, | |
| "loss": 0.6359, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.05679728835821152, | |
| "rewards/margins": 0.1354844868183136, | |
| "rewards/rejected": -0.19228176772594452, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.3011438072693074e-07, | |
| "logits/chosen": -1.5212247371673584, | |
| "logits/rejected": -1.4433953762054443, | |
| "logps/chosen": -6.458197593688965, | |
| "logps/rejected": -9.791971206665039, | |
| "loss": 0.6192, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.07254395633935928, | |
| "rewards/margins": 0.23906107246875763, | |
| "rewards/rejected": -0.3116050362586975, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2932500373844649e-07, | |
| "logits/chosen": -1.6338106393814087, | |
| "logits/rejected": -1.4234752655029297, | |
| "logps/chosen": -5.266790866851807, | |
| "logps/rejected": -8.406009674072266, | |
| "loss": 0.6266, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.09523673355579376, | |
| "rewards/margins": 0.15846498310565948, | |
| "rewards/rejected": -0.25370171666145325, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2853362242491051e-07, | |
| "logits/chosen": -1.4831264019012451, | |
| "logits/rejected": -1.5002310276031494, | |
| "logps/chosen": -5.224946022033691, | |
| "logps/rejected": -7.2889556884765625, | |
| "loss": 0.6128, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.06067908555269241, | |
| "rewards/margins": 0.17062053084373474, | |
| "rewards/rejected": -0.23129959404468536, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2774029087618446e-07, | |
| "logits/chosen": -1.758953332901001, | |
| "logits/rejected": -1.6146351099014282, | |
| "logps/chosen": -7.610304355621338, | |
| "logps/rejected": -10.307147026062012, | |
| "loss": 0.619, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.07071717083454132, | |
| "rewards/margins": 0.20475205779075623, | |
| "rewards/rejected": -0.27546921372413635, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2694506331542577e-07, | |
| "logits/chosen": -1.591770052909851, | |
| "logits/rejected": -1.4535247087478638, | |
| "logps/chosen": -5.625763893127441, | |
| "logps/rejected": -9.85864543914795, | |
| "loss": 0.6345, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.0788426622748375, | |
| "rewards/margins": 0.15841318666934967, | |
| "rewards/rejected": -0.23725584149360657, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2614799409538198e-07, | |
| "logits/chosen": -1.4659042358398438, | |
| "logits/rejected": -1.4563062191009521, | |
| "logps/chosen": -5.280413627624512, | |
| "logps/rejected": -9.323397636413574, | |
| "loss": 0.6226, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.09044213593006134, | |
| "rewards/margins": 0.1876935213804245, | |
| "rewards/rejected": -0.2781356871128082, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.253491376946754e-07, | |
| "logits/chosen": -1.3718680143356323, | |
| "logits/rejected": -1.3589755296707153, | |
| "logps/chosen": -6.7582688331604, | |
| "logps/rejected": -10.708890914916992, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.06859103590250015, | |
| "rewards/margins": 0.16634361445903778, | |
| "rewards/rejected": -0.23493464291095734, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.2454854871407992e-07, | |
| "logits/chosen": -1.3176133632659912, | |
| "logits/rejected": -1.3136779069900513, | |
| "logps/chosen": -6.8164777755737305, | |
| "logps/rejected": -9.460234642028809, | |
| "loss": 0.6217, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.1115039810538292, | |
| "rewards/margins": 0.17541712522506714, | |
| "rewards/rejected": -0.28692108392715454, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_logits/chosen": -3.18924617767334, | |
| "eval_logits/rejected": -3.1685447692871094, | |
| "eval_logps/chosen": -7.315045356750488, | |
| "eval_logps/rejected": -8.408605575561523, | |
| "eval_loss": 0.6886266469955444, | |
| "eval_rewards/accuracies": 0.5352563858032227, | |
| "eval_rewards/chosen": -0.11744209378957748, | |
| "eval_rewards/margins": 0.0288768969476223, | |
| "eval_rewards/rejected": -0.14631898701190948, | |
| "eval_runtime": 646.5131, | |
| "eval_samples_per_second": 30.833, | |
| "eval_steps_per_second": 0.483, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.2374628187278885e-07, | |
| "logits/chosen": -1.6750404834747314, | |
| "logits/rejected": -1.6324043273925781, | |
| "logps/chosen": -6.7752885818481445, | |
| "logps/rejected": -9.884492874145508, | |
| "loss": 0.6205, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.08316051214933395, | |
| "rewards/margins": 0.19135987758636475, | |
| "rewards/rejected": -0.2745203971862793, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.2294239200467515e-07, | |
| "logits/chosen": -1.4121886491775513, | |
| "logits/rejected": -1.4634597301483154, | |
| "logps/chosen": -8.272059440612793, | |
| "logps/rejected": -10.233227729797363, | |
| "loss": 0.6219, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.1476745754480362, | |
| "rewards/margins": 0.1988331377506256, | |
| "rewards/rejected": -0.346507728099823, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.2213693405454345e-07, | |
| "logits/chosen": -1.6356998682022095, | |
| "logits/rejected": -1.6384683847427368, | |
| "logps/chosen": -5.971181392669678, | |
| "logps/rejected": -8.54847240447998, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.13469815254211426, | |
| "rewards/margins": 0.1848105490207672, | |
| "rewards/rejected": -0.31950870156288147, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.213299630743747e-07, | |
| "logits/chosen": -1.4081295728683472, | |
| "logits/rejected": -1.333418846130371, | |
| "logps/chosen": -7.040464878082275, | |
| "logps/rejected": -8.731954574584961, | |
| "loss": 0.6179, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.12728752195835114, | |
| "rewards/margins": 0.17130446434020996, | |
| "rewards/rejected": -0.2985920011997223, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.205215342195634e-07, | |
| "logits/chosen": -1.5196300745010376, | |
| "logits/rejected": -1.5172007083892822, | |
| "logps/chosen": -5.7593536376953125, | |
| "logps/rejected": -9.683046340942383, | |
| "loss": 0.6175, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.10798660665750504, | |
| "rewards/margins": 0.25386783480644226, | |
| "rewards/rejected": -0.3618544340133667, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.1971170274514802e-07, | |
| "logits/chosen": -1.6615116596221924, | |
| "logits/rejected": -1.6914132833480835, | |
| "logps/chosen": -7.840911865234375, | |
| "logps/rejected": -9.82246208190918, | |
| "loss": 0.6232, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.14061352610588074, | |
| "rewards/margins": 0.14244748651981354, | |
| "rewards/rejected": -0.2830609679222107, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.1890052400203402e-07, | |
| "logits/chosen": -1.834905982017517, | |
| "logits/rejected": -1.7889604568481445, | |
| "logps/chosen": -7.288022041320801, | |
| "logps/rejected": -10.376224517822266, | |
| "loss": 0.6095, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.1694309115409851, | |
| "rewards/margins": 0.1324268877506256, | |
| "rewards/rejected": -0.3018577992916107, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.18088053433211e-07, | |
| "logits/chosen": -1.5051194429397583, | |
| "logits/rejected": -1.5041710138320923, | |
| "logps/chosen": -6.950632572174072, | |
| "logps/rejected": -9.823295593261719, | |
| "loss": 0.6208, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.11773916333913803, | |
| "rewards/margins": 0.17614324390888214, | |
| "rewards/rejected": -0.29388242959976196, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1727434656996305e-07, | |
| "logits/chosen": -1.6864219903945923, | |
| "logits/rejected": -1.7134393453598022, | |
| "logps/chosen": -8.343110084533691, | |
| "logps/rejected": -10.6605863571167, | |
| "loss": 0.6269, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.1767118275165558, | |
| "rewards/margins": 0.15801793336868286, | |
| "rewards/rejected": -0.33472976088523865, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1645945902807339e-07, | |
| "logits/chosen": -1.521843671798706, | |
| "logits/rejected": -1.4162412881851196, | |
| "logps/chosen": -5.303705215454102, | |
| "logps/rejected": -8.564371109008789, | |
| "loss": 0.6195, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.08533243834972382, | |
| "rewards/margins": 0.2330024540424347, | |
| "rewards/rejected": -0.3183349072933197, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1564344650402309e-07, | |
| "logits/chosen": -1.4854631423950195, | |
| "logits/rejected": -1.5085594654083252, | |
| "logps/chosen": -6.938723087310791, | |
| "logps/rejected": -9.417280197143555, | |
| "loss": 0.6228, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.11080266535282135, | |
| "rewards/margins": 0.18629567325115204, | |
| "rewards/rejected": -0.2970983386039734, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1482636477118419e-07, | |
| "logits/chosen": -1.5579372644424438, | |
| "logits/rejected": -1.5621235370635986, | |
| "logps/chosen": -6.055428981781006, | |
| "logps/rejected": -8.274986267089844, | |
| "loss": 0.617, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.08702854812145233, | |
| "rewards/margins": 0.19237728416919708, | |
| "rewards/rejected": -0.2794058322906494, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1400826967600779e-07, | |
| "logits/chosen": -1.6652123928070068, | |
| "logits/rejected": -1.5670098066329956, | |
| "logps/chosen": -6.44259786605835, | |
| "logps/rejected": -9.023821830749512, | |
| "loss": 0.6279, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.12496472895145416, | |
| "rewards/margins": 0.1399177610874176, | |
| "rewards/rejected": -0.26488250494003296, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.131892171342069e-07, | |
| "logits/chosen": -1.5863568782806396, | |
| "logits/rejected": -1.4539623260498047, | |
| "logps/chosen": -8.477279663085938, | |
| "logps/rejected": -11.437823295593262, | |
| "loss": 0.6105, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.12180640548467636, | |
| "rewards/margins": 0.20829221606254578, | |
| "rewards/rejected": -0.3300986588001251, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.1236926312693478e-07, | |
| "logits/chosen": -1.4525238275527954, | |
| "logits/rejected": -1.3738415241241455, | |
| "logps/chosen": -6.627744197845459, | |
| "logps/rejected": -10.264178276062012, | |
| "loss": 0.6171, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.12857326865196228, | |
| "rewards/margins": 0.23741576075553894, | |
| "rewards/rejected": -0.365989089012146, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.1154846369695863e-07, | |
| "logits/chosen": -1.2888273000717163, | |
| "logits/rejected": -1.2684919834136963, | |
| "logps/chosen": -7.151463985443115, | |
| "logps/rejected": -10.000380516052246, | |
| "loss": 0.6203, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.09203344583511353, | |
| "rewards/margins": 0.21837130188941956, | |
| "rewards/rejected": -0.3104047179222107, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.1072687494482918e-07, | |
| "logits/chosen": -1.3414087295532227, | |
| "logits/rejected": -1.3495652675628662, | |
| "logps/chosen": -7.086977481842041, | |
| "logps/rejected": -10.637145042419434, | |
| "loss": 0.6076, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.11822090297937393, | |
| "rewards/margins": 0.2522438168525696, | |
| "rewards/rejected": -0.3704647123813629, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.0990455302504628e-07, | |
| "logits/chosen": -1.2805410623550415, | |
| "logits/rejected": -1.3479764461517334, | |
| "logps/chosen": -6.615203857421875, | |
| "logps/rejected": -9.906420707702637, | |
| "loss": 0.6078, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.1480402648448944, | |
| "rewards/margins": 0.21094676852226257, | |
| "rewards/rejected": -0.358987033367157, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.0908155414222082e-07, | |
| "logits/chosen": -1.3715662956237793, | |
| "logits/rejected": -1.4605109691619873, | |
| "logps/chosen": -6.958237648010254, | |
| "logps/rejected": -8.883742332458496, | |
| "loss": 0.6167, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.13871873915195465, | |
| "rewards/margins": 0.16220495104789734, | |
| "rewards/rejected": -0.3009237051010132, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.0825793454723325e-07, | |
| "logits/chosen": -1.5984854698181152, | |
| "logits/rejected": -1.618065595626831, | |
| "logps/chosen": -7.258332252502441, | |
| "logps/rejected": -9.4107666015625, | |
| "loss": 0.6015, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.0792965292930603, | |
| "rewards/margins": 0.28216832876205444, | |
| "rewards/rejected": -0.36146482825279236, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_logits/chosen": -3.169694662094116, | |
| "eval_logits/rejected": -3.1489059925079346, | |
| "eval_logps/chosen": -7.470149040222168, | |
| "eval_logps/rejected": -8.596240043640137, | |
| "eval_loss": 0.6881732940673828, | |
| "eval_rewards/accuracies": 0.5396634340286255, | |
| "eval_rewards/chosen": -0.13295257091522217, | |
| "eval_rewards/margins": 0.032129984349012375, | |
| "eval_rewards/rejected": -0.16508255898952484, | |
| "eval_runtime": 650.2113, | |
| "eval_samples_per_second": 30.658, | |
| "eval_steps_per_second": 0.48, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |