| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.6155917425310937, | |
| "eval_steps": 20, | |
| "global_step": 700, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004615976407231696, | |
| "grad_norm": 92.4314081607968, | |
| "learning_rate": 1.1494252873563218e-08, | |
| "logits/chosen": -1.3403388261795044, | |
| "logits/rejected": -1.3443610668182373, | |
| "logps/chosen": -48.98606872558594, | |
| "logps/rejected": -52.890384674072266, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.009231952814463392, | |
| "grad_norm": 114.27394093227154, | |
| "learning_rate": 2.2988505747126436e-08, | |
| "logits/chosen": -1.3453574180603027, | |
| "logits/rejected": -1.3622318506240845, | |
| "logps/chosen": -39.51582336425781, | |
| "logps/rejected": -55.267478942871094, | |
| "loss": 0.6984, | |
| "rewards/accuracies": 0.4444444477558136, | |
| "rewards/chosen": -0.0065580871887505054, | |
| "rewards/margins": -0.004221578594297171, | |
| "rewards/rejected": -0.002336508594453335, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01384792922169509, | |
| "grad_norm": 150.47548740563838, | |
| "learning_rate": 3.448275862068965e-08, | |
| "logits/chosen": -1.3104500770568848, | |
| "logits/rejected": -1.3256760835647583, | |
| "logps/chosen": -46.711997985839844, | |
| "logps/rejected": -61.08738327026367, | |
| "loss": 0.7077, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": -0.015531142242252827, | |
| "rewards/margins": -0.022088024765253067, | |
| "rewards/rejected": 0.006556881591677666, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.018463905628926785, | |
| "grad_norm": 127.64708744370179, | |
| "learning_rate": 4.597701149425287e-08, | |
| "logits/chosen": -1.3497395515441895, | |
| "logits/rejected": -1.3723570108413696, | |
| "logps/chosen": -50.4114875793457, | |
| "logps/rejected": -67.92998504638672, | |
| "loss": 0.7139, | |
| "rewards/accuracies": 0.4027777910232544, | |
| "rewards/chosen": 0.04585569351911545, | |
| "rewards/margins": -0.033931903541088104, | |
| "rewards/rejected": 0.07978759706020355, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.023079882036158482, | |
| "grad_norm": 104.28603257129862, | |
| "learning_rate": 5.747126436781609e-08, | |
| "logits/chosen": -1.3193544149398804, | |
| "logits/rejected": -1.3253015279769897, | |
| "logps/chosen": -48.21293258666992, | |
| "logps/rejected": -55.63939666748047, | |
| "loss": 0.6759, | |
| "rewards/accuracies": 0.6527777910232544, | |
| "rewards/chosen": 0.09531965851783752, | |
| "rewards/margins": 0.03943055123090744, | |
| "rewards/rejected": 0.055889103561639786, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02769585844339018, | |
| "grad_norm": 122.92866276312793, | |
| "learning_rate": 6.89655172413793e-08, | |
| "logits/chosen": -1.356438159942627, | |
| "logits/rejected": -1.3778866529464722, | |
| "logps/chosen": -47.38197326660156, | |
| "logps/rejected": -62.85205841064453, | |
| "loss": 0.7012, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.021549424156546593, | |
| "rewards/margins": -0.00793980248272419, | |
| "rewards/rejected": 0.029489226639270782, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.032311834850621876, | |
| "grad_norm": 141.4940238366865, | |
| "learning_rate": 8.045977011494252e-08, | |
| "logits/chosen": -1.2811020612716675, | |
| "logits/rejected": -1.3018286228179932, | |
| "logps/chosen": -53.21059799194336, | |
| "logps/rejected": -68.97090148925781, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": -0.0111711286008358, | |
| "rewards/margins": 0.018631484359502792, | |
| "rewards/rejected": -0.029802612960338593, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03692781125785357, | |
| "grad_norm": 108.13024807755075, | |
| "learning_rate": 9.195402298850574e-08, | |
| "logits/chosen": -1.3367334604263306, | |
| "logits/rejected": -1.3522446155548096, | |
| "logps/chosen": -40.02373504638672, | |
| "logps/rejected": -54.60912322998047, | |
| "loss": 0.702, | |
| "rewards/accuracies": 0.4166666567325592, | |
| "rewards/chosen": 0.010513358749449253, | |
| "rewards/margins": -0.013667477294802666, | |
| "rewards/rejected": 0.024180836975574493, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04154378766508527, | |
| "grad_norm": 90.94991906906556, | |
| "learning_rate": 1.0344827586206897e-07, | |
| "logits/chosen": -1.2363929748535156, | |
| "logits/rejected": -1.2396348714828491, | |
| "logps/chosen": -56.863731384277344, | |
| "logps/rejected": -53.349342346191406, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": 0.07457832247018814, | |
| "rewards/margins": 0.004799458663910627, | |
| "rewards/rejected": 0.06977886706590652, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.046159764072316964, | |
| "grad_norm": 166.09887872402663, | |
| "learning_rate": 1.1494252873563217e-07, | |
| "logits/chosen": -1.2872830629348755, | |
| "logits/rejected": -1.317036747932434, | |
| "logps/chosen": -49.23244857788086, | |
| "logps/rejected": -71.62715911865234, | |
| "loss": 0.7087, | |
| "rewards/accuracies": 0.5694444179534912, | |
| "rewards/chosen": 0.02608451619744301, | |
| "rewards/margins": -0.02054634317755699, | |
| "rewards/rejected": 0.046630859375, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046159764072316964, | |
| "eval_logits/chosen": -1.262330412864685, | |
| "eval_logits/rejected": -1.273974895477295, | |
| "eval_logps/chosen": -48.664798736572266, | |
| "eval_logps/rejected": -56.1088752746582, | |
| "eval_loss": 0.6998714804649353, | |
| "eval_rewards/accuracies": 0.4228110611438751, | |
| "eval_rewards/chosen": -0.01568525843322277, | |
| "eval_rewards/margins": -0.007799813989549875, | |
| "eval_rewards/rejected": -0.007885444909334183, | |
| "eval_runtime": 231.8501, | |
| "eval_samples_per_second": 7.479, | |
| "eval_steps_per_second": 1.872, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05077574047954866, | |
| "grad_norm": 141.4312370714434, | |
| "learning_rate": 1.2643678160919542e-07, | |
| "logits/chosen": -1.328560709953308, | |
| "logits/rejected": -1.3631365299224854, | |
| "logps/chosen": -50.79507827758789, | |
| "logps/rejected": -79.58642578125, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.5277777910232544, | |
| "rewards/chosen": 0.04927082732319832, | |
| "rewards/margins": 0.02934443950653076, | |
| "rewards/rejected": 0.019926389679312706, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05539171688678036, | |
| "grad_norm": 106.70546600458962, | |
| "learning_rate": 1.379310344827586e-07, | |
| "logits/chosen": -1.335903286933899, | |
| "logits/rejected": -1.3434231281280518, | |
| "logps/chosen": -59.29114532470703, | |
| "logps/rejected": -62.07218933105469, | |
| "loss": 0.6961, | |
| "rewards/accuracies": 0.5138888955116272, | |
| "rewards/chosen": 0.00791182741522789, | |
| "rewards/margins": -0.0008803076343610883, | |
| "rewards/rejected": 0.008792135864496231, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06000769329401205, | |
| "grad_norm": 84.5108710571048, | |
| "learning_rate": 1.4942528735632184e-07, | |
| "logits/chosen": -1.315495491027832, | |
| "logits/rejected": -1.314201831817627, | |
| "logps/chosen": -52.26453399658203, | |
| "logps/rejected": -46.654151916503906, | |
| "loss": 0.6879, | |
| "rewards/accuracies": 0.5972222089767456, | |
| "rewards/chosen": 0.10577751696109772, | |
| "rewards/margins": 0.01635124906897545, | |
| "rewards/rejected": 0.08942626416683197, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06462366970124375, | |
| "grad_norm": 142.3451166395631, | |
| "learning_rate": 1.6091954022988505e-07, | |
| "logits/chosen": -1.3120254278182983, | |
| "logits/rejected": -1.3418428897857666, | |
| "logps/chosen": -54.30976486206055, | |
| "logps/rejected": -76.96250915527344, | |
| "loss": 0.6796, | |
| "rewards/accuracies": 0.4722222089767456, | |
| "rewards/chosen": 0.14193940162658691, | |
| "rewards/margins": 0.03795723244547844, | |
| "rewards/rejected": 0.10398217290639877, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06923964610847544, | |
| "grad_norm": 85.45105537711353, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "logits/chosen": -1.339949131011963, | |
| "logits/rejected": -1.35366952419281, | |
| "logps/chosen": -47.45890808105469, | |
| "logps/rejected": -56.32393264770508, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.12981660664081573, | |
| "rewards/margins": 0.022233910858631134, | |
| "rewards/rejected": 0.1075827032327652, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07385562251570714, | |
| "grad_norm": 87.30098240848938, | |
| "learning_rate": 1.839080459770115e-07, | |
| "logits/chosen": -1.3168249130249023, | |
| "logits/rejected": -1.3266100883483887, | |
| "logps/chosen": -48.27603530883789, | |
| "logps/rejected": -54.10696792602539, | |
| "loss": 0.6693, | |
| "rewards/accuracies": 0.5833333134651184, | |
| "rewards/chosen": 0.2039007544517517, | |
| "rewards/margins": 0.05477040261030197, | |
| "rewards/rejected": 0.14913035929203033, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07847159892293884, | |
| "grad_norm": 81.90822350206311, | |
| "learning_rate": 1.9540229885057472e-07, | |
| "logits/chosen": -1.3052334785461426, | |
| "logits/rejected": -1.3256360292434692, | |
| "logps/chosen": -44.5953483581543, | |
| "logps/rejected": -61.29960250854492, | |
| "loss": 0.674, | |
| "rewards/accuracies": 0.5555555820465088, | |
| "rewards/chosen": 0.29883071780204773, | |
| "rewards/margins": 0.04845905303955078, | |
| "rewards/rejected": 0.25037166476249695, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08308757533017054, | |
| "grad_norm": 97.62737031971352, | |
| "learning_rate": 2.0689655172413793e-07, | |
| "logits/chosen": -1.3107632398605347, | |
| "logits/rejected": -1.3140422105789185, | |
| "logps/chosen": -51.13896179199219, | |
| "logps/rejected": -48.718727111816406, | |
| "loss": 0.6724, | |
| "rewards/accuracies": 0.5972222089767456, | |
| "rewards/chosen": 0.2992264926433563, | |
| "rewards/margins": 0.05020047724246979, | |
| "rewards/rejected": 0.24902603030204773, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08770355173740223, | |
| "grad_norm": 91.70503719282425, | |
| "learning_rate": 2.1839080459770114e-07, | |
| "logits/chosen": -1.258926510810852, | |
| "logits/rejected": -1.2699991464614868, | |
| "logps/chosen": -50.59396743774414, | |
| "logps/rejected": -56.2684326171875, | |
| "loss": 0.6819, | |
| "rewards/accuracies": 0.5138888955116272, | |
| "rewards/chosen": 0.33299052715301514, | |
| "rewards/margins": 0.038206882774829865, | |
| "rewards/rejected": 0.29478365182876587, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09231952814463393, | |
| "grad_norm": 92.44581774007628, | |
| "learning_rate": 2.2988505747126435e-07, | |
| "logits/chosen": -1.3053722381591797, | |
| "logits/rejected": -1.316298007965088, | |
| "logps/chosen": -52.49648666381836, | |
| "logps/rejected": -56.09816360473633, | |
| "loss": 0.6665, | |
| "rewards/accuracies": 0.5555555820465088, | |
| "rewards/chosen": 0.38449087738990784, | |
| "rewards/margins": 0.07676863670349121, | |
| "rewards/rejected": 0.30772221088409424, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09231952814463393, | |
| "eval_logits/chosen": -1.2573766708374023, | |
| "eval_logits/rejected": -1.269149899482727, | |
| "eval_logps/chosen": -47.605804443359375, | |
| "eval_logps/rejected": -55.2476806640625, | |
| "eval_loss": 0.6606337428092957, | |
| "eval_rewards/accuracies": 0.5725806355476379, | |
| "eval_rewards/chosen": 0.5138096213340759, | |
| "eval_rewards/margins": 0.09109989553689957, | |
| "eval_rewards/rejected": 0.42270979285240173, | |
| "eval_runtime": 227.103, | |
| "eval_samples_per_second": 7.635, | |
| "eval_steps_per_second": 1.911, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09693550455186563, | |
| "grad_norm": 83.51334071319128, | |
| "learning_rate": 2.413793103448276e-07, | |
| "logits/chosen": -1.3424174785614014, | |
| "logits/rejected": -1.3526452779769897, | |
| "logps/chosen": -44.44143295288086, | |
| "logps/rejected": -49.79873275756836, | |
| "loss": 0.6451, | |
| "rewards/accuracies": 0.6527777910232544, | |
| "rewards/chosen": 0.5364899039268494, | |
| "rewards/margins": 0.11971965432167053, | |
| "rewards/rejected": 0.4167703092098236, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.10155148095909731, | |
| "grad_norm": 97.54181471785779, | |
| "learning_rate": 2.5287356321839084e-07, | |
| "logits/chosen": -1.307891845703125, | |
| "logits/rejected": -1.3404256105422974, | |
| "logps/chosen": -50.57395935058594, | |
| "logps/rejected": -79.52447509765625, | |
| "loss": 0.6618, | |
| "rewards/accuracies": 0.5138888955116272, | |
| "rewards/chosen": 0.5593165159225464, | |
| "rewards/margins": 0.12356305122375488, | |
| "rewards/rejected": 0.4357534646987915, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10616745736632902, | |
| "grad_norm": 87.61515534490086, | |
| "learning_rate": 2.64367816091954e-07, | |
| "logits/chosen": -1.2781007289886475, | |
| "logits/rejected": -1.2948905229568481, | |
| "logps/chosen": -55.20244216918945, | |
| "logps/rejected": -57.26641845703125, | |
| "loss": 0.656, | |
| "rewards/accuracies": 0.5833333134651184, | |
| "rewards/chosen": 0.5990750193595886, | |
| "rewards/margins": 0.11623137444257736, | |
| "rewards/rejected": 0.4828437268733978, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11078343377356072, | |
| "grad_norm": 89.38653936134894, | |
| "learning_rate": 2.758620689655172e-07, | |
| "logits/chosen": -1.2956469058990479, | |
| "logits/rejected": -1.3047106266021729, | |
| "logps/chosen": -48.0570068359375, | |
| "logps/rejected": -54.5909423828125, | |
| "loss": 0.6482, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6316156983375549, | |
| "rewards/margins": 0.15523308515548706, | |
| "rewards/rejected": 0.47638261318206787, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1153994101807924, | |
| "grad_norm": 80.50515017031447, | |
| "learning_rate": 2.873563218390804e-07, | |
| "logits/chosen": -1.3206638097763062, | |
| "logits/rejected": -1.328073501586914, | |
| "logps/chosen": -51.81482696533203, | |
| "logps/rejected": -51.81681442260742, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.6527777910232544, | |
| "rewards/chosen": 0.7185304164886475, | |
| "rewards/margins": 0.1627696007490158, | |
| "rewards/rejected": 0.5557608008384705, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1200153865880241, | |
| "grad_norm": 81.34880289712139, | |
| "learning_rate": 2.988505747126437e-07, | |
| "logits/chosen": -1.3584266901016235, | |
| "logits/rejected": -1.3828377723693848, | |
| "logps/chosen": -44.66862869262695, | |
| "logps/rejected": -66.46587371826172, | |
| "loss": 0.6441, | |
| "rewards/accuracies": 0.5416666865348816, | |
| "rewards/chosen": 0.8013312816619873, | |
| "rewards/margins": 0.21720875799655914, | |
| "rewards/rejected": 0.5841224789619446, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1246313629952558, | |
| "grad_norm": 77.01376791928342, | |
| "learning_rate": 3.103448275862069e-07, | |
| "logits/chosen": -1.4568628072738647, | |
| "logits/rejected": -1.4819716215133667, | |
| "logps/chosen": -47.80024719238281, | |
| "logps/rejected": -64.31399536132812, | |
| "loss": 0.6113, | |
| "rewards/accuracies": 0.6388888955116272, | |
| "rewards/chosen": 0.7414547204971313, | |
| "rewards/margins": 0.2719371020793915, | |
| "rewards/rejected": 0.4695175588130951, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1292473394024875, | |
| "grad_norm": 72.36047181825025, | |
| "learning_rate": 3.218390804597701e-07, | |
| "logits/chosen": -1.2487589120864868, | |
| "logits/rejected": -1.250648021697998, | |
| "logps/chosen": -44.55437469482422, | |
| "logps/rejected": -49.30759811401367, | |
| "loss": 0.6265, | |
| "rewards/accuracies": 0.6527777910232544, | |
| "rewards/chosen": 0.8203690052032471, | |
| "rewards/margins": 0.1877826303243637, | |
| "rewards/rejected": 0.6325862407684326, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1338633158097192, | |
| "grad_norm": 82.28322419724, | |
| "learning_rate": 3.333333333333333e-07, | |
| "logits/chosen": -1.3345168828964233, | |
| "logits/rejected": -1.3384166955947876, | |
| "logps/chosen": -46.937095642089844, | |
| "logps/rejected": -48.4022216796875, | |
| "loss": 0.5824, | |
| "rewards/accuracies": 0.6388888955116272, | |
| "rewards/chosen": 0.8726701736450195, | |
| "rewards/margins": 0.2923206090927124, | |
| "rewards/rejected": 0.5803494453430176, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13847929221695088, | |
| "grad_norm": 71.91030915826812, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "logits/chosen": -1.326303243637085, | |
| "logits/rejected": -1.3416494131088257, | |
| "logps/chosen": -45.597774505615234, | |
| "logps/rejected": -55.49925994873047, | |
| "loss": 0.5933, | |
| "rewards/accuracies": 0.7083333134651184, | |
| "rewards/chosen": 0.8803545236587524, | |
| "rewards/margins": 0.32467857003211975, | |
| "rewards/rejected": 0.5556759834289551, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13847929221695088, | |
| "eval_logits/chosen": -1.2449160814285278, | |
| "eval_logits/rejected": -1.2571001052856445, | |
| "eval_logps/chosen": -46.695552825927734, | |
| "eval_logps/rejected": -54.76215362548828, | |
| "eval_loss": 0.6056556105613708, | |
| "eval_rewards/accuracies": 0.6440092325210571, | |
| "eval_rewards/chosen": 0.9689397215843201, | |
| "eval_rewards/margins": 0.30346596240997314, | |
| "eval_rewards/rejected": 0.6654736995697021, | |
| "eval_runtime": 226.9584, | |
| "eval_samples_per_second": 7.64, | |
| "eval_steps_per_second": 1.912, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1430952686241826, | |
| "grad_norm": 73.53474176254309, | |
| "learning_rate": 3.5632183908045977e-07, | |
| "logits/chosen": -1.302392840385437, | |
| "logits/rejected": -1.3110175132751465, | |
| "logps/chosen": -47.1639289855957, | |
| "logps/rejected": -52.83234786987305, | |
| "loss": 0.6038, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": 0.9415748119354248, | |
| "rewards/margins": 0.2688363194465637, | |
| "rewards/rejected": 0.6727384924888611, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.14771124503141428, | |
| "grad_norm": 72.82764174123524, | |
| "learning_rate": 3.67816091954023e-07, | |
| "logits/chosen": -1.3600918054580688, | |
| "logits/rejected": -1.3759901523590088, | |
| "logps/chosen": -48.68782043457031, | |
| "logps/rejected": -59.76481246948242, | |
| "loss": 0.5893, | |
| "rewards/accuracies": 0.6666666865348816, | |
| "rewards/chosen": 1.036276936531067, | |
| "rewards/margins": 0.393592894077301, | |
| "rewards/rejected": 0.6426840424537659, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.152327221438646, | |
| "grad_norm": 72.4502448635078, | |
| "learning_rate": 3.793103448275862e-07, | |
| "logits/chosen": -1.254841923713684, | |
| "logits/rejected": -1.2786000967025757, | |
| "logps/chosen": -45.10692596435547, | |
| "logps/rejected": -67.49703216552734, | |
| "loss": 0.5441, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 1.195844292640686, | |
| "rewards/margins": 0.5360373854637146, | |
| "rewards/rejected": 0.6598069667816162, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15694319784587768, | |
| "grad_norm": 137.30984216926402, | |
| "learning_rate": 3.9080459770114945e-07, | |
| "logits/chosen": -1.3896088600158691, | |
| "logits/rejected": -1.4264814853668213, | |
| "logps/chosen": -41.05742645263672, | |
| "logps/rejected": -65.78131103515625, | |
| "loss": 0.6531, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.2528166770935059, | |
| "rewards/margins": 0.4981537461280823, | |
| "rewards/rejected": 0.7546629905700684, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.16155917425310937, | |
| "grad_norm": 72.56751313040455, | |
| "learning_rate": 4.0229885057471266e-07, | |
| "logits/chosen": -1.3160932064056396, | |
| "logits/rejected": -1.3216156959533691, | |
| "logps/chosen": -42.512996673583984, | |
| "logps/rejected": -46.83217239379883, | |
| "loss": 0.5056, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": 1.5907378196716309, | |
| "rewards/margins": 0.5790587067604065, | |
| "rewards/rejected": 1.0116791725158691, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16617515066034108, | |
| "grad_norm": 78.13005318225228, | |
| "learning_rate": 4.1379310344827586e-07, | |
| "logits/chosen": -1.2581322193145752, | |
| "logits/rejected": -1.2822985649108887, | |
| "logps/chosen": -44.267303466796875, | |
| "logps/rejected": -71.07489013671875, | |
| "loss": 0.5435, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": 1.4014304876327515, | |
| "rewards/margins": 0.723700761795044, | |
| "rewards/rejected": 0.6777297258377075, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.17079112706757277, | |
| "grad_norm": 56.428824716251846, | |
| "learning_rate": 4.25287356321839e-07, | |
| "logits/chosen": -1.2850017547607422, | |
| "logits/rejected": -1.2932665348052979, | |
| "logps/chosen": -52.70983123779297, | |
| "logps/rejected": -53.61183166503906, | |
| "loss": 0.6011, | |
| "rewards/accuracies": 0.7083333134651184, | |
| "rewards/chosen": 1.2770977020263672, | |
| "rewards/margins": 0.5247661471366882, | |
| "rewards/rejected": 0.7523314952850342, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.17540710347480445, | |
| "grad_norm": 80.88401808498448, | |
| "learning_rate": 4.367816091954023e-07, | |
| "logits/chosen": -1.2536025047302246, | |
| "logits/rejected": -1.2705798149108887, | |
| "logps/chosen": -47.25906753540039, | |
| "logps/rejected": -60.58863067626953, | |
| "loss": 0.5302, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 1.4430739879608154, | |
| "rewards/margins": 0.5440190434455872, | |
| "rewards/rejected": 0.8990550637245178, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.18002307988203617, | |
| "grad_norm": 68.76597330947016, | |
| "learning_rate": 4.482758620689655e-07, | |
| "logits/chosen": -1.2450529336929321, | |
| "logits/rejected": -1.2557368278503418, | |
| "logps/chosen": -42.16405487060547, | |
| "logps/rejected": -52.293785095214844, | |
| "loss": 0.5623, | |
| "rewards/accuracies": 0.5972222089767456, | |
| "rewards/chosen": 1.489140510559082, | |
| "rewards/margins": 0.561518669128418, | |
| "rewards/rejected": 0.9276217222213745, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.18463905628926786, | |
| "grad_norm": 74.75348151064274, | |
| "learning_rate": 4.597701149425287e-07, | |
| "logits/chosen": -1.3210065364837646, | |
| "logits/rejected": -1.3294503688812256, | |
| "logps/chosen": -42.845787048339844, | |
| "logps/rejected": -46.24819564819336, | |
| "loss": 0.5108, | |
| "rewards/accuracies": 0.6944444179534912, | |
| "rewards/chosen": 1.1906384229660034, | |
| "rewards/margins": 0.6210441589355469, | |
| "rewards/rejected": 0.5695942640304565, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18463905628926786, | |
| "eval_logits/chosen": -1.226657509803772, | |
| "eval_logits/rejected": -1.2391773462295532, | |
| "eval_logps/chosen": -46.65249252319336, | |
| "eval_logps/rejected": -55.351402282714844, | |
| "eval_loss": 0.5300613045692444, | |
| "eval_rewards/accuracies": 0.7142857313156128, | |
| "eval_rewards/chosen": 0.9904682636260986, | |
| "eval_rewards/margins": 0.6196123957633972, | |
| "eval_rewards/rejected": 0.3708558976650238, | |
| "eval_runtime": 227.2212, | |
| "eval_samples_per_second": 7.631, | |
| "eval_steps_per_second": 1.91, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18925503269649954, | |
| "grad_norm": 60.96010208828231, | |
| "learning_rate": 4.712643678160919e-07, | |
| "logits/chosen": -1.2890104055404663, | |
| "logits/rejected": -1.3069978952407837, | |
| "logps/chosen": -50.1616096496582, | |
| "logps/rejected": -61.80992126464844, | |
| "loss": 0.485, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.9283716678619385, | |
| "rewards/margins": 0.7194375991821289, | |
| "rewards/rejected": 0.20893406867980957, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.19387100910373126, | |
| "grad_norm": 58.75128856791501, | |
| "learning_rate": 4.827586206896552e-07, | |
| "logits/chosen": -1.282674789428711, | |
| "logits/rejected": -1.3157954216003418, | |
| "logps/chosen": -42.50400161743164, | |
| "logps/rejected": -75.54652404785156, | |
| "loss": 0.4729, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.7927578091621399, | |
| "rewards/margins": 1.0220049619674683, | |
| "rewards/rejected": -0.22924719750881195, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19848698551096294, | |
| "grad_norm": 69.45296124181107, | |
| "learning_rate": 4.942528735632184e-07, | |
| "logits/chosen": -1.2970733642578125, | |
| "logits/rejected": -1.3046212196350098, | |
| "logps/chosen": -48.21321487426758, | |
| "logps/rejected": -59.63574981689453, | |
| "loss": 0.4753, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.7275898456573486, | |
| "rewards/margins": 0.7579395771026611, | |
| "rewards/rejected": -0.030349718406796455, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.20310296191819463, | |
| "grad_norm": 57.934663164568036, | |
| "learning_rate": 4.999979670146248e-07, | |
| "logits/chosen": -1.322100043296814, | |
| "logits/rejected": -1.333228588104248, | |
| "logps/chosen": -51.94272232055664, | |
| "logps/rejected": -62.809814453125, | |
| "loss": 0.4698, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.5794834494590759, | |
| "rewards/margins": 0.9340064525604248, | |
| "rewards/rejected": -0.3545229136943817, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.20771893832542634, | |
| "grad_norm": 67.05839544195749, | |
| "learning_rate": 4.99981703330008e-07, | |
| "logits/chosen": -1.2547653913497925, | |
| "logits/rejected": -1.2667738199234009, | |
| "logps/chosen": -44.88441467285156, | |
| "logps/rejected": -55.610042572021484, | |
| "loss": 0.4928, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.672675371170044, | |
| "rewards/margins": 0.72029709815979, | |
| "rewards/rejected": -0.04762159287929535, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.21233491473265803, | |
| "grad_norm": 51.75653250091916, | |
| "learning_rate": 4.99949177018813e-07, | |
| "logits/chosen": -1.3547184467315674, | |
| "logits/rejected": -1.3632615804672241, | |
| "logps/chosen": -41.517826080322266, | |
| "logps/rejected": -52.83651351928711, | |
| "loss": 0.3985, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.8576219081878662, | |
| "rewards/margins": 1.0123066902160645, | |
| "rewards/rejected": -0.15468484163284302, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.21695089113988972, | |
| "grad_norm": 81.64528533284856, | |
| "learning_rate": 4.999003901970474e-07, | |
| "logits/chosen": -1.3031115531921387, | |
| "logits/rejected": -1.3127225637435913, | |
| "logps/chosen": -54.79065704345703, | |
| "logps/rejected": -54.33130645751953, | |
| "loss": 0.5699, | |
| "rewards/accuracies": 0.7222222089767456, | |
| "rewards/chosen": 0.6079578995704651, | |
| "rewards/margins": 0.6609423160552979, | |
| "rewards/rejected": -0.05298437178134918, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.22156686754712143, | |
| "grad_norm": 50.92778720915042, | |
| "learning_rate": 4.998353460385512e-07, | |
| "logits/chosen": -1.2541792392730713, | |
| "logits/rejected": -1.2738375663757324, | |
| "logps/chosen": -45.45362091064453, | |
| "logps/rejected": -64.12794494628906, | |
| "loss": 0.4024, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.9434598088264465, | |
| "rewards/margins": 1.2422370910644531, | |
| "rewards/rejected": -0.29877743124961853, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.22618284395435312, | |
| "grad_norm": 56.275721433856205, | |
| "learning_rate": 4.997540487747892e-07, | |
| "logits/chosen": -1.2653698921203613, | |
| "logits/rejected": -1.2802023887634277, | |
| "logps/chosen": -44.68564987182617, | |
| "logps/rejected": -66.80543518066406, | |
| "loss": 0.4761, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.8570264577865601, | |
| "rewards/margins": 1.2518484592437744, | |
| "rewards/rejected": -0.39482197165489197, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2307988203615848, | |
| "grad_norm": 52.70179467657195, | |
| "learning_rate": 4.996565036945769e-07, | |
| "logits/chosen": -1.2993725538253784, | |
| "logits/rejected": -1.3049986362457275, | |
| "logps/chosen": -50.757686614990234, | |
| "logps/rejected": -53.39494323730469, | |
| "loss": 0.474, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.6508947014808655, | |
| "rewards/margins": 0.9926181435585022, | |
| "rewards/rejected": -0.3417234420776367, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2307988203615848, | |
| "eval_logits/chosen": -1.2166502475738525, | |
| "eval_logits/rejected": -1.2277939319610596, | |
| "eval_logps/chosen": -46.99800109863281, | |
| "eval_logps/rejected": -56.576629638671875, | |
| "eval_loss": 0.45405343174934387, | |
| "eval_rewards/accuracies": 0.7511520981788635, | |
| "eval_rewards/chosen": 0.8177129626274109, | |
| "eval_rewards/margins": 1.0594747066497803, | |
| "eval_rewards/rejected": -0.24176181852817535, | |
| "eval_runtime": 227.4246, | |
| "eval_samples_per_second": 7.625, | |
| "eval_steps_per_second": 1.908, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23541479676881652, | |
| "grad_norm": 44.630871285034665, | |
| "learning_rate": 4.995427171437356e-07, | |
| "logits/chosen": -1.2710050344467163, | |
| "logits/rejected": -1.2925546169281006, | |
| "logps/chosen": -44.29911422729492, | |
| "logps/rejected": -63.83744430541992, | |
| "loss": 0.3944, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.7817994356155396, | |
| "rewards/margins": 1.3069223165512085, | |
| "rewards/rejected": -0.5251227617263794, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2400307731760482, | |
| "grad_norm": 63.36947457687719, | |
| "learning_rate": 4.994126965246796e-07, | |
| "logits/chosen": -1.281785488128662, | |
| "logits/rejected": -1.2921262979507446, | |
| "logps/chosen": -45.61968994140625, | |
| "logps/rejected": -57.57981872558594, | |
| "loss": 0.4314, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.837566077709198, | |
| "rewards/margins": 1.084651231765747, | |
| "rewards/rejected": -0.247085303068161, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.24464674958327992, | |
| "grad_norm": 64.3165584011508, | |
| "learning_rate": 4.992664502959351e-07, | |
| "logits/chosen": -1.2655751705169678, | |
| "logits/rejected": -1.3007822036743164, | |
| "logps/chosen": -42.23821258544922, | |
| "logps/rejected": -85.14391326904297, | |
| "loss": 0.3478, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.9540318250656128, | |
| "rewards/margins": 1.9801336526870728, | |
| "rewards/rejected": -1.0261015892028809, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2492627259905116, | |
| "grad_norm": 68.33245668142573, | |
| "learning_rate": 4.991039879715898e-07, | |
| "logits/chosen": -1.2239530086517334, | |
| "logits/rejected": -1.2506436109542847, | |
| "logps/chosen": -48.302852630615234, | |
| "logps/rejected": -70.37635803222656, | |
| "loss": 0.4095, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 1.1005271673202515, | |
| "rewards/margins": 1.5407756567001343, | |
| "rewards/rejected": -0.4402484893798828, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2538787023977433, | |
| "grad_norm": 41.58256253343475, | |
| "learning_rate": 4.989253201206736e-07, | |
| "logits/chosen": -1.3317803144454956, | |
| "logits/rejected": -1.3334723711013794, | |
| "logps/chosen": -47.94260787963867, | |
| "logps/rejected": -48.03237533569336, | |
| "loss": 0.4524, | |
| "rewards/accuracies": 0.6944444179534912, | |
| "rewards/chosen": 0.9174912571907043, | |
| "rewards/margins": 1.0279741287231445, | |
| "rewards/rejected": -0.11048289388418198, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.258494678804975, | |
| "grad_norm": 66.23818262108296, | |
| "learning_rate": 4.987304583664712e-07, | |
| "logits/chosen": -1.2193766832351685, | |
| "logits/rejected": -1.2288120985031128, | |
| "logps/chosen": -55.089717864990234, | |
| "logps/rejected": -61.21225357055664, | |
| "loss": 0.4449, | |
| "rewards/accuracies": 0.7222222089767456, | |
| "rewards/chosen": 0.7610146999359131, | |
| "rewards/margins": 1.0799916982650757, | |
| "rewards/rejected": -0.3189769983291626, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.26311065521220667, | |
| "grad_norm": 58.36439834992655, | |
| "learning_rate": 4.985194153857662e-07, | |
| "logits/chosen": -1.3395094871520996, | |
| "logits/rejected": -1.3416942358016968, | |
| "logps/chosen": -43.976890563964844, | |
| "logps/rejected": -45.82760238647461, | |
| "loss": 0.4929, | |
| "rewards/accuracies": 0.7222222089767456, | |
| "rewards/chosen": 0.4750349223613739, | |
| "rewards/margins": 0.913371741771698, | |
| "rewards/rejected": -0.4383367896080017, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2677266316194384, | |
| "grad_norm": 41.441657797600875, | |
| "learning_rate": 4.982922049080163e-07, | |
| "logits/chosen": -1.3572431802749634, | |
| "logits/rejected": -1.3625115156173706, | |
| "logps/chosen": -42.45515441894531, | |
| "logps/rejected": -49.73173522949219, | |
| "loss": 0.3691, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.41610708832740784, | |
| "rewards/margins": 1.2800379991531372, | |
| "rewards/rejected": -0.8639309406280518, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2723426080266701, | |
| "grad_norm": 60.30435878433939, | |
| "learning_rate": 4.980488417144599e-07, | |
| "logits/chosen": -1.2863659858703613, | |
| "logits/rejected": -1.3199315071105957, | |
| "logps/chosen": -48.62416076660156, | |
| "logps/rejected": -85.73699188232422, | |
| "loss": 0.4597, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.11831729859113693, | |
| "rewards/margins": 1.6947745084762573, | |
| "rewards/rejected": -1.5764573812484741, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.27695858443390176, | |
| "grad_norm": 41.355652091388535, | |
| "learning_rate": 4.977893416371544e-07, | |
| "logits/chosen": -1.2884269952774048, | |
| "logits/rejected": -1.2976269721984863, | |
| "logps/chosen": -41.79518127441406, | |
| "logps/rejected": -54.925662994384766, | |
| "loss": 0.3826, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.0362311452627182, | |
| "rewards/margins": 1.4945807456970215, | |
| "rewards/rejected": -1.4583497047424316, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.27695858443390176, | |
| "eval_logits/chosen": -1.207366704940796, | |
| "eval_logits/rejected": -1.2173371315002441, | |
| "eval_logps/chosen": -48.50833511352539, | |
| "eval_logps/rejected": -58.64011764526367, | |
| "eval_loss": 0.4093886911869049, | |
| "eval_rewards/accuracies": 0.7724654674530029, | |
| "eval_rewards/chosen": 0.06255079805850983, | |
| "eval_rewards/margins": 1.336057424545288, | |
| "eval_rewards/rejected": -1.27350652217865, | |
| "eval_runtime": 227.1307, | |
| "eval_samples_per_second": 7.634, | |
| "eval_steps_per_second": 1.911, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.28157456084113347, | |
| "grad_norm": 57.25843545889309, | |
| "learning_rate": 4.975137215579469e-07, | |
| "logits/chosen": -1.1866450309753418, | |
| "logits/rejected": -1.186094045639038, | |
| "logps/chosen": -55.403018951416016, | |
| "logps/rejected": -51.23255920410156, | |
| "loss": 0.4266, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.05289599671959877, | |
| "rewards/margins": 1.1414172649383545, | |
| "rewards/rejected": -1.0885213613510132, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2861905372483652, | |
| "grad_norm": 39.62322228531182, | |
| "learning_rate": 4.972219994073755e-07, | |
| "logits/chosen": -1.18235182762146, | |
| "logits/rejected": -1.2118382453918457, | |
| "logps/chosen": -48.611637115478516, | |
| "logps/rejected": -79.41681671142578, | |
| "loss": 0.3892, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": -0.14360421895980835, | |
| "rewards/margins": 1.8774958848953247, | |
| "rewards/rejected": -2.0211000442504883, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2908065136555969, | |
| "grad_norm": 52.17921097346343, | |
| "learning_rate": 4.969141941635025e-07, | |
| "logits/chosen": -1.2253869771957397, | |
| "logits/rejected": -1.2435510158538818, | |
| "logps/chosen": -50.45375061035156, | |
| "logps/rejected": -69.55599212646484, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.31797370314598083, | |
| "rewards/margins": 1.7364860773086548, | |
| "rewards/rejected": -2.054459571838379, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.29542249006282856, | |
| "grad_norm": 70.38261738253976, | |
| "learning_rate": 4.965903258506806e-07, | |
| "logits/chosen": -1.1796499490737915, | |
| "logits/rejected": -1.1957367658615112, | |
| "logps/chosen": -49.15879440307617, | |
| "logps/rejected": -73.3575210571289, | |
| "loss": 0.3346, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.04160241410136223, | |
| "rewards/margins": 1.7600233554840088, | |
| "rewards/rejected": -1.7184207439422607, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.30003846647006027, | |
| "grad_norm": 57.934452383223125, | |
| "learning_rate": 4.962504155382493e-07, | |
| "logits/chosen": -1.3256597518920898, | |
| "logits/rejected": -1.3279542922973633, | |
| "logps/chosen": -44.50282287597656, | |
| "logps/rejected": -48.98247146606445, | |
| "loss": 0.382, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.2423497587442398, | |
| "rewards/margins": 1.2484761476516724, | |
| "rewards/rejected": -1.0061264038085938, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.304654442877292, | |
| "grad_norm": 58.71983276565311, | |
| "learning_rate": 4.958944853391652e-07, | |
| "logits/chosen": -1.1831316947937012, | |
| "logits/rejected": -1.1885439157485962, | |
| "logps/chosen": -45.726505279541016, | |
| "logps/rejected": -54.00067138671875, | |
| "loss": 0.4078, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.6056569814682007, | |
| "rewards/margins": 1.2579783201217651, | |
| "rewards/rejected": -0.6523212790489197, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.30927041928452365, | |
| "grad_norm": 50.9411946225504, | |
| "learning_rate": 4.955225584085624e-07, | |
| "logits/chosen": -1.3628097772598267, | |
| "logits/rejected": -1.3736504316329956, | |
| "logps/chosen": -44.589229583740234, | |
| "logps/rejected": -60.06134796142578, | |
| "loss": 0.4245, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.8993210196495056, | |
| "rewards/margins": 1.6077146530151367, | |
| "rewards/rejected": -0.7083935737609863, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.31388639569175536, | |
| "grad_norm": 53.494824083866554, | |
| "learning_rate": 4.951346589422467e-07, | |
| "logits/chosen": -1.2143707275390625, | |
| "logits/rejected": -1.2256660461425781, | |
| "logps/chosen": -44.73698425292969, | |
| "logps/rejected": -63.74873733520508, | |
| "loss": 0.4379, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.0820810794830322, | |
| "rewards/margins": 1.6512118577957153, | |
| "rewards/rejected": -0.5691307187080383, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3185023720989871, | |
| "grad_norm": 87.56798544187119, | |
| "learning_rate": 4.94730812175122e-07, | |
| "logits/chosen": -1.338615894317627, | |
| "logits/rejected": -1.344118595123291, | |
| "logps/chosen": -45.98448181152344, | |
| "logps/rejected": -49.50959396362305, | |
| "loss": 0.4706, | |
| "rewards/accuracies": 0.6805555820465088, | |
| "rewards/chosen": 1.05098295211792, | |
| "rewards/margins": 1.33613121509552, | |
| "rewards/rejected": -0.2851482033729553, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.32311834850621873, | |
| "grad_norm": 68.21005063884613, | |
| "learning_rate": 4.943110443795476e-07, | |
| "logits/chosen": -1.258334755897522, | |
| "logits/rejected": -1.2637797594070435, | |
| "logps/chosen": -50.91215515136719, | |
| "logps/rejected": -52.46604537963867, | |
| "loss": 0.4759, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.7942720055580139, | |
| "rewards/margins": 1.164951205253601, | |
| "rewards/rejected": -0.37067916989326477, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.32311834850621873, | |
| "eval_logits/chosen": -1.1890074014663696, | |
| "eval_logits/rejected": -1.1990076303482056, | |
| "eval_logps/chosen": -47.355777740478516, | |
| "eval_logps/rejected": -58.06831359863281, | |
| "eval_loss": 0.38173291087150574, | |
| "eval_rewards/accuracies": 0.7718893885612488, | |
| "eval_rewards/chosen": 0.6388264298439026, | |
| "eval_rewards/margins": 1.6264294385910034, | |
| "eval_rewards/rejected": -0.987602949142456, | |
| "eval_runtime": 227.2808, | |
| "eval_samples_per_second": 7.629, | |
| "eval_steps_per_second": 1.91, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.32773432491345045, | |
| "grad_norm": 50.18122842606284, | |
| "learning_rate": 4.938753828636297e-07, | |
| "logits/chosen": -1.2129461765289307, | |
| "logits/rejected": -1.21940279006958, | |
| "logps/chosen": -53.97553253173828, | |
| "logps/rejected": -53.66658020019531, | |
| "loss": 0.4623, | |
| "rewards/accuracies": 0.6805555820465088, | |
| "rewards/chosen": 0.7083945274353027, | |
| "rewards/margins": 1.2968156337738037, | |
| "rewards/rejected": -0.5884211659431458, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.33235030132068216, | |
| "grad_norm": 50.59660648914643, | |
| "learning_rate": 4.934238559694447e-07, | |
| "logits/chosen": -1.1950477361679077, | |
| "logits/rejected": -1.2141423225402832, | |
| "logps/chosen": -47.11637878417969, | |
| "logps/rejected": -65.34754943847656, | |
| "loss": 0.3506, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.5280824899673462, | |
| "rewards/margins": 1.7753053903579712, | |
| "rewards/rejected": -1.2472230195999146, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3369662777279138, | |
| "grad_norm": 48.072274874250304, | |
| "learning_rate": 4.929564930711957e-07, | |
| "logits/chosen": -1.281104564666748, | |
| "logits/rejected": -1.2873430252075195, | |
| "logps/chosen": -46.95094299316406, | |
| "logps/rejected": -52.25088882446289, | |
| "loss": 0.3785, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.3459606468677521, | |
| "rewards/margins": 1.2943564653396606, | |
| "rewards/rejected": -0.9483956098556519, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.34158225413514554, | |
| "grad_norm": 45.84138607827678, | |
| "learning_rate": 4.924733245733008e-07, | |
| "logits/chosen": -1.168983817100525, | |
| "logits/rejected": -1.1675101518630981, | |
| "logps/chosen": -53.398841857910156, | |
| "logps/rejected": -48.5836181640625, | |
| "loss": 0.372, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.4610249102115631, | |
| "rewards/margins": 1.1595698595046997, | |
| "rewards/rejected": -0.6985449194908142, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.34619823054237725, | |
| "grad_norm": 41.47855724733255, | |
| "learning_rate": 4.91974381908416e-07, | |
| "logits/chosen": -1.3004454374313354, | |
| "logits/rejected": -1.3239775896072388, | |
| "logps/chosen": -44.39426803588867, | |
| "logps/rejected": -69.07200622558594, | |
| "loss": 0.3055, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.01821739226579666, | |
| "rewards/margins": 2.1426663398742676, | |
| "rewards/rejected": -2.124448776245117, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3508142069496089, | |
| "grad_norm": 38.94525175640219, | |
| "learning_rate": 4.914596975353898e-07, | |
| "logits/chosen": -1.263897180557251, | |
| "logits/rejected": -1.2764997482299805, | |
| "logps/chosen": -47.19563674926758, | |
| "logps/rejected": -57.78141403198242, | |
| "loss": 0.386, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.055071063339710236, | |
| "rewards/margins": 1.5383036136627197, | |
| "rewards/rejected": -1.4832323789596558, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3554301833568406, | |
| "grad_norm": 66.86847446450086, | |
| "learning_rate": 4.909293049371519e-07, | |
| "logits/chosen": -1.2072829008102417, | |
| "logits/rejected": -1.2103402614593506, | |
| "logps/chosen": -55.665191650390625, | |
| "logps/rejected": -53.18820571899414, | |
| "loss": 0.4106, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.15528617799282074, | |
| "rewards/margins": 1.4208853244781494, | |
| "rewards/rejected": -1.2655991315841675, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.36004615976407234, | |
| "grad_norm": 58.39718753271016, | |
| "learning_rate": 4.903832386185343e-07, | |
| "logits/chosen": -1.246012568473816, | |
| "logits/rejected": -1.2516732215881348, | |
| "logps/chosen": -52.729427337646484, | |
| "logps/rejected": -53.41749572753906, | |
| "loss": 0.4389, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": 0.07690320909023285, | |
| "rewards/margins": 1.1573313474655151, | |
| "rewards/rejected": -1.080428123474121, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.364662136171304, | |
| "grad_norm": 48.60983015034901, | |
| "learning_rate": 4.89821534104028e-07, | |
| "logits/chosen": -1.2771211862564087, | |
| "logits/rejected": -1.2919840812683105, | |
| "logps/chosen": -50.660606384277344, | |
| "logps/rejected": -68.34771728515625, | |
| "loss": 0.3141, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.26177337765693665, | |
| "rewards/margins": 2.4097442626953125, | |
| "rewards/rejected": -2.1479711532592773, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3692781125785357, | |
| "grad_norm": 53.41098297744381, | |
| "learning_rate": 4.892442279354698e-07, | |
| "logits/chosen": -1.2370442152023315, | |
| "logits/rejected": -1.2537869215011597, | |
| "logps/chosen": -52.96739959716797, | |
| "logps/rejected": -71.32413482666016, | |
| "loss": 0.3591, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.09003262966871262, | |
| "rewards/margins": 1.8999587297439575, | |
| "rewards/rejected": -1.989991545677185, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3692781125785357, | |
| "eval_logits/chosen": -1.1776655912399292, | |
| "eval_logits/rejected": -1.1867269277572632, | |
| "eval_logps/chosen": -48.62409973144531, | |
| "eval_logps/rejected": -59.5378303527832, | |
| "eval_loss": 0.3585492968559265, | |
| "eval_rewards/accuracies": 0.7908986210823059, | |
| "eval_rewards/chosen": 0.0046647959388792515, | |
| "eval_rewards/margins": 1.7270255088806152, | |
| "eval_rewards/rejected": -1.7223609685897827, | |
| "eval_runtime": 226.9778, | |
| "eval_samples_per_second": 7.64, | |
| "eval_steps_per_second": 1.912, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3738940889857674, | |
| "grad_norm": 48.30891742529218, | |
| "learning_rate": 4.886513576696673e-07, | |
| "logits/chosen": -1.2570397853851318, | |
| "logits/rejected": -1.2753015756607056, | |
| "logps/chosen": -50.679901123046875, | |
| "logps/rejected": -69.58779907226562, | |
| "loss": 0.3731, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.10189250111579895, | |
| "rewards/margins": 1.991517424583435, | |
| "rewards/rejected": -2.093410015106201, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3785100653929991, | |
| "grad_norm": 38.94107066608042, | |
| "learning_rate": 4.880429618759543e-07, | |
| "logits/chosen": -1.300181269645691, | |
| "logits/rejected": -1.309295892715454, | |
| "logps/chosen": -54.4401969909668, | |
| "logps/rejected": -57.44432830810547, | |
| "loss": 0.4199, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": -0.0659228190779686, | |
| "rewards/margins": 1.5560578107833862, | |
| "rewards/rejected": -1.6219807863235474, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3831260418002308, | |
| "grad_norm": 42.007218912580726, | |
| "learning_rate": 4.874190801336817e-07, | |
| "logits/chosen": -1.2063199281692505, | |
| "logits/rejected": -1.2187817096710205, | |
| "logps/chosen": -52.59967041015625, | |
| "logps/rejected": -62.30234909057617, | |
| "loss": 0.3373, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.0022201803512871265, | |
| "rewards/margins": 1.9247175455093384, | |
| "rewards/rejected": -1.9269376993179321, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3877420182074625, | |
| "grad_norm": 41.8692578527763, | |
| "learning_rate": 4.867797530296431e-07, | |
| "logits/chosen": -1.2532522678375244, | |
| "logits/rejected": -1.2671799659729004, | |
| "logps/chosen": -54.77018737792969, | |
| "logps/rejected": -65.7540512084961, | |
| "loss": 0.2993, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.03232141211628914, | |
| "rewards/margins": 2.12778377532959, | |
| "rewards/rejected": -2.1601054668426514, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.39235799461469417, | |
| "grad_norm": 34.82958012225073, | |
| "learning_rate": 4.861250221554343e-07, | |
| "logits/chosen": -1.208885669708252, | |
| "logits/rejected": -1.2256441116333008, | |
| "logps/chosen": -43.97615051269531, | |
| "logps/rejected": -71.41213989257812, | |
| "loss": 0.3062, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.07487620413303375, | |
| "rewards/margins": 2.4894187450408936, | |
| "rewards/rejected": -2.4145421981811523, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3969739710219259, | |
| "grad_norm": 31.38457100645333, | |
| "learning_rate": 4.854549301047476e-07, | |
| "logits/chosen": -1.232684850692749, | |
| "logits/rejected": -1.2298487424850464, | |
| "logps/chosen": -51.118751525878906, | |
| "logps/rejected": -52.634708404541016, | |
| "loss": 0.3429, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.21625421941280365, | |
| "rewards/margins": 1.5239174365997314, | |
| "rewards/rejected": -1.3076633214950562, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4015899474291576, | |
| "grad_norm": 51.50376706566093, | |
| "learning_rate": 4.847695204706005e-07, | |
| "logits/chosen": -1.26514732837677, | |
| "logits/rejected": -1.266494631767273, | |
| "logps/chosen": -46.0894889831543, | |
| "logps/rejected": -49.9418830871582, | |
| "loss": 0.3849, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.47246673703193665, | |
| "rewards/margins": 1.3556917905807495, | |
| "rewards/rejected": -0.88322514295578, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.40620592383638926, | |
| "grad_norm": 39.155946696884016, | |
| "learning_rate": 4.840688378425e-07, | |
| "logits/chosen": -1.1615080833435059, | |
| "logits/rejected": -1.1706922054290771, | |
| "logps/chosen": -54.5849609375, | |
| "logps/rejected": -67.2327651977539, | |
| "loss": 0.2596, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.5416942238807678, | |
| "rewards/margins": 2.2614989280700684, | |
| "rewards/rejected": -1.7198045253753662, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.410821900243621, | |
| "grad_norm": 41.671444907690415, | |
| "learning_rate": 4.833529278035422e-07, | |
| "logits/chosen": -1.30013108253479, | |
| "logits/rejected": -1.3356281518936157, | |
| "logps/chosen": -45.55073547363281, | |
| "logps/rejected": -83.93589782714844, | |
| "loss": 0.3114, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.6670578718185425, | |
| "rewards/margins": 3.266463279724121, | |
| "rewards/rejected": -2.599405527114868, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4154378766508527, | |
| "grad_norm": 39.182516678430176, | |
| "learning_rate": 4.826218369274459e-07, | |
| "logits/chosen": -1.1979715824127197, | |
| "logits/rejected": -1.2180922031402588, | |
| "logps/chosen": -46.9954948425293, | |
| "logps/rejected": -76.03369140625, | |
| "loss": 0.3212, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.5715588927268982, | |
| "rewards/margins": 2.937516212463379, | |
| "rewards/rejected": -2.3659567832946777, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4154378766508527, | |
| "eval_logits/chosen": -1.169126272201538, | |
| "eval_logits/rejected": -1.1776955127716064, | |
| "eval_logps/chosen": -46.824092864990234, | |
| "eval_logps/rejected": -58.37122344970703, | |
| "eval_loss": 0.33698517084121704, | |
| "eval_rewards/accuracies": 0.7857142686843872, | |
| "eval_rewards/chosen": 0.9046696424484253, | |
| "eval_rewards/margins": 2.043729782104492, | |
| "eval_rewards/rejected": -1.139060139656067, | |
| "eval_runtime": 227.2193, | |
| "eval_samples_per_second": 7.631, | |
| "eval_steps_per_second": 1.91, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.42005385305808435, | |
| "grad_norm": 34.66268965474155, | |
| "learning_rate": 4.818756127755237e-07, | |
| "logits/chosen": -1.245609164237976, | |
| "logits/rejected": -1.2478203773498535, | |
| "logps/chosen": -44.18710708618164, | |
| "logps/rejected": -48.15581130981445, | |
| "loss": 0.2919, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 1.065934658050537, | |
| "rewards/margins": 1.712632656097412, | |
| "rewards/rejected": -0.6466982364654541, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.42466982946531606, | |
| "grad_norm": 48.14030342071319, | |
| "learning_rate": 4.811143038935873e-07, | |
| "logits/chosen": -1.1818798780441284, | |
| "logits/rejected": -1.189144253730774, | |
| "logps/chosen": -51.0610237121582, | |
| "logps/rejected": -54.243797302246094, | |
| "loss": 0.3902, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.165444254875183, | |
| "rewards/margins": 1.9199776649475098, | |
| "rewards/rejected": -0.7545332908630371, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4292858058725478, | |
| "grad_norm": 45.03104875996758, | |
| "learning_rate": 4.803379598087899e-07, | |
| "logits/chosen": -1.2626315355300903, | |
| "logits/rejected": -1.2647953033447266, | |
| "logps/chosen": -47.61143493652344, | |
| "logps/rejected": -46.91539764404297, | |
| "loss": 0.3381, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.8671760559082031, | |
| "rewards/margins": 1.6762652397155762, | |
| "rewards/rejected": -0.8090891242027283, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.43390178227977944, | |
| "grad_norm": 63.031567731654626, | |
| "learning_rate": 4.795466310264034e-07, | |
| "logits/chosen": -1.18837308883667, | |
| "logits/rejected": -1.2106761932373047, | |
| "logps/chosen": -45.96682357788086, | |
| "logps/rejected": -75.37088012695312, | |
| "loss": 0.4522, | |
| "rewards/accuracies": 0.7222222089767456, | |
| "rewards/chosen": 0.41675278544425964, | |
| "rewards/margins": 2.4167346954345703, | |
| "rewards/rejected": -1.9999819993972778, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.43851775868701115, | |
| "grad_norm": 30.590120071737818, | |
| "learning_rate": 4.787403690265335e-07, | |
| "logits/chosen": -1.2696727514266968, | |
| "logits/rejected": -1.2787107229232788, | |
| "logps/chosen": -46.919532775878906, | |
| "logps/rejected": -57.34629440307617, | |
| "loss": 0.3259, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.6651458144187927, | |
| "rewards/margins": 2.023615837097168, | |
| "rewards/rejected": -1.358469843864441, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.44313373509424286, | |
| "grad_norm": 65.00881113396572, | |
| "learning_rate": 4.779192262607702e-07, | |
| "logits/chosen": -1.1799297332763672, | |
| "logits/rejected": -1.1909632682800293, | |
| "logps/chosen": -51.90432357788086, | |
| "logps/rejected": -67.85346221923828, | |
| "loss": 0.3613, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": 0.8239190578460693, | |
| "rewards/margins": 2.4376637935638428, | |
| "rewards/rejected": -1.6137449741363525, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4477497115014745, | |
| "grad_norm": 45.51586779707086, | |
| "learning_rate": 4.770832561487758e-07, | |
| "logits/chosen": -1.273619294166565, | |
| "logits/rejected": -1.2734426259994507, | |
| "logps/chosen": -51.00930404663086, | |
| "logps/rejected": -50.82846450805664, | |
| "loss": 0.2704, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.6313294172286987, | |
| "rewards/margins": 1.9725594520568848, | |
| "rewards/rejected": -1.3412299156188965, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.45236568790870624, | |
| "grad_norm": 59.66597715979004, | |
| "learning_rate": 4.762325130748097e-07, | |
| "logits/chosen": -1.1289526224136353, | |
| "logits/rejected": -1.1318069696426392, | |
| "logps/chosen": -56.16923522949219, | |
| "logps/rejected": -53.92496109008789, | |
| "loss": 0.3301, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.5848253965377808, | |
| "rewards/margins": 1.7668564319610596, | |
| "rewards/rejected": -1.1820309162139893, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.45698166431593795, | |
| "grad_norm": 27.39596009585691, | |
| "learning_rate": 4.7536705238418995e-07, | |
| "logits/chosen": -1.2408547401428223, | |
| "logits/rejected": -1.2465531826019287, | |
| "logps/chosen": -51.9420280456543, | |
| "logps/rejected": -59.4866943359375, | |
| "loss": 0.2815, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.4031212031841278, | |
| "rewards/margins": 2.25166654586792, | |
| "rewards/rejected": -1.8485453128814697, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4615976407231696, | |
| "grad_norm": 55.89572206023644, | |
| "learning_rate": 4.7448693037969336e-07, | |
| "logits/chosen": -1.221846103668213, | |
| "logits/rejected": -1.229733943939209, | |
| "logps/chosen": -51.18299102783203, | |
| "logps/rejected": -57.01997375488281, | |
| "loss": 0.3319, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.33577215671539307, | |
| "rewards/margins": 1.9459080696105957, | |
| "rewards/rejected": -1.6101359128952026, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4615976407231696, | |
| "eval_logits/chosen": -1.1536659002304077, | |
| "eval_logits/rejected": -1.1624053716659546, | |
| "eval_logps/chosen": -47.9106559753418, | |
| "eval_logps/rejected": -59.707550048828125, | |
| "eval_loss": 0.3186802566051483, | |
| "eval_rewards/accuracies": 0.8012672662734985, | |
| "eval_rewards/chosen": 0.3613872528076172, | |
| "eval_rewards/margins": 2.168609857559204, | |
| "eval_rewards/rejected": -1.807222604751587, | |
| "eval_runtime": 227.3404, | |
| "eval_samples_per_second": 7.627, | |
| "eval_steps_per_second": 1.909, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4662136171304013, | |
| "grad_norm": 47.53414859111433, | |
| "learning_rate": 4.735922043178923e-07, | |
| "logits/chosen": -1.130042552947998, | |
| "logits/rejected": -1.1414666175842285, | |
| "logps/chosen": -50.747615814208984, | |
| "logps/rejected": -67.29204559326172, | |
| "loss": 0.2861, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.3648765981197357, | |
| "rewards/margins": 2.223259449005127, | |
| "rewards/rejected": -1.8583827018737793, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.47082959353763304, | |
| "grad_norm": 45.15212642774012, | |
| "learning_rate": 4.7268293240543017e-07, | |
| "logits/chosen": -1.2075278759002686, | |
| "logits/rejected": -1.2145761251449585, | |
| "logps/chosen": -50.80036163330078, | |
| "logps/rejected": -64.42232513427734, | |
| "loss": 0.3686, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.3706679344177246, | |
| "rewards/margins": 2.0169572830200195, | |
| "rewards/rejected": -1.646289348602295, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4754455699448647, | |
| "grad_norm": 51.8551798480754, | |
| "learning_rate": 4.717591737952344e-07, | |
| "logits/chosen": -1.225889801979065, | |
| "logits/rejected": -1.2406290769577026, | |
| "logps/chosen": -42.99085235595703, | |
| "logps/rejected": -63.13933563232422, | |
| "loss": 0.31, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.2732833921909332, | |
| "rewards/margins": 2.319049835205078, | |
| "rewards/rejected": -2.045766592025757, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4800615463520964, | |
| "grad_norm": 45.48745944142938, | |
| "learning_rate": 4.7082098858266837e-07, | |
| "logits/chosen": -1.2216678857803345, | |
| "logits/rejected": -1.2374674081802368, | |
| "logps/chosen": -39.35933303833008, | |
| "logps/rejected": -69.94754791259766, | |
| "loss": 0.3897, | |
| "rewards/accuracies": 0.7083333134651184, | |
| "rewards/chosen": 0.15072119235992432, | |
| "rewards/margins": 2.5561957359313965, | |
| "rewards/rejected": -2.4054746627807617, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4846775227593281, | |
| "grad_norm": 23.330292154413335, | |
| "learning_rate": 4.698684378016222e-07, | |
| "logits/chosen": -1.235012412071228, | |
| "logits/rejected": -1.2410335540771484, | |
| "logps/chosen": -51.16822052001953, | |
| "logps/rejected": -67.03107452392578, | |
| "loss": 0.2774, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5210573673248291, | |
| "rewards/margins": 2.396049976348877, | |
| "rewards/rejected": -1.8749926090240479, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.48929349916655984, | |
| "grad_norm": 44.38100018119066, | |
| "learning_rate": 4.6890158342054174e-07, | |
| "logits/chosen": -1.2579890489578247, | |
| "logits/rejected": -1.2686585187911987, | |
| "logps/chosen": -43.69232177734375, | |
| "logps/rejected": -59.38545608520508, | |
| "loss": 0.3209, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.6999004483222961, | |
| "rewards/margins": 2.572566032409668, | |
| "rewards/rejected": -1.8726658821105957, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4939094755737915, | |
| "grad_norm": 48.05758145861009, | |
| "learning_rate": 4.679204883383973e-07, | |
| "logits/chosen": -1.291759729385376, | |
| "logits/rejected": -1.311813235282898, | |
| "logps/chosen": -42.26872253417969, | |
| "logps/rejected": -75.81320190429688, | |
| "loss": 0.2963, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.7384893894195557, | |
| "rewards/margins": 3.459284782409668, | |
| "rewards/rejected": -2.7207956314086914, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4985254519810232, | |
| "grad_norm": 49.231820908366814, | |
| "learning_rate": 4.669252163805919e-07, | |
| "logits/chosen": -1.2944141626358032, | |
| "logits/rejected": -1.3142738342285156, | |
| "logps/chosen": -45.93452453613281, | |
| "logps/rejected": -61.74488830566406, | |
| "loss": 0.3873, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.7554634809494019, | |
| "rewards/margins": 2.4260568618774414, | |
| "rewards/rejected": -1.6705933809280396, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5031414283882549, | |
| "grad_norm": 39.371230612811594, | |
| "learning_rate": 4.65915832294809e-07, | |
| "logits/chosen": -1.2367289066314697, | |
| "logits/rejected": -1.2460105419158936, | |
| "logps/chosen": -43.46434783935547, | |
| "logps/rejected": -65.94113159179688, | |
| "loss": 0.3123, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.8203165531158447, | |
| "rewards/margins": 2.6564347743988037, | |
| "rewards/rejected": -1.836118221282959, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5077574047954866, | |
| "grad_norm": 36.504230004493856, | |
| "learning_rate": 4.6489240174680026e-07, | |
| "logits/chosen": -1.274338722229004, | |
| "logits/rejected": -1.274038314819336, | |
| "logps/chosen": -46.72291564941406, | |
| "logps/rejected": -46.98860549926758, | |
| "loss": 0.3653, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.7758186459541321, | |
| "rewards/margins": 1.619003415107727, | |
| "rewards/rejected": -0.8431846499443054, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5077574047954866, | |
| "eval_logits/chosen": -1.1508095264434814, | |
| "eval_logits/rejected": -1.158846139907837, | |
| "eval_logps/chosen": -47.08699035644531, | |
| "eval_logps/rejected": -59.3014030456543, | |
| "eval_loss": 0.30850934982299805, | |
| "eval_rewards/accuracies": 0.8029953837394714, | |
| "eval_rewards/chosen": 0.7732176184654236, | |
| "eval_rewards/margins": 2.377366065979004, | |
| "eval_rewards/rejected": -1.6041483879089355, | |
| "eval_runtime": 227.3591, | |
| "eval_samples_per_second": 7.627, | |
| "eval_steps_per_second": 1.909, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5123733812027182, | |
| "grad_norm": 58.78605593626179, | |
| "learning_rate": 4.638549913161138e-07, | |
| "logits/chosen": -1.163451910018921, | |
| "logits/rejected": -1.1646764278411865, | |
| "logps/chosen": -52.2976188659668, | |
| "logps/rejected": -53.40987777709961, | |
| "loss": 0.2728, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.7431490421295166, | |
| "rewards/margins": 2.2729909420013428, | |
| "rewards/rejected": -1.5298418998718262, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.51698935760995, | |
| "grad_norm": 47.481692611666446, | |
| "learning_rate": 4.6280366849176267e-07, | |
| "logits/chosen": -1.1689667701721191, | |
| "logits/rejected": -1.176836371421814, | |
| "logps/chosen": -48.97719192504883, | |
| "logps/rejected": -53.70207977294922, | |
| "loss": 0.3088, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.4173290729522705, | |
| "rewards/margins": 1.9205392599105835, | |
| "rewards/rejected": -1.503210186958313, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5216053340171817, | |
| "grad_norm": 26.23879949976686, | |
| "learning_rate": 4.6173850166783446e-07, | |
| "logits/chosen": -1.1005306243896484, | |
| "logits/rejected": -1.1028097867965698, | |
| "logps/chosen": -48.85009002685547, | |
| "logps/rejected": -60.73183822631836, | |
| "loss": 0.2688, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.3439117670059204, | |
| "rewards/margins": 2.138643741607666, | |
| "rewards/rejected": -1.7947319746017456, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5262213104244133, | |
| "grad_norm": 42.68538113603284, | |
| "learning_rate": 4.606595601390417e-07, | |
| "logits/chosen": -1.1647553443908691, | |
| "logits/rejected": -1.1835236549377441, | |
| "logps/chosen": -47.58196258544922, | |
| "logps/rejected": -73.24917602539062, | |
| "loss": 0.2677, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.032037004828453064, | |
| "rewards/margins": 2.9576616287231445, | |
| "rewards/rejected": -2.9256248474121094, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5308372868316451, | |
| "grad_norm": 47.39085343776366, | |
| "learning_rate": 4.595669140962143e-07, | |
| "logits/chosen": -1.2832393646240234, | |
| "logits/rejected": -1.3168139457702637, | |
| "logps/chosen": -42.34735870361328, | |
| "logps/rejected": -91.76000213623047, | |
| "loss": 0.3269, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.22074511647224426, | |
| "rewards/margins": 3.9149208068847656, | |
| "rewards/rejected": -4.1356658935546875, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5354532632388768, | |
| "grad_norm": 34.48196631675489, | |
| "learning_rate": 4.5846063462173284e-07, | |
| "logits/chosen": -1.2154712677001953, | |
| "logits/rejected": -1.2207145690917969, | |
| "logps/chosen": -46.80333709716797, | |
| "logps/rejected": -61.56758499145508, | |
| "loss": 0.299, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.07934803515672684, | |
| "rewards/margins": 2.2728195190429688, | |
| "rewards/rejected": -2.1934714317321777, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5400692396461084, | |
| "grad_norm": 53.84175279608908, | |
| "learning_rate": 4.573407936849044e-07, | |
| "logits/chosen": -1.235826015472412, | |
| "logits/rejected": -1.2379093170166016, | |
| "logps/chosen": -53.666229248046875, | |
| "logps/rejected": -56.39564895629883, | |
| "loss": 0.34, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.15499740839004517, | |
| "rewards/margins": 2.093020439147949, | |
| "rewards/rejected": -1.9380230903625488, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5446852160533402, | |
| "grad_norm": 55.22566434624621, | |
| "learning_rate": 4.5620746413728063e-07, | |
| "logits/chosen": -1.168860912322998, | |
| "logits/rejected": -1.167116641998291, | |
| "logps/chosen": -59.95442199707031, | |
| "logps/rejected": -55.52897644042969, | |
| "loss": 0.2556, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.048602912575006485, | |
| "rewards/margins": 2.1055731773376465, | |
| "rewards/rejected": -2.0569701194763184, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5493011924605719, | |
| "grad_norm": 40.26544195874405, | |
| "learning_rate": 4.550607197079185e-07, | |
| "logits/chosen": -1.1609958410263062, | |
| "logits/rejected": -1.158744215965271, | |
| "logps/chosen": -46.247127532958984, | |
| "logps/rejected": -53.768096923828125, | |
| "loss": 0.2776, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5583436489105225, | |
| "rewards/margins": 1.9103565216064453, | |
| "rewards/rejected": -1.3520128726959229, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5539171688678035, | |
| "grad_norm": 22.296733757945297, | |
| "learning_rate": 4.5390063499858353e-07, | |
| "logits/chosen": -1.143466591835022, | |
| "logits/rejected": -1.1510720252990723, | |
| "logps/chosen": -56.236507415771484, | |
| "logps/rejected": -72.3839111328125, | |
| "loss": 0.2216, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.30002227425575256, | |
| "rewards/margins": 2.8752281665802, | |
| "rewards/rejected": -2.5752060413360596, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5539171688678035, | |
| "eval_logits/chosen": -1.1655231714248657, | |
| "eval_logits/rejected": -1.1714258193969727, | |
| "eval_logps/chosen": -47.62031173706055, | |
| "eval_logps/rejected": -60.05898666381836, | |
| "eval_loss": 0.2956756353378296, | |
| "eval_rewards/accuracies": 0.8093317747116089, | |
| "eval_rewards/chosen": 0.5065575242042542, | |
| "eval_rewards/margins": 2.4894962310791016, | |
| "eval_rewards/rejected": -1.9829388856887817, | |
| "eval_runtime": 227.4458, | |
| "eval_samples_per_second": 7.624, | |
| "eval_steps_per_second": 1.908, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5585331452750353, | |
| "grad_norm": 25.00341489978113, | |
| "learning_rate": 4.5272728547889687e-07, | |
| "logits/chosen": -1.2413771152496338, | |
| "logits/rejected": -1.2534655332565308, | |
| "logps/chosen": -51.89939880371094, | |
| "logps/rejected": -61.60024642944336, | |
| "loss": 0.2157, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.444425106048584, | |
| "rewards/margins": 3.039713144302368, | |
| "rewards/rejected": -2.595287799835205, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5631491216822669, | |
| "grad_norm": 59.288877946216886, | |
| "learning_rate": 4.5154074748142535e-07, | |
| "logits/chosen": -1.1979435682296753, | |
| "logits/rejected": -1.200371265411377, | |
| "logps/chosen": -51.88737106323242, | |
| "logps/rejected": -62.96666717529297, | |
| "loss": 0.3125, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.536859929561615, | |
| "rewards/margins": 2.3762218952178955, | |
| "rewards/rejected": -1.8393617868423462, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5677650980894986, | |
| "grad_norm": 49.81197105903562, | |
| "learning_rate": 4.503410981967158e-07, | |
| "logits/chosen": -1.1927361488342285, | |
| "logits/rejected": -1.2023940086364746, | |
| "logps/chosen": -43.52791213989258, | |
| "logps/rejected": -68.50804138183594, | |
| "loss": 0.3784, | |
| "rewards/accuracies": 0.7083333134651184, | |
| "rewards/chosen": 0.9149570465087891, | |
| "rewards/margins": 2.78161883354187, | |
| "rewards/rejected": -1.8666616678237915, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5723810744967304, | |
| "grad_norm": 36.729769896842384, | |
| "learning_rate": 4.4912841566827333e-07, | |
| "logits/chosen": -1.2108688354492188, | |
| "logits/rejected": -1.2207088470458984, | |
| "logps/chosen": -47.57191848754883, | |
| "logps/rejected": -65.24411010742188, | |
| "loss": 0.2484, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 1.3121000528335571, | |
| "rewards/margins": 2.9495744705200195, | |
| "rewards/rejected": -1.6374740600585938, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.576997050903962, | |
| "grad_norm": 52.94413715920982, | |
| "learning_rate": 4.4790277878748415e-07, | |
| "logits/chosen": -1.2431470155715942, | |
| "logits/rejected": -1.2514811754226685, | |
| "logps/chosen": -41.430206298828125, | |
| "logps/rejected": -57.99823760986328, | |
| "loss": 0.2988, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.8287627696990967, | |
| "rewards/margins": 2.483140707015991, | |
| "rewards/rejected": -1.6543784141540527, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5816130273111938, | |
| "grad_norm": 35.54104658985613, | |
| "learning_rate": 4.466642672884835e-07, | |
| "logits/chosen": -1.1631712913513184, | |
| "logits/rejected": -1.168829321861267, | |
| "logps/chosen": -44.09839630126953, | |
| "logps/rejected": -58.30562973022461, | |
| "loss": 0.2578, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.6901536583900452, | |
| "rewards/margins": 2.6523261070251465, | |
| "rewards/rejected": -1.9621726274490356, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5862290037184255, | |
| "grad_norm": 37.73217497274028, | |
| "learning_rate": 4.454129617429682e-07, | |
| "logits/chosen": -1.2659639120101929, | |
| "logits/rejected": -1.26548433303833, | |
| "logps/chosen": -48.81840515136719, | |
| "logps/rejected": -50.18553161621094, | |
| "loss": 0.303, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.6821924448013306, | |
| "rewards/margins": 2.1100261211395264, | |
| "rewards/rejected": -1.4278337955474854, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5908449801256571, | |
| "grad_norm": 19.374735161669783, | |
| "learning_rate": 4.441489435549551e-07, | |
| "logits/chosen": -1.1399126052856445, | |
| "logits/rejected": -1.1459710597991943, | |
| "logps/chosen": -51.82578659057617, | |
| "logps/rejected": -68.86441040039062, | |
| "loss": 0.2382, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.7167325615882874, | |
| "rewards/margins": 3.1428277492523193, | |
| "rewards/rejected": -2.426095485687256, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5954609565328889, | |
| "grad_norm": 41.763567555359415, | |
| "learning_rate": 4.4287229495548573e-07, | |
| "logits/chosen": -1.183684229850769, | |
| "logits/rejected": -1.190173625946045, | |
| "logps/chosen": -53.23274230957031, | |
| "logps/rejected": -66.342041015625, | |
| "loss": 0.2552, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.7042552828788757, | |
| "rewards/margins": 3.324800729751587, | |
| "rewards/rejected": -2.6205453872680664, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6000769329401205, | |
| "grad_norm": 53.08605142258815, | |
| "learning_rate": 4.415830989972761e-07, | |
| "logits/chosen": -1.1716980934143066, | |
| "logits/rejected": -1.1756532192230225, | |
| "logps/chosen": -48.24959182739258, | |
| "logps/rejected": -57.04353713989258, | |
| "loss": 0.2986, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.6846582889556885, | |
| "rewards/margins": 2.4726505279541016, | |
| "rewards/rejected": -1.7879924774169922, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6000769329401205, | |
| "eval_logits/chosen": -1.1839433908462524, | |
| "eval_logits/rejected": -1.1875815391540527, | |
| "eval_logps/chosen": -47.51332092285156, | |
| "eval_logps/rejected": -60.25726318359375, | |
| "eval_loss": 0.2889851927757263, | |
| "eval_rewards/accuracies": 0.8139401078224182, | |
| "eval_rewards/chosen": 0.5600550174713135, | |
| "eval_rewards/margins": 2.6421334743499756, | |
| "eval_rewards/rejected": -2.082078218460083, | |
| "eval_runtime": 227.3437, | |
| "eval_samples_per_second": 7.627, | |
| "eval_steps_per_second": 1.909, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6046929093473522, | |
| "grad_norm": 40.94616402060229, | |
| "learning_rate": 4.402814395493142e-07, | |
| "logits/chosen": -1.2763242721557617, | |
| "logits/rejected": -1.2830497026443481, | |
| "logps/chosen": -46.679168701171875, | |
| "logps/rejected": -46.02785873413086, | |
| "loss": 0.3389, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.5309950113296509, | |
| "rewards/margins": 1.931261658668518, | |
| "rewards/rejected": -1.4002668857574463, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.609308885754584, | |
| "grad_norm": 37.45563925423882, | |
| "learning_rate": 4.3896740129140354e-07, | |
| "logits/chosen": -1.2274925708770752, | |
| "logits/rejected": -1.2251402139663696, | |
| "logps/chosen": -49.84899139404297, | |
| "logps/rejected": -48.70707702636719, | |
| "loss": 0.2565, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.42680761218070984, | |
| "rewards/margins": 2.4617788791656494, | |
| "rewards/rejected": -2.0349714756011963, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6139248621618156, | |
| "grad_norm": 39.43164361196429, | |
| "learning_rate": 4.3764106970865456e-07, | |
| "logits/chosen": -1.2649712562561035, | |
| "logits/rejected": -1.2683297395706177, | |
| "logps/chosen": -42.68367004394531, | |
| "logps/rejected": -59.22146224975586, | |
| "loss": 0.3223, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.032459404319524765, | |
| "rewards/margins": 2.20072603225708, | |
| "rewards/rejected": -2.168266534805298, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6185408385690473, | |
| "grad_norm": 38.60665005525895, | |
| "learning_rate": 4.3630253108592305e-07, | |
| "logits/chosen": -1.1689542531967163, | |
| "logits/rejected": -1.1707943677902222, | |
| "logps/chosen": -55.54972839355469, | |
| "logps/rejected": -63.90589141845703, | |
| "loss": 0.2454, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.198521688580513, | |
| "rewards/margins": 2.9068400859832764, | |
| "rewards/rejected": -3.1053614616394043, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6231568149762791, | |
| "grad_norm": 52.06719541190528, | |
| "learning_rate": 4.3495187250219723e-07, | |
| "logits/chosen": -1.2435555458068848, | |
| "logits/rejected": -1.247399926185608, | |
| "logps/chosen": -44.81391906738281, | |
| "logps/rejected": -66.15491485595703, | |
| "loss": 0.3261, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.1677350103855133, | |
| "rewards/margins": 2.8860220909118652, | |
| "rewards/rejected": -3.053757429122925, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6277727913835107, | |
| "grad_norm": 34.05944822234142, | |
| "learning_rate": 4.3358918182493253e-07, | |
| "logits/chosen": -1.1470799446105957, | |
| "logits/rejected": -1.146039366722107, | |
| "logps/chosen": -48.27168273925781, | |
| "logps/rejected": -55.25007629394531, | |
| "loss": 0.2124, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.0416293665766716, | |
| "rewards/margins": 2.1943235397338867, | |
| "rewards/rejected": -2.1526942253112793, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6323887677907424, | |
| "grad_norm": 32.55880144659725, | |
| "learning_rate": 4.3221454770433554e-07, | |
| "logits/chosen": -1.2215373516082764, | |
| "logits/rejected": -1.223750352859497, | |
| "logps/chosen": -53.73216247558594, | |
| "logps/rejected": -58.15590286254883, | |
| "loss": 0.217, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.4631071090698242, | |
| "rewards/margins": 2.812567949295044, | |
| "rewards/rejected": -2.3494608402252197, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6370047441979741, | |
| "grad_norm": 37.29286642220413, | |
| "learning_rate": 4.308280595675966e-07, | |
| "logits/chosen": -1.2593313455581665, | |
| "logits/rejected": -1.2601191997528076, | |
| "logps/chosen": -51.24105453491211, | |
| "logps/rejected": -58.06007766723633, | |
| "loss": 0.305, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.41045814752578735, | |
| "rewards/margins": 2.3772544860839844, | |
| "rewards/rejected": -1.9667962789535522, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6416207206052058, | |
| "grad_norm": 27.73832644868097, | |
| "learning_rate": 4.2942980761307227e-07, | |
| "logits/chosen": -1.2309229373931885, | |
| "logits/rejected": -1.2335686683654785, | |
| "logps/chosen": -48.84478759765625, | |
| "logps/rejected": -58.93284225463867, | |
| "loss": 0.2202, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.5729148387908936, | |
| "rewards/margins": 2.5622036457061768, | |
| "rewards/rejected": -1.9892889261245728, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6462366970124375, | |
| "grad_norm": 49.846247584121116, | |
| "learning_rate": 4.2801988280441765e-07, | |
| "logits/chosen": -1.2041369676589966, | |
| "logits/rejected": -1.2069140672683716, | |
| "logps/chosen": -50.05432891845703, | |
| "logps/rejected": -59.74306106567383, | |
| "loss": 0.2356, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.4567064046859741, | |
| "rewards/margins": 2.9142255783081055, | |
| "rewards/rejected": -1.4575190544128418, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6462366970124375, | |
| "eval_logits/chosen": -1.1684162616729736, | |
| "eval_logits/rejected": -1.1727546453475952, | |
| "eval_logps/chosen": -45.66713333129883, | |
| "eval_logps/rejected": -58.96503829956055, | |
| "eval_loss": 0.28808632493019104, | |
| "eval_rewards/accuracies": 0.807603657245636, | |
| "eval_rewards/chosen": 1.483147144317627, | |
| "eval_rewards/margins": 2.9191133975982666, | |
| "eval_rewards/rejected": -1.4359666109085083, | |
| "eval_runtime": 227.2327, | |
| "eval_samples_per_second": 7.631, | |
| "eval_steps_per_second": 1.91, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6508526734196692, | |
| "grad_norm": 37.200050161533866, | |
| "learning_rate": 4.2659837686466813e-07, | |
| "logits/chosen": -1.2258718013763428, | |
| "logits/rejected": -1.226365327835083, | |
| "logps/chosen": -45.65945816040039, | |
| "logps/rejected": -56.360023498535156, | |
| "loss": 0.2663, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 1.7170668840408325, | |
| "rewards/margins": 2.946417808532715, | |
| "rewards/rejected": -1.2293510437011719, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6554686498269009, | |
| "grad_norm": 46.91080670819706, | |
| "learning_rate": 4.25165382270273e-07, | |
| "logits/chosen": -1.194913387298584, | |
| "logits/rejected": -1.1949591636657715, | |
| "logps/chosen": -41.45118713378906, | |
| "logps/rejected": -49.03273391723633, | |
| "loss": 0.2576, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 1.5383753776550293, | |
| "rewards/margins": 2.4985907077789307, | |
| "rewards/rejected": -0.9602153301239014, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6600846262341326, | |
| "grad_norm": 38.28106252628991, | |
| "learning_rate": 4.2372099224507875e-07, | |
| "logits/chosen": -1.262522578239441, | |
| "logits/rejected": -1.2750235795974731, | |
| "logps/chosen": -38.96574401855469, | |
| "logps/rejected": -67.78875732421875, | |
| "loss": 0.2815, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 1.0857527256011963, | |
| "rewards/margins": 3.374871253967285, | |
| "rewards/rejected": -2.289118766784668, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6647006026413643, | |
| "grad_norm": 48.61343601009988, | |
| "learning_rate": 4.2226530075426503e-07, | |
| "logits/chosen": -1.1528538465499878, | |
| "logits/rejected": -1.1555874347686768, | |
| "logps/chosen": -56.5745849609375, | |
| "logps/rejected": -58.78268051147461, | |
| "loss": 0.2806, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.9941625595092773, | |
| "rewards/margins": 2.6789402961730957, | |
| "rewards/rejected": -1.6847774982452393, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.669316579048596, | |
| "grad_norm": 41.895687140764245, | |
| "learning_rate": 4.2079840249823106e-07, | |
| "logits/chosen": -1.1860905885696411, | |
| "logits/rejected": -1.1894373893737793, | |
| "logps/chosen": -50.26545715332031, | |
| "logps/rejected": -72.03532409667969, | |
| "loss": 0.289, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.2916366159915924, | |
| "rewards/margins": 3.1359870433807373, | |
| "rewards/rejected": -2.8443503379821777, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6739325554558276, | |
| "grad_norm": 41.288114692753076, | |
| "learning_rate": 4.193203929064353e-07, | |
| "logits/chosen": -1.2005698680877686, | |
| "logits/rejected": -1.2083783149719238, | |
| "logps/chosen": -51.12635803222656, | |
| "logps/rejected": -71.51712036132812, | |
| "loss": 0.345, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.0697176456451416, | |
| "rewards/margins": 2.9638874530792236, | |
| "rewards/rejected": -2.894169807434082, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6785485318630594, | |
| "grad_norm": 42.3623221323418, | |
| "learning_rate": 4.1783136813118705e-07, | |
| "logits/chosen": -1.222592830657959, | |
| "logits/rejected": -1.225614309310913, | |
| "logps/chosen": -50.67860794067383, | |
| "logps/rejected": -61.38881301879883, | |
| "loss": 0.2915, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.12122215330600739, | |
| "rewards/margins": 2.681208610534668, | |
| "rewards/rejected": -2.8024303913116455, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6831645082702911, | |
| "grad_norm": 22.982390981962695, | |
| "learning_rate": 4.163314250413913e-07, | |
| "logits/chosen": -1.1687074899673462, | |
| "logits/rejected": -1.165205955505371, | |
| "logps/chosen": -46.08445739746094, | |
| "logps/rejected": -56.52994918823242, | |
| "loss": 0.1833, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.2055283784866333, | |
| "rewards/margins": 2.5608911514282227, | |
| "rewards/rejected": -2.355362892150879, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6877804846775227, | |
| "grad_norm": 37.84043310920863, | |
| "learning_rate": 4.1482066121624716e-07, | |
| "logits/chosen": -1.207397222518921, | |
| "logits/rejected": -1.208192229270935, | |
| "logps/chosen": -49.74457550048828, | |
| "logps/rejected": -50.62785339355469, | |
| "loss": 0.3247, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.17621606588363647, | |
| "rewards/margins": 2.258789539337158, | |
| "rewards/rejected": -2.082573413848877, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6923964610847545, | |
| "grad_norm": 41.026889158159065, | |
| "learning_rate": 4.1329917493889933e-07, | |
| "logits/chosen": -1.3157416582107544, | |
| "logits/rejected": -1.3190845251083374, | |
| "logps/chosen": -46.27326965332031, | |
| "logps/rejected": -61.60360336303711, | |
| "loss": 0.2407, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.021905170753598213, | |
| "rewards/margins": 2.830016613006592, | |
| "rewards/rejected": -2.8081114292144775, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6923964610847545, | |
| "eval_logits/chosen": -1.1676603555679321, | |
| "eval_logits/rejected": -1.1713868379592896, | |
| "eval_logps/chosen": -47.389183044433594, | |
| "eval_logps/rejected": -60.791683197021484, | |
| "eval_loss": 0.2743883430957794, | |
| "eval_rewards/accuracies": 0.8133640289306641, | |
| "eval_rewards/chosen": 0.6221204400062561, | |
| "eval_rewards/margins": 2.9714088439941406, | |
| "eval_rewards/rejected": -2.349287986755371, | |
| "eval_runtime": 227.521, | |
| "eval_samples_per_second": 7.621, | |
| "eval_steps_per_second": 1.908, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6970124374919862, | |
| "grad_norm": 42.14326526501706, | |
| "learning_rate": 4.117670651900446e-07, | |
| "logits/chosen": -1.2038425207138062, | |
| "logits/rejected": -1.2065904140472412, | |
| "logps/chosen": -50.9475212097168, | |
| "logps/rejected": -56.376590728759766, | |
| "loss": 0.3052, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.660882830619812, | |
| "rewards/margins": 2.4505326747894287, | |
| "rewards/rejected": -1.789649486541748, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7016284138992178, | |
| "grad_norm": 59.036735448430065, | |
| "learning_rate": 4.1022443164149237e-07, | |
| "logits/chosen": -1.1808403730392456, | |
| "logits/rejected": -1.1900469064712524, | |
| "logps/chosen": -51.46991729736328, | |
| "logps/rejected": -69.61980438232422, | |
| "loss": 0.2946, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.5994511842727661, | |
| "rewards/margins": 3.409363269805908, | |
| "rewards/rejected": -2.8099122047424316, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7062443903064496, | |
| "grad_norm": 29.330872231920097, | |
| "learning_rate": 4.086713746496808e-07, | |
| "logits/chosen": -1.2124900817871094, | |
| "logits/rejected": -1.2128535509109497, | |
| "logps/chosen": -45.35523223876953, | |
| "logps/rejected": -56.160545349121094, | |
| "loss": 0.3006, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.7718818187713623, | |
| "rewards/margins": 2.726104259490967, | |
| "rewards/rejected": -1.954222559928894, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7108603667136812, | |
| "grad_norm": 34.74292770737631, | |
| "learning_rate": 4.0710799524914805e-07, | |
| "logits/chosen": -1.1383283138275146, | |
| "logits/rejected": -1.1413955688476562, | |
| "logps/chosen": -55.99458312988281, | |
| "logps/rejected": -62.90273666381836, | |
| "loss": 0.2295, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.6053013205528259, | |
| "rewards/margins": 2.9940264225006104, | |
| "rewards/rejected": -2.3887250423431396, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7154763431209129, | |
| "grad_norm": 30.476243441323902, | |
| "learning_rate": 4.055343951459592e-07, | |
| "logits/chosen": -1.191731572151184, | |
| "logits/rejected": -1.1988056898117065, | |
| "logps/chosen": -44.046875, | |
| "logps/rejected": -64.41764068603516, | |
| "loss": 0.2429, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.2551959455013275, | |
| "rewards/margins": 3.2936155796051025, | |
| "rewards/rejected": -3.038419485092163, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7200923195281447, | |
| "grad_norm": 30.700879501780552, | |
| "learning_rate": 4.0395067671108985e-07, | |
| "logits/chosen": -1.2448992729187012, | |
| "logits/rejected": -1.2440135478973389, | |
| "logps/chosen": -41.98812484741211, | |
| "logps/rejected": -49.34878921508789, | |
| "loss": 0.2697, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.3203623592853546, | |
| "rewards/margins": 2.6056876182556152, | |
| "rewards/rejected": -2.285325050354004, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7247082959353763, | |
| "grad_norm": 42.617870752566596, | |
| "learning_rate": 4.0235694297376637e-07, | |
| "logits/chosen": -1.139160394668579, | |
| "logits/rejected": -1.1415181159973145, | |
| "logps/chosen": -58.74495315551758, | |
| "logps/rejected": -63.99246597290039, | |
| "loss": 0.2614, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.32423093914985657, | |
| "rewards/margins": 2.9529130458831787, | |
| "rewards/rejected": -2.6286821365356445, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.729324272342608, | |
| "grad_norm": 40.78600953952273, | |
| "learning_rate": 4.0075329761476347e-07, | |
| "logits/chosen": -1.216194748878479, | |
| "logits/rejected": -1.2174675464630127, | |
| "logps/chosen": -50.62156677246094, | |
| "logps/rejected": -53.62016296386719, | |
| "loss": 0.2407, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.07387811690568924, | |
| "rewards/margins": 2.2588860988616943, | |
| "rewards/rejected": -2.332764148712158, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7339402487498398, | |
| "grad_norm": 30.002748027873594, | |
| "learning_rate": 3.991398449596588e-07, | |
| "logits/chosen": -1.2065101861953735, | |
| "logits/rejected": -1.211814045906067, | |
| "logps/chosen": -53.182777404785156, | |
| "logps/rejected": -66.31592559814453, | |
| "loss": 0.209, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.0831962302327156, | |
| "rewards/margins": 3.359541177749634, | |
| "rewards/rejected": -3.2763442993164062, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7385562251570714, | |
| "grad_norm": 23.665196371870486, | |
| "learning_rate": 3.9751668997204647e-07, | |
| "logits/chosen": -1.1500531435012817, | |
| "logits/rejected": -1.154016375541687, | |
| "logps/chosen": -52.478214263916016, | |
| "logps/rejected": -61.09043884277344, | |
| "loss": 0.199, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.16100816428661346, | |
| "rewards/margins": 2.913641929626465, | |
| "rewards/rejected": -2.752634048461914, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7385562251570714, | |
| "eval_logits/chosen": -1.152096152305603, | |
| "eval_logits/rejected": -1.1563034057617188, | |
| "eval_logps/chosen": -48.12975311279297, | |
| "eval_logps/rejected": -61.73065948486328, | |
| "eval_loss": 0.2689039707183838, | |
| "eval_rewards/accuracies": 0.8185483813285828, | |
| "eval_rewards/chosen": 0.2518383860588074, | |
| "eval_rewards/margins": 3.0706160068511963, | |
| "eval_rewards/rejected": -2.818777322769165, | |
| "eval_runtime": 227.7908, | |
| "eval_samples_per_second": 7.612, | |
| "eval_steps_per_second": 1.905, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7431722015643031, | |
| "grad_norm": 48.30684834494479, | |
| "learning_rate": 3.958839382467084e-07, | |
| "logits/chosen": -1.222053050994873, | |
| "logits/rejected": -1.2289328575134277, | |
| "logps/chosen": -43.89524459838867, | |
| "logps/rejected": -56.49114990234375, | |
| "loss": 0.3092, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.43327876925468445, | |
| "rewards/margins": 2.904792308807373, | |
| "rewards/rejected": -2.471513032913208, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7477881779715349, | |
| "grad_norm": 42.66239436350315, | |
| "learning_rate": 3.9424169600274494e-07, | |
| "logits/chosen": -1.2450088262557983, | |
| "logits/rejected": -1.24375581741333, | |
| "logps/chosen": -50.182308197021484, | |
| "logps/rejected": -56.25563049316406, | |
| "loss": 0.2956, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.13703547418117523, | |
| "rewards/margins": 2.3541619777679443, | |
| "rewards/rejected": -2.4911975860595703, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7524041543787665, | |
| "grad_norm": 27.173798633882665, | |
| "learning_rate": 3.9259007007666436e-07, | |
| "logits/chosen": -1.212989330291748, | |
| "logits/rejected": -1.2185275554656982, | |
| "logps/chosen": -51.97760009765625, | |
| "logps/rejected": -63.76836395263672, | |
| "loss": 0.282, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.34300458431243896, | |
| "rewards/margins": 2.9810492992401123, | |
| "rewards/rejected": -2.638044595718384, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7570201307859982, | |
| "grad_norm": 41.510696079488966, | |
| "learning_rate": 3.909291679154332e-07, | |
| "logits/chosen": -1.2237902879714966, | |
| "logits/rejected": -1.232872486114502, | |
| "logps/chosen": -48.094669342041016, | |
| "logps/rejected": -73.24827575683594, | |
| "loss": 0.3194, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.10229718685150146, | |
| "rewards/margins": 3.730062246322632, | |
| "rewards/rejected": -3.832359790802002, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7616361071932299, | |
| "grad_norm": 36.28616141840599, | |
| "learning_rate": 3.892590975694858e-07, | |
| "logits/chosen": -1.2199351787567139, | |
| "logits/rejected": -1.2302178144454956, | |
| "logps/chosen": -44.97592544555664, | |
| "logps/rejected": -71.27304077148438, | |
| "loss": 0.23, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.4918098449707031, | |
| "rewards/margins": 4.322786331176758, | |
| "rewards/rejected": -3.830976724624634, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7662520836004616, | |
| "grad_norm": 23.858921961912298, | |
| "learning_rate": 3.875799676856952e-07, | |
| "logits/chosen": -1.1334155797958374, | |
| "logits/rejected": -1.1350713968276978, | |
| "logps/chosen": -50.02524948120117, | |
| "logps/rejected": -62.23411560058594, | |
| "loss": 0.2071, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.42125022411346436, | |
| "rewards/margins": 3.270005226135254, | |
| "rewards/rejected": -2.8487555980682373, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7708680600076933, | |
| "grad_norm": 46.04858312353762, | |
| "learning_rate": 3.858918875003053e-07, | |
| "logits/chosen": -1.2362879514694214, | |
| "logits/rejected": -1.2460957765579224, | |
| "logps/chosen": -48.63706970214844, | |
| "logps/rejected": -72.94087219238281, | |
| "loss": 0.2514, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.5030243396759033, | |
| "rewards/margins": 4.145318031311035, | |
| "rewards/rejected": -3.64229416847229, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.775484036414925, | |
| "grad_norm": 21.37867116532646, | |
| "learning_rate": 3.8419496683182396e-07, | |
| "logits/chosen": -1.1005958318710327, | |
| "logits/rejected": -1.1029105186462402, | |
| "logps/chosen": -46.87886047363281, | |
| "logps/rejected": -63.63945007324219, | |
| "loss": 0.1861, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.5341323018074036, | |
| "rewards/margins": 3.1560137271881104, | |
| "rewards/rejected": -2.6218814849853516, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7801000128221567, | |
| "grad_norm": 34.41230269099275, | |
| "learning_rate": 3.824893160738792e-07, | |
| "logits/chosen": -1.1643880605697632, | |
| "logits/rejected": -1.1722698211669922, | |
| "logps/chosen": -47.30472946166992, | |
| "logps/rejected": -66.55563354492188, | |
| "loss": 0.2727, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.9107217788696289, | |
| "rewards/margins": 3.5939722061157227, | |
| "rewards/rejected": -2.683250904083252, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7847159892293883, | |
| "grad_norm": 20.704541622004893, | |
| "learning_rate": 3.8077504618803737e-07, | |
| "logits/chosen": -1.1662912368774414, | |
| "logits/rejected": -1.1619421243667603, | |
| "logps/chosen": -56.10293197631836, | |
| "logps/rejected": -53.84746170043945, | |
| "loss": 0.2129, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.9300886988639832, | |
| "rewards/margins": 2.765589475631714, | |
| "rewards/rejected": -1.8355004787445068, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7847159892293883, | |
| "eval_logits/chosen": -1.1408087015151978, | |
| "eval_logits/rejected": -1.1458781957626343, | |
| "eval_logps/chosen": -47.36520004272461, | |
| "eval_logps/rejected": -61.242679595947266, | |
| "eval_loss": 0.26537051796913147, | |
| "eval_rewards/accuracies": 0.8185483813285828, | |
| "eval_rewards/chosen": 0.6341149806976318, | |
| "eval_rewards/margins": 3.208899736404419, | |
| "eval_rewards/rejected": -2.574784755706787, | |
| "eval_runtime": 227.4228, | |
| "eval_samples_per_second": 7.625, | |
| "eval_steps_per_second": 1.908, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7893319656366201, | |
| "grad_norm": 29.91569671776041, | |
| "learning_rate": 3.7905226869658446e-07, | |
| "logits/chosen": -1.1567282676696777, | |
| "logits/rejected": -1.1574435234069824, | |
| "logps/chosen": -50.19194793701172, | |
| "logps/rejected": -61.67422866821289, | |
| "loss": 0.2453, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.956231415271759, | |
| "rewards/margins": 3.3507864475250244, | |
| "rewards/rejected": -2.394554853439331, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7939479420438518, | |
| "grad_norm": 35.82510237615759, | |
| "learning_rate": 3.773210956752709e-07, | |
| "logits/chosen": -1.1932220458984375, | |
| "logits/rejected": -1.1889605522155762, | |
| "logps/chosen": -46.48088073730469, | |
| "logps/rejected": -49.8135986328125, | |
| "loss": 0.2891, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.5073845982551575, | |
| "rewards/margins": 2.379626989364624, | |
| "rewards/rejected": -1.8722424507141113, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7985639184510834, | |
| "grad_norm": 28.47228811073422, | |
| "learning_rate": 3.7558163974602093e-07, | |
| "logits/chosen": -1.1920644044876099, | |
| "logits/rejected": -1.201475977897644, | |
| "logps/chosen": -43.6226806640625, | |
| "logps/rejected": -64.39244842529297, | |
| "loss": 0.2762, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.10022352635860443, | |
| "rewards/margins": 3.255174398422241, | |
| "rewards/rejected": -3.1549510955810547, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8031798948583152, | |
| "grad_norm": 41.618837869458865, | |
| "learning_rate": 3.73834014069605e-07, | |
| "logits/chosen": -1.1160246133804321, | |
| "logits/rejected": -1.1237598657608032, | |
| "logps/chosen": -55.9088249206543, | |
| "logps/rejected": -71.82222747802734, | |
| "loss": 0.236, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.08166421949863434, | |
| "rewards/margins": 3.3770830631256104, | |
| "rewards/rejected": -3.2954187393188477, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8077958712655469, | |
| "grad_norm": 34.292113974031, | |
| "learning_rate": 3.7207833233827914e-07, | |
| "logits/chosen": -1.221280813217163, | |
| "logits/rejected": -1.228824257850647, | |
| "logps/chosen": -51.23641586303711, | |
| "logps/rejected": -67.17003631591797, | |
| "loss": 0.2952, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.29954856634140015, | |
| "rewards/margins": 3.7896947860717773, | |
| "rewards/rejected": -4.089242935180664, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8124118476727785, | |
| "grad_norm": 27.12194701535223, | |
| "learning_rate": 3.7031470876838786e-07, | |
| "logits/chosen": -1.1533750295639038, | |
| "logits/rejected": -1.1603755950927734, | |
| "logps/chosen": -49.57859802246094, | |
| "logps/rejected": -74.35897064208984, | |
| "loss": 0.2616, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.34589090943336487, | |
| "rewards/margins": 3.77805495262146, | |
| "rewards/rejected": -4.123946189880371, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8170278240800103, | |
| "grad_norm": 33.865318639168144, | |
| "learning_rate": 3.6854325809293455e-07, | |
| "logits/chosen": -1.2225959300994873, | |
| "logits/rejected": -1.2331852912902832, | |
| "logps/chosen": -42.565975189208984, | |
| "logps/rejected": -74.49568939208984, | |
| "loss": 0.2349, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.32742711901664734, | |
| "rewards/margins": 4.166980743408203, | |
| "rewards/rejected": -4.494408130645752, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.821643800487242, | |
| "grad_norm": 44.72468218463322, | |
| "learning_rate": 3.6676409555411653e-07, | |
| "logits/chosen": -1.1373627185821533, | |
| "logits/rejected": -1.1474027633666992, | |
| "logps/chosen": -51.0811882019043, | |
| "logps/rejected": -69.32566833496094, | |
| "loss": 0.2441, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.21452119946479797, | |
| "rewards/margins": 3.681729555130005, | |
| "rewards/rejected": -3.8962512016296387, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8262597768944736, | |
| "grad_norm": 18.3343181780076, | |
| "learning_rate": 3.6497733689582866e-07, | |
| "logits/chosen": -1.1717758178710938, | |
| "logits/rejected": -1.1715316772460938, | |
| "logps/chosen": -45.530574798583984, | |
| "logps/rejected": -56.84959030151367, | |
| "loss": 0.1942, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.07019754499197006, | |
| "rewards/margins": 3.02067232131958, | |
| "rewards/rejected": -2.950474739074707, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8308757533017054, | |
| "grad_norm": 35.844182387455, | |
| "learning_rate": 3.631830983561335e-07, | |
| "logits/chosen": -1.1136425733566284, | |
| "logits/rejected": -1.1134952306747437, | |
| "logps/chosen": -54.07844924926758, | |
| "logps/rejected": -60.01144027709961, | |
| "loss": 0.2174, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.009207261726260185, | |
| "rewards/margins": 3.035071611404419, | |
| "rewards/rejected": -3.025864601135254, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8308757533017054, | |
| "eval_logits/chosen": -1.137829303741455, | |
| "eval_logits/rejected": -1.142533779144287, | |
| "eval_logps/chosen": -49.13446807861328, | |
| "eval_logps/rejected": -63.1205940246582, | |
| "eval_loss": 0.2611147463321686, | |
| "eval_rewards/accuracies": 0.8231566548347473, | |
| "eval_rewards/chosen": -0.25051993131637573, | |
| "eval_rewards/margins": 3.2632253170013428, | |
| "eval_rewards/rejected": -3.5137455463409424, | |
| "eval_runtime": 227.4188, | |
| "eval_samples_per_second": 7.625, | |
| "eval_steps_per_second": 1.908, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.835491729708937, | |
| "grad_norm": 36.48149555986552, | |
| "learning_rate": 3.613814966596991e-07, | |
| "logits/chosen": -1.1987216472625732, | |
| "logits/rejected": -1.2045807838439941, | |
| "logps/chosen": -49.24374008178711, | |
| "logps/rejected": -66.37528228759766, | |
| "loss": 0.2604, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.2955743074417114, | |
| "rewards/margins": 3.364227533340454, | |
| "rewards/rejected": -3.659801959991455, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8401077061161687, | |
| "grad_norm": 17.123248215692428, | |
| "learning_rate": 3.595726490102059e-07, | |
| "logits/chosen": -1.1486543416976929, | |
| "logits/rejected": -1.155872106552124, | |
| "logps/chosen": -47.52320098876953, | |
| "logps/rejected": -72.59225463867188, | |
| "loss": 0.1309, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": -0.2556496858596802, | |
| "rewards/margins": 3.9981601238250732, | |
| "rewards/rejected": -4.253809928894043, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.8447236825234005, | |
| "grad_norm": 22.608323262045403, | |
| "learning_rate": 3.577566730827214e-07, | |
| "logits/chosen": -1.1728885173797607, | |
| "logits/rejected": -1.1819621324539185, | |
| "logps/chosen": -47.8609504699707, | |
| "logps/rejected": -66.17147064208984, | |
| "loss": 0.2715, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.29853469133377075, | |
| "rewards/margins": 3.493640661239624, | |
| "rewards/rejected": -3.792175769805908, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8493396589306321, | |
| "grad_norm": 37.233167771755866, | |
| "learning_rate": 3.559336870160453e-07, | |
| "logits/chosen": -1.1891751289367676, | |
| "logits/rejected": -1.193422794342041, | |
| "logps/chosen": -43.88676834106445, | |
| "logps/rejected": -61.30946350097656, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": -0.18390725553035736, | |
| "rewards/margins": 3.307628631591797, | |
| "rewards/rejected": -3.4915361404418945, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8539556353378638, | |
| "grad_norm": 26.22453096528665, | |
| "learning_rate": 3.541038094050241e-07, | |
| "logits/chosen": -1.155517339706421, | |
| "logits/rejected": -1.1603643894195557, | |
| "logps/chosen": -52.52191162109375, | |
| "logps/rejected": -73.11701202392578, | |
| "loss": 0.1787, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.36556607484817505, | |
| "rewards/margins": 4.345615863800049, | |
| "rewards/rejected": -4.711181163787842, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8585716117450956, | |
| "grad_norm": 39.171509700373235, | |
| "learning_rate": 3.52267159292835e-07, | |
| "logits/chosen": -1.1714898347854614, | |
| "logits/rejected": -1.1753745079040527, | |
| "logps/chosen": -52.40201950073242, | |
| "logps/rejected": -71.5418701171875, | |
| "loss": 0.2399, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.4184909164905548, | |
| "rewards/margins": 3.973634958267212, | |
| "rewards/rejected": -4.392125606536865, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8631875881523272, | |
| "grad_norm": 23.054835061356226, | |
| "learning_rate": 3.5042385616324236e-07, | |
| "logits/chosen": -1.3357490301132202, | |
| "logits/rejected": -1.345274806022644, | |
| "logps/chosen": -41.36846923828125, | |
| "logps/rejected": -68.94818115234375, | |
| "loss": 0.2237, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.5928651094436646, | |
| "rewards/margins": 4.040990352630615, | |
| "rewards/rejected": -4.63385534286499, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8678035645595589, | |
| "grad_norm": 23.34114570013692, | |
| "learning_rate": 3.485740199328244e-07, | |
| "logits/chosen": -1.1034616231918335, | |
| "logits/rejected": -1.1045833826065063, | |
| "logps/chosen": -55.426727294921875, | |
| "logps/rejected": -59.57293701171875, | |
| "loss": 0.1942, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.04073745757341385, | |
| "rewards/margins": 3.197631597518921, | |
| "rewards/rejected": -3.2383692264556885, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8724195409667906, | |
| "grad_norm": 19.26131705604103, | |
| "learning_rate": 3.4671777094317196e-07, | |
| "logits/chosen": -1.1123476028442383, | |
| "logits/rejected": -1.1115697622299194, | |
| "logps/chosen": -54.29782485961914, | |
| "logps/rejected": -60.28561782836914, | |
| "loss": 0.1845, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.4158029556274414, | |
| "rewards/margins": 3.2452781200408936, | |
| "rewards/rejected": -3.661081075668335, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8770355173740223, | |
| "grad_norm": 40.47170182440331, | |
| "learning_rate": 3.448552299530595e-07, | |
| "logits/chosen": -1.1856770515441895, | |
| "logits/rejected": -1.188659429550171, | |
| "logps/chosen": -47.90871047973633, | |
| "logps/rejected": -58.21343231201172, | |
| "loss": 0.2866, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.3236946761608124, | |
| "rewards/margins": 3.1452713012695312, | |
| "rewards/rejected": -3.468966245651245, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8770355173740223, | |
| "eval_logits/chosen": -1.1315786838531494, | |
| "eval_logits/rejected": -1.1364408731460571, | |
| "eval_logps/chosen": -48.8283576965332, | |
| "eval_logps/rejected": -63.289493560791016, | |
| "eval_loss": 0.2587234079837799, | |
| "eval_rewards/accuracies": 0.820852518081665, | |
| "eval_rewards/chosen": -0.09746361523866653, | |
| "eval_rewards/margins": 3.500731945037842, | |
| "eval_rewards/rejected": -3.5981955528259277, | |
| "eval_runtime": 227.2598, | |
| "eval_samples_per_second": 7.63, | |
| "eval_steps_per_second": 1.91, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.881651493781254, | |
| "grad_norm": 50.06622297059798, | |
| "learning_rate": 3.429865181305894e-07, | |
| "logits/chosen": -1.1800260543823242, | |
| "logits/rejected": -1.1820400953292847, | |
| "logps/chosen": -53.985992431640625, | |
| "logps/rejected": -63.98917770385742, | |
| "loss": 0.3147, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.11410641670227051, | |
| "rewards/margins": 3.317570447921753, | |
| "rewards/rejected": -3.4316766262054443, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8862674701884857, | |
| "grad_norm": 50.74505917044877, | |
| "learning_rate": 3.411117570453091e-07, | |
| "logits/chosen": -1.1595518589019775, | |
| "logits/rejected": -1.1614227294921875, | |
| "logps/chosen": -48.32596969604492, | |
| "logps/rejected": -61.07521057128906, | |
| "loss": 0.2287, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.054149970412254333, | |
| "rewards/margins": 3.22495174407959, | |
| "rewards/rejected": -3.279102087020874, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8908834465957174, | |
| "grad_norm": 33.82726208082807, | |
| "learning_rate": 3.392310686603025e-07, | |
| "logits/chosen": -1.2322266101837158, | |
| "logits/rejected": -1.235073447227478, | |
| "logps/chosen": -48.9878044128418, | |
| "logps/rejected": -57.65345001220703, | |
| "loss": 0.3178, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": -0.5888361930847168, | |
| "rewards/margins": 2.6325643062591553, | |
| "rewards/rejected": -3.221400260925293, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.895499423002949, | |
| "grad_norm": 31.968246601846598, | |
| "learning_rate": 3.3734457532425554e-07, | |
| "logits/chosen": -1.14549720287323, | |
| "logits/rejected": -1.1533808708190918, | |
| "logps/chosen": -47.60829544067383, | |
| "logps/rejected": -65.90504455566406, | |
| "loss": 0.2893, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.4989345967769623, | |
| "rewards/margins": 3.784000873565674, | |
| "rewards/rejected": -4.28293514251709, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9001153994101808, | |
| "grad_norm": 38.79859580208747, | |
| "learning_rate": 3.354523997634969e-07, | |
| "logits/chosen": -1.1520222425460815, | |
| "logits/rejected": -1.159006953239441, | |
| "logps/chosen": -51.533203125, | |
| "logps/rejected": -68.82389068603516, | |
| "loss": 0.2439, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.5372670292854309, | |
| "rewards/margins": 3.8664119243621826, | |
| "rewards/rejected": -4.403678894042969, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9047313758174125, | |
| "grad_norm": 48.99149097647652, | |
| "learning_rate": 3.3355466507401374e-07, | |
| "logits/chosen": -1.1964970827102661, | |
| "logits/rejected": -1.195936679840088, | |
| "logps/chosen": -49.486724853515625, | |
| "logps/rejected": -51.76087951660156, | |
| "loss": 0.3225, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.6623955965042114, | |
| "rewards/margins": 2.538459539413452, | |
| "rewards/rejected": -3.200855255126953, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9093473522246441, | |
| "grad_norm": 34.31601060375262, | |
| "learning_rate": 3.3165149471344394e-07, | |
| "logits/chosen": -1.1766917705535889, | |
| "logits/rejected": -1.1769944429397583, | |
| "logps/chosen": -50.33845901489258, | |
| "logps/rejected": -60.9412841796875, | |
| "loss": 0.2826, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.7761159539222717, | |
| "rewards/margins": 2.734633445739746, | |
| "rewards/rejected": -3.510749340057373, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.9139633286318759, | |
| "grad_norm": 44.03751755180035, | |
| "learning_rate": 3.297430124930444e-07, | |
| "logits/chosen": -1.0980035066604614, | |
| "logits/rejected": -1.098144292831421, | |
| "logps/chosen": -56.62092590332031, | |
| "logps/rejected": -61.69611358642578, | |
| "loss": 0.3528, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.423416405916214, | |
| "rewards/margins": 2.6373236179351807, | |
| "rewards/rejected": -3.0607402324676514, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.9185793050391076, | |
| "grad_norm": 20.653109576582093, | |
| "learning_rate": 3.2782934256963647e-07, | |
| "logits/chosen": -1.1482434272766113, | |
| "logits/rejected": -1.1585140228271484, | |
| "logps/chosen": -52.95622253417969, | |
| "logps/rejected": -71.6562728881836, | |
| "loss": 0.2816, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.51191645860672, | |
| "rewards/margins": 3.753627300262451, | |
| "rewards/rejected": -4.265543460845947, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.9231952814463392, | |
| "grad_norm": 33.58507227677532, | |
| "learning_rate": 3.259106094375289e-07, | |
| "logits/chosen": -1.1832419633865356, | |
| "logits/rejected": -1.1917306184768677, | |
| "logps/chosen": -46.096675872802734, | |
| "logps/rejected": -71.52079010009766, | |
| "loss": 0.2501, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.2719300389289856, | |
| "rewards/margins": 4.045370101928711, | |
| "rewards/rejected": -4.317299842834473, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9231952814463392, | |
| "eval_logits/chosen": -1.1317284107208252, | |
| "eval_logits/rejected": -1.1366225481033325, | |
| "eval_logps/chosen": -49.24271011352539, | |
| "eval_logps/rejected": -63.64417266845703, | |
| "eval_loss": 0.2558155655860901, | |
| "eval_rewards/accuracies": 0.820852518081665, | |
| "eval_rewards/chosen": -0.30464252829551697, | |
| "eval_rewards/margins": 3.470890522003174, | |
| "eval_rewards/rejected": -3.775533437728882, | |
| "eval_runtime": 227.3955, | |
| "eval_samples_per_second": 7.625, | |
| "eval_steps_per_second": 1.909, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.927811257853571, | |
| "grad_norm": 40.05600195486227, | |
| "learning_rate": 3.239869379204189e-07, | |
| "logits/chosen": -1.165150761604309, | |
| "logits/rejected": -1.1658389568328857, | |
| "logps/chosen": -51.360679626464844, | |
| "logps/rejected": -65.45414733886719, | |
| "loss": 0.2124, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.36249446868896484, | |
| "rewards/margins": 3.780195951461792, | |
| "rewards/rejected": -4.142690658569336, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.9324272342608027, | |
| "grad_norm": 39.216765683029095, | |
| "learning_rate": 3.2205845316327144e-07, | |
| "logits/chosen": -1.183584213256836, | |
| "logits/rejected": -1.1847604513168335, | |
| "logps/chosen": -40.19718933105469, | |
| "logps/rejected": -51.98136901855469, | |
| "loss": 0.3544, | |
| "rewards/accuracies": 0.7361111044883728, | |
| "rewards/chosen": -0.23264381289482117, | |
| "rewards/margins": 2.18786883354187, | |
| "rewards/rejected": -2.4205124378204346, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9370432106680343, | |
| "grad_norm": 27.136867213884347, | |
| "learning_rate": 3.2012528062417845e-07, | |
| "logits/chosen": -1.1893184185028076, | |
| "logits/rejected": -1.186366081237793, | |
| "logps/chosen": -48.742347717285156, | |
| "logps/rejected": -53.711280822753906, | |
| "loss": 0.2583, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.4608592987060547, | |
| "rewards/margins": 2.4421236515045166, | |
| "rewards/rejected": -2.9029834270477295, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9416591870752661, | |
| "grad_norm": 28.572200442333273, | |
| "learning_rate": 3.1818754606619643e-07, | |
| "logits/chosen": -1.146033763885498, | |
| "logits/rejected": -1.154913306236267, | |
| "logps/chosen": -43.58420944213867, | |
| "logps/rejected": -65.13819885253906, | |
| "loss": 0.3209, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.25047793984413147, | |
| "rewards/margins": 4.219507217407227, | |
| "rewards/rejected": -3.969028949737549, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9462751634824977, | |
| "grad_norm": 22.48708730265553, | |
| "learning_rate": 3.162453755491655e-07, | |
| "logits/chosen": -1.2108149528503418, | |
| "logits/rejected": -1.2209829092025757, | |
| "logps/chosen": -45.49837112426758, | |
| "logps/rejected": -68.48489379882812, | |
| "loss": 0.1921, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.18967992067337036, | |
| "rewards/margins": 3.9147284030914307, | |
| "rewards/rejected": -3.725048542022705, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9508911398897294, | |
| "grad_norm": 37.73589723314555, | |
| "learning_rate": 3.142988954215079e-07, | |
| "logits/chosen": -1.1515933275222778, | |
| "logits/rejected": -1.1659475564956665, | |
| "logps/chosen": -48.16081619262695, | |
| "logps/rejected": -75.52259063720703, | |
| "loss": 0.2767, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.4500226378440857, | |
| "rewards/margins": 3.918086051940918, | |
| "rewards/rejected": -3.4680633544921875, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9555071162969612, | |
| "grad_norm": 50.447595273319266, | |
| "learning_rate": 3.1234823231200925e-07, | |
| "logits/chosen": -1.1608054637908936, | |
| "logits/rejected": -1.1741983890533447, | |
| "logps/chosen": -46.12180709838867, | |
| "logps/rejected": -76.99250030517578, | |
| "loss": 0.2659, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.0659295991063118, | |
| "rewards/margins": 4.484518527984619, | |
| "rewards/rejected": -4.4185895919799805, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9601230927041928, | |
| "grad_norm": 34.31172314274907, | |
| "learning_rate": 3.1039351312157993e-07, | |
| "logits/chosen": -1.1714129447937012, | |
| "logits/rejected": -1.1802603006362915, | |
| "logps/chosen": -47.3844108581543, | |
| "logps/rejected": -66.79096221923828, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.06639357656240463, | |
| "rewards/margins": 3.8744897842407227, | |
| "rewards/rejected": -3.8080966472625732, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9647390691114246, | |
| "grad_norm": 59.97310545853326, | |
| "learning_rate": 3.0843486501499967e-07, | |
| "logits/chosen": -1.1815389394760132, | |
| "logits/rejected": -1.1873387098312378, | |
| "logps/chosen": -49.0379638671875, | |
| "logps/rejected": -60.66644287109375, | |
| "loss": 0.375, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.37162768840789795, | |
| "rewards/margins": 2.9354913234710693, | |
| "rewards/rejected": -2.563863515853882, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9693550455186563, | |
| "grad_norm": 28.24836123391339, | |
| "learning_rate": 3.064724154126449e-07, | |
| "logits/chosen": -1.1865981817245483, | |
| "logits/rejected": -1.186213731765747, | |
| "logps/chosen": -49.98203659057617, | |
| "logps/rejected": -52.87804412841797, | |
| "loss": 0.231, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.10280448943376541, | |
| "rewards/margins": 2.791215181350708, | |
| "rewards/rejected": -2.688410758972168, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9693550455186563, | |
| "eval_logits/chosen": -1.1221855878829956, | |
| "eval_logits/rejected": -1.1280066967010498, | |
| "eval_logps/chosen": -48.28245544433594, | |
| "eval_logps/rejected": -62.80416488647461, | |
| "eval_loss": 0.252390593290329, | |
| "eval_rewards/accuracies": 0.820852518081665, | |
| "eval_rewards/chosen": 0.17548592388629913, | |
| "eval_rewards/margins": 3.531018018722534, | |
| "eval_rewards/rejected": -3.35553240776062, | |
| "eval_runtime": 227.4345, | |
| "eval_samples_per_second": 7.624, | |
| "eval_steps_per_second": 1.908, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9739710219258879, | |
| "grad_norm": 34.078391098200555, | |
| "learning_rate": 3.045062919821995e-07, | |
| "logits/chosen": -1.1267274618148804, | |
| "logits/rejected": -1.139290452003479, | |
| "logps/chosen": -46.573524475097656, | |
| "logps/rejected": -72.93567657470703, | |
| "loss": 0.2995, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.05330763757228851, | |
| "rewards/margins": 4.125481605529785, | |
| "rewards/rejected": -4.072174072265625, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9785869983331197, | |
| "grad_norm": 40.16987886487936, | |
| "learning_rate": 3.0253662263034925e-07, | |
| "logits/chosen": -1.1718653440475464, | |
| "logits/rejected": -1.1762607097625732, | |
| "logps/chosen": -51.13752746582031, | |
| "logps/rejected": -70.25437927246094, | |
| "loss": 0.2582, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.11435369402170181, | |
| "rewards/margins": 3.699098825454712, | |
| "rewards/rejected": -3.8134524822235107, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9832029747403513, | |
| "grad_norm": 32.13301323909956, | |
| "learning_rate": 3.005635354944606e-07, | |
| "logits/chosen": -1.1121575832366943, | |
| "logits/rejected": -1.113258957862854, | |
| "logps/chosen": -53.563053131103516, | |
| "logps/rejected": -52.64200210571289, | |
| "loss": 0.2696, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.11980216950178146, | |
| "rewards/margins": 2.639040231704712, | |
| "rewards/rejected": -2.7588419914245605, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.987818951147583, | |
| "grad_norm": 37.16965622890279, | |
| "learning_rate": 2.9858715893424504e-07, | |
| "logits/chosen": -1.1091896295547485, | |
| "logits/rejected": -1.1275534629821777, | |
| "logps/chosen": -45.88606643676758, | |
| "logps/rejected": -73.55078125, | |
| "loss": 0.1871, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.062331780791282654, | |
| "rewards/margins": 4.677850723266602, | |
| "rewards/rejected": -4.740182876586914, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9924349275548148, | |
| "grad_norm": 26.22123779547593, | |
| "learning_rate": 2.966076215234082e-07, | |
| "logits/chosen": -1.066051959991455, | |
| "logits/rejected": -1.0764615535736084, | |
| "logps/chosen": -54.595703125, | |
| "logps/rejected": -72.26995849609375, | |
| "loss": 0.1937, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.17351490259170532, | |
| "rewards/margins": 4.106486797332764, | |
| "rewards/rejected": -3.932971954345703, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9970509039620464, | |
| "grad_norm": 33.565390661724486, | |
| "learning_rate": 2.94625052041286e-07, | |
| "logits/chosen": -1.1814826726913452, | |
| "logits/rejected": -1.1842460632324219, | |
| "logps/chosen": -50.375, | |
| "logps/rejected": -58.42066955566406, | |
| "loss": 0.255, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.07062428444623947, | |
| "rewards/margins": 3.117582082748413, | |
| "rewards/rejected": -3.188206672668457, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.001666880369278, | |
| "grad_norm": 21.605417390859568, | |
| "learning_rate": 2.926395794644665e-07, | |
| "logits/chosen": -1.1752268075942993, | |
| "logits/rejected": -1.1771807670593262, | |
| "logps/chosen": -51.052242279052734, | |
| "logps/rejected": -61.03368377685547, | |
| "loss": 0.1838, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.23978914320468903, | |
| "rewards/margins": 3.637639045715332, | |
| "rewards/rejected": -3.3978495597839355, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.0062828567765099, | |
| "grad_norm": 29.001645648697252, | |
| "learning_rate": 2.906513329583991e-07, | |
| "logits/chosen": -1.186964511871338, | |
| "logits/rejected": -1.1928526163101196, | |
| "logps/chosen": -46.22761535644531, | |
| "logps/rejected": -62.58315658569336, | |
| "loss": 0.2362, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": -0.08661065995693207, | |
| "rewards/margins": 3.6797680854797363, | |
| "rewards/rejected": -3.76637864112854, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.0108988331837414, | |
| "grad_norm": 25.079284366199737, | |
| "learning_rate": 2.886604418689921e-07, | |
| "logits/chosen": -1.1703391075134277, | |
| "logits/rejected": -1.1838041543960571, | |
| "logps/chosen": -44.59703063964844, | |
| "logps/rejected": -76.01687622070312, | |
| "loss": 0.2554, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.19546601176261902, | |
| "rewards/margins": 4.618009567260742, | |
| "rewards/rejected": -4.813475131988525, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.0155148095909732, | |
| "grad_norm": 15.893743443702332, | |
| "learning_rate": 2.866670357141979e-07, | |
| "logits/chosen": -1.1566696166992188, | |
| "logits/rejected": -1.1605850458145142, | |
| "logps/chosen": -50.24718475341797, | |
| "logps/rejected": -61.23912048339844, | |
| "loss": 0.2096, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.3380340337753296, | |
| "rewards/margins": 4.049044609069824, | |
| "rewards/rejected": -3.711010456085205, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.0155148095909732, | |
| "eval_logits/chosen": -1.1188750267028809, | |
| "eval_logits/rejected": -1.124423861503601, | |
| "eval_logps/chosen": -47.42933654785156, | |
| "eval_logps/rejected": -62.06835174560547, | |
| "eval_loss": 0.2514457404613495, | |
| "eval_rewards/accuracies": 0.8237327337265015, | |
| "eval_rewards/chosen": 0.6020476222038269, | |
| "eval_rewards/margins": 3.589672088623047, | |
| "eval_rewards/rejected": -2.9876248836517334, | |
| "eval_runtime": 227.3665, | |
| "eval_samples_per_second": 7.626, | |
| "eval_steps_per_second": 1.909, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.020130785998205, | |
| "grad_norm": 13.432673135223586, | |
| "learning_rate": 2.8467124417558737e-07, | |
| "logits/chosen": -1.1185688972473145, | |
| "logits/rejected": -1.1209557056427002, | |
| "logps/chosen": -48.8853759765625, | |
| "logps/rejected": -63.88041305541992, | |
| "loss": 0.1931, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.8135783076286316, | |
| "rewards/margins": 4.117891788482666, | |
| "rewards/rejected": -3.3043136596679688, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.0247467624054365, | |
| "grad_norm": 26.849141741016137, | |
| "learning_rate": 2.8267319708991253e-07, | |
| "logits/chosen": -1.096121907234192, | |
| "logits/rejected": -1.0977866649627686, | |
| "logps/chosen": -52.22743225097656, | |
| "logps/rejected": -55.36363220214844, | |
| "loss": 0.1924, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.6431276202201843, | |
| "rewards/margins": 3.097036123275757, | |
| "rewards/rejected": -2.453908681869507, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.0293627388126683, | |
| "grad_norm": 31.09854358728895, | |
| "learning_rate": 2.806730244406612e-07, | |
| "logits/chosen": -1.1671628952026367, | |
| "logits/rejected": -1.1714211702346802, | |
| "logps/chosen": -46.41295623779297, | |
| "logps/rejected": -59.33990478515625, | |
| "loss": 0.2477, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5287280082702637, | |
| "rewards/margins": 3.4002442359924316, | |
| "rewards/rejected": -2.871516227722168, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.0339787152199, | |
| "grad_norm": 31.751017976380037, | |
| "learning_rate": 2.786708563496001e-07, | |
| "logits/chosen": -1.2408480644226074, | |
| "logits/rejected": -1.2548807859420776, | |
| "logps/chosen": -49.92308044433594, | |
| "logps/rejected": -67.60689544677734, | |
| "loss": 0.179, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.6711897850036621, | |
| "rewards/margins": 4.588629722595215, | |
| "rewards/rejected": -3.9174396991729736, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.0385946916271316, | |
| "grad_norm": 32.2047076869101, | |
| "learning_rate": 2.7666682306830994e-07, | |
| "logits/chosen": -1.19577157497406, | |
| "logits/rejected": -1.194454550743103, | |
| "logps/chosen": -46.63425827026367, | |
| "logps/rejected": -49.01081848144531, | |
| "loss": 0.2547, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5076774954795837, | |
| "rewards/margins": 2.832526206970215, | |
| "rewards/rejected": -2.3248488903045654, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.0432106680343634, | |
| "grad_norm": 28.553486090102076, | |
| "learning_rate": 2.746610549697119e-07, | |
| "logits/chosen": -1.1639982461929321, | |
| "logits/rejected": -1.1696867942810059, | |
| "logps/chosen": -49.013301849365234, | |
| "logps/rejected": -65.69493865966797, | |
| "loss": 0.2036, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5706920623779297, | |
| "rewards/margins": 3.6936237812042236, | |
| "rewards/rejected": -3.122931957244873, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0478266444415951, | |
| "grad_norm": 13.92439999757166, | |
| "learning_rate": 2.7265368253958615e-07, | |
| "logits/chosen": -1.2167223691940308, | |
| "logits/rejected": -1.2195782661437988, | |
| "logps/chosen": -45.21904754638672, | |
| "logps/rejected": -53.50370407104492, | |
| "loss": 0.1746, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.7654822468757629, | |
| "rewards/margins": 3.2732510566711426, | |
| "rewards/rejected": -2.5077688694000244, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0524426208488267, | |
| "grad_norm": 36.82786613306219, | |
| "learning_rate": 2.706448363680831e-07, | |
| "logits/chosen": -1.1744914054870605, | |
| "logits/rejected": -1.1839237213134766, | |
| "logps/chosen": -47.35738754272461, | |
| "logps/rejected": -73.86341094970703, | |
| "loss": 0.1569, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.696943998336792, | |
| "rewards/margins": 4.708230018615723, | |
| "rewards/rejected": -4.011285781860352, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0570585972560584, | |
| "grad_norm": 13.78515040845917, | |
| "learning_rate": 2.686346471412277e-07, | |
| "logits/chosen": -1.1262328624725342, | |
| "logits/rejected": -1.1403940916061401, | |
| "logps/chosen": -49.68544387817383, | |
| "logps/rejected": -76.74578857421875, | |
| "loss": 0.1419, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.3619132339954376, | |
| "rewards/margins": 4.7866668701171875, | |
| "rewards/rejected": -4.424753665924072, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0616745736632902, | |
| "grad_norm": 26.57858405427076, | |
| "learning_rate": 2.6662324563241805e-07, | |
| "logits/chosen": -1.2429147958755493, | |
| "logits/rejected": -1.2455251216888428, | |
| "logps/chosen": -45.16366195678711, | |
| "logps/rejected": -57.56427001953125, | |
| "loss": 0.2357, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.5941984057426453, | |
| "rewards/margins": 3.2229466438293457, | |
| "rewards/rejected": -2.6287484169006348, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0616745736632902, | |
| "eval_logits/chosen": -1.1103266477584839, | |
| "eval_logits/rejected": -1.1161012649536133, | |
| "eval_logps/chosen": -47.78204345703125, | |
| "eval_logps/rejected": -62.643646240234375, | |
| "eval_loss": 0.2479603886604309, | |
| "eval_rewards/accuracies": 0.8277649879455566, | |
| "eval_rewards/chosen": 0.42569395899772644, | |
| "eval_rewards/margins": 3.700965642929077, | |
| "eval_rewards/rejected": -3.2752716541290283, | |
| "eval_runtime": 227.8171, | |
| "eval_samples_per_second": 7.611, | |
| "eval_steps_per_second": 1.905, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0662905500705218, | |
| "grad_norm": 16.869585682992792, | |
| "learning_rate": 2.6461076269391713e-07, | |
| "logits/chosen": -1.0661816596984863, | |
| "logits/rejected": -1.0739065408706665, | |
| "logps/chosen": -54.75306701660156, | |
| "logps/rejected": -72.30966186523438, | |
| "loss": 0.1519, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.5898687839508057, | |
| "rewards/margins": 4.626287460327148, | |
| "rewards/rejected": -4.03641939163208, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0709065264777535, | |
| "grad_norm": 39.49599828169064, | |
| "learning_rate": 2.625973292483409e-07, | |
| "logits/chosen": -1.1013195514678955, | |
| "logits/rejected": -1.1079678535461426, | |
| "logps/chosen": -56.018310546875, | |
| "logps/rejected": -69.99239349365234, | |
| "loss": 0.2341, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.3379661738872528, | |
| "rewards/margins": 3.941251516342163, | |
| "rewards/rejected": -3.603285551071167, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0755225028849853, | |
| "grad_norm": 19.11015860441972, | |
| "learning_rate": 2.6058307628014065e-07, | |
| "logits/chosen": -1.110822319984436, | |
| "logits/rejected": -1.1167054176330566, | |
| "logps/chosen": -53.93489074707031, | |
| "logps/rejected": -66.05422973632812, | |
| "loss": 0.1708, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.3638114929199219, | |
| "rewards/margins": 4.259352207183838, | |
| "rewards/rejected": -3.895540952682495, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0801384792922168, | |
| "grad_norm": 29.43582021185457, | |
| "learning_rate": 2.5856813482708217e-07, | |
| "logits/chosen": -1.184598445892334, | |
| "logits/rejected": -1.1916086673736572, | |
| "logps/chosen": -49.6500244140625, | |
| "logps/rejected": -56.30369567871094, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.36434826254844666, | |
| "rewards/margins": 3.6883039474487305, | |
| "rewards/rejected": -3.32395601272583, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0847544556994486, | |
| "grad_norm": 25.173770275980058, | |
| "learning_rate": 2.565526359717206e-07, | |
| "logits/chosen": -1.1290383338928223, | |
| "logits/rejected": -1.1306836605072021, | |
| "logps/chosen": -43.971435546875, | |
| "logps/rejected": -53.389156341552734, | |
| "loss": 0.289, | |
| "rewards/accuracies": 0.7777777910232544, | |
| "rewards/chosen": 0.18997710943222046, | |
| "rewards/margins": 3.0762457847595215, | |
| "rewards/rejected": -2.8862688541412354, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0893704321066804, | |
| "grad_norm": 19.76426072194178, | |
| "learning_rate": 2.545367108328731e-07, | |
| "logits/chosen": -1.163740873336792, | |
| "logits/rejected": -1.1682920455932617, | |
| "logps/chosen": -49.15140914916992, | |
| "logps/rejected": -59.53387451171875, | |
| "loss": 0.187, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.26795661449432373, | |
| "rewards/margins": 3.4266703128814697, | |
| "rewards/rejected": -3.1587133407592773, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0939864085139122, | |
| "grad_norm": 17.775087193890624, | |
| "learning_rate": 2.525204905570889e-07, | |
| "logits/chosen": -1.1204829216003418, | |
| "logits/rejected": -1.1253838539123535, | |
| "logps/chosen": -54.046390533447266, | |
| "logps/rejected": -66.79469299316406, | |
| "loss": 0.1607, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.39596712589263916, | |
| "rewards/margins": 4.161929130554199, | |
| "rewards/rejected": -3.7659616470336914, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0986023849211437, | |
| "grad_norm": 24.757995548476888, | |
| "learning_rate": 2.505041063101171e-07, | |
| "logits/chosen": -1.1805049180984497, | |
| "logits/rejected": -1.1901381015777588, | |
| "logps/chosen": -53.46992492675781, | |
| "logps/rejected": -59.58029556274414, | |
| "loss": 0.2762, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.13624940812587738, | |
| "rewards/margins": 3.440009832382202, | |
| "rewards/rejected": -3.303760290145874, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.1032183613283755, | |
| "grad_norm": 19.970980113968885, | |
| "learning_rate": 2.4848768926837466e-07, | |
| "logits/chosen": -1.0963982343673706, | |
| "logits/rejected": -1.112579345703125, | |
| "logps/chosen": -47.13343811035156, | |
| "logps/rejected": -87.57925415039062, | |
| "loss": 0.1717, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.06597856432199478, | |
| "rewards/margins": 5.439146518707275, | |
| "rewards/rejected": -5.373167991638184, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.107834337735607, | |
| "grad_norm": 16.28084665624267, | |
| "learning_rate": 2.464713706104113e-07, | |
| "logits/chosen": -1.1157184839248657, | |
| "logits/rejected": -1.1205692291259766, | |
| "logps/chosen": -50.34828567504883, | |
| "logps/rejected": -63.41987609863281, | |
| "loss": 0.1608, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.043437667191028595, | |
| "rewards/margins": 4.042869567871094, | |
| "rewards/rejected": -3.99943208694458, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.107834337735607, | |
| "eval_logits/chosen": -1.1090331077575684, | |
| "eval_logits/rejected": -1.1147438287734985, | |
| "eval_logps/chosen": -48.225521087646484, | |
| "eval_logps/rejected": -63.15263748168945, | |
| "eval_loss": 0.24383509159088135, | |
| "eval_rewards/accuracies": 0.8294931054115295, | |
| "eval_rewards/chosen": 0.203952819108963, | |
| "eval_rewards/margins": 3.733717441558838, | |
| "eval_rewards/rejected": -3.529764175415039, | |
| "eval_runtime": 227.3827, | |
| "eval_samples_per_second": 7.626, | |
| "eval_steps_per_second": 1.909, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.1124503141428388, | |
| "grad_norm": 22.059822838666445, | |
| "learning_rate": 2.444552815083767e-07, | |
| "logits/chosen": -1.1268048286437988, | |
| "logits/rejected": -1.1286168098449707, | |
| "logps/chosen": -49.519527435302734, | |
| "logps/rejected": -52.751312255859375, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.3959563970565796, | |
| "rewards/margins": 3.2894110679626465, | |
| "rewards/rejected": -2.8934545516967773, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.1170662905500706, | |
| "grad_norm": 19.4616654191151, | |
| "learning_rate": 2.4243955311948693e-07, | |
| "logits/chosen": -1.1610568761825562, | |
| "logits/rejected": -1.1703104972839355, | |
| "logps/chosen": -45.62093734741211, | |
| "logps/rejected": -71.41989135742188, | |
| "loss": 0.2218, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.056832365691661835, | |
| "rewards/margins": 4.676138877868652, | |
| "rewards/rejected": -4.619307041168213, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.1216822669573023, | |
| "grad_norm": 29.74478192767247, | |
| "learning_rate": 2.4042431657749115e-07, | |
| "logits/chosen": -1.082115650177002, | |
| "logits/rejected": -1.097312092781067, | |
| "logps/chosen": -47.262996673583984, | |
| "logps/rejected": -84.21355438232422, | |
| "loss": 0.1955, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.2266267091035843, | |
| "rewards/margins": 4.9980058670043945, | |
| "rewards/rejected": -4.771378993988037, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.1262982433645339, | |
| "grad_norm": 34.83946230592783, | |
| "learning_rate": 2.384097029841419e-07, | |
| "logits/chosen": -1.1644551753997803, | |
| "logits/rejected": -1.1694614887237549, | |
| "logps/chosen": -49.46389389038086, | |
| "logps/rejected": -59.47831344604492, | |
| "loss": 0.2086, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.2441619485616684, | |
| "rewards/margins": 3.580085277557373, | |
| "rewards/rejected": -3.335923433303833, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.1309142197717656, | |
| "grad_norm": 21.45284163289699, | |
| "learning_rate": 2.3639584340066544e-07, | |
| "logits/chosen": -1.1405658721923828, | |
| "logits/rejected": -1.146559238433838, | |
| "logps/chosen": -41.80732727050781, | |
| "logps/rejected": -62.37071990966797, | |
| "loss": 0.2166, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.7920265197753906, | |
| "rewards/margins": 4.414507865905762, | |
| "rewards/rejected": -3.622481346130371, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.1355301961789972, | |
| "grad_norm": 20.900830046410174, | |
| "learning_rate": 2.3438286883923539e-07, | |
| "logits/chosen": -1.164839267730713, | |
| "logits/rejected": -1.1716415882110596, | |
| "logps/chosen": -52.77006912231445, | |
| "logps/rejected": -60.97998046875, | |
| "loss": 0.2024, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.7002817988395691, | |
| "rewards/margins": 3.6204490661621094, | |
| "rewards/rejected": -2.9201676845550537, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.140146172586229, | |
| "grad_norm": 22.811055008597176, | |
| "learning_rate": 2.323709102544506e-07, | |
| "logits/chosen": -1.1385387182235718, | |
| "logits/rejected": -1.1350369453430176, | |
| "logps/chosen": -44.97706604003906, | |
| "logps/rejected": -47.24876403808594, | |
| "loss": 0.267, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.8821587562561035, | |
| "rewards/margins": 2.6500444412231445, | |
| "rewards/rejected": -1.7678859233856201, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.1447621489934607, | |
| "grad_norm": 25.7099907801656, | |
| "learning_rate": 2.3036009853481474e-07, | |
| "logits/chosen": -1.1164131164550781, | |
| "logits/rejected": -1.124334454536438, | |
| "logps/chosen": -44.46446228027344, | |
| "logps/rejected": -66.84891510009766, | |
| "loss": 0.252, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.42438387870788574, | |
| "rewards/margins": 4.415454387664795, | |
| "rewards/rejected": -3.9910707473754883, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1493781254006925, | |
| "grad_norm": 24.703959475838438, | |
| "learning_rate": 2.283505644942223e-07, | |
| "logits/chosen": -1.1680512428283691, | |
| "logits/rejected": -1.172341227531433, | |
| "logps/chosen": -39.898555755615234, | |
| "logps/rejected": -61.25082778930664, | |
| "loss": 0.1888, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.8999897837638855, | |
| "rewards/margins": 4.018680095672607, | |
| "rewards/rejected": -3.118690252304077, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.153994101807924, | |
| "grad_norm": 22.54617945872361, | |
| "learning_rate": 2.2634243886344781e-07, | |
| "logits/chosen": -1.1353996992111206, | |
| "logits/rejected": -1.1466150283813477, | |
| "logps/chosen": -47.095890045166016, | |
| "logps/rejected": -63.17418670654297, | |
| "loss": 0.1944, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.0475714206695557, | |
| "rewards/margins": 4.362390518188477, | |
| "rewards/rejected": -3.314818859100342, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.153994101807924, | |
| "eval_logits/chosen": -1.1096464395523071, | |
| "eval_logits/rejected": -1.1153897047042847, | |
| "eval_logps/chosen": -46.75843811035156, | |
| "eval_logps/rejected": -61.96721649169922, | |
| "eval_loss": 0.24556967616081238, | |
| "eval_rewards/accuracies": 0.8312212228775024, | |
| "eval_rewards/chosen": 0.9374985098838806, | |
| "eval_rewards/margins": 3.8745529651641846, | |
| "eval_rewards/rejected": -2.9370551109313965, | |
| "eval_runtime": 227.4325, | |
| "eval_samples_per_second": 7.624, | |
| "eval_steps_per_second": 1.908, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1586100782151558, | |
| "grad_norm": 27.18085844791722, | |
| "learning_rate": 2.2433585228164115e-07, | |
| "logits/chosen": -1.1491751670837402, | |
| "logits/rejected": -1.1580781936645508, | |
| "logps/chosen": -50.64473342895508, | |
| "logps/rejected": -75.20685577392578, | |
| "loss": 0.2218, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.8257203698158264, | |
| "rewards/margins": 5.088500022888184, | |
| "rewards/rejected": -4.26278018951416, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1632260546223874, | |
| "grad_norm": 23.89654504779422, | |
| "learning_rate": 2.2233093528782938e-07, | |
| "logits/chosen": -1.1577401161193848, | |
| "logits/rejected": -1.1688594818115234, | |
| "logps/chosen": -54.145572662353516, | |
| "logps/rejected": -66.3442611694336, | |
| "loss": 0.1751, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 1.085695743560791, | |
| "rewards/margins": 4.018051624298096, | |
| "rewards/rejected": -2.9323554039001465, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1678420310296191, | |
| "grad_norm": 23.769575263027864, | |
| "learning_rate": 2.2032781831242367e-07, | |
| "logits/chosen": -1.1783199310302734, | |
| "logits/rejected": -1.182464838027954, | |
| "logps/chosen": -41.669734954833984, | |
| "logps/rejected": -51.315006256103516, | |
| "loss": 0.2418, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.9586374759674072, | |
| "rewards/margins": 3.437027931213379, | |
| "rewards/rejected": -2.4783899784088135, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.172458007436851, | |
| "grad_norm": 36.28094297883332, | |
| "learning_rate": 2.183266316687347e-07, | |
| "logits/chosen": -1.1632755994796753, | |
| "logits/rejected": -1.1601239442825317, | |
| "logps/chosen": -47.738983154296875, | |
| "logps/rejected": -49.51511001586914, | |
| "loss": 0.2641, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 1.0999305248260498, | |
| "rewards/margins": 2.7960052490234375, | |
| "rewards/rejected": -1.6960747241973877, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1770739838440827, | |
| "grad_norm": 18.77553292711027, | |
| "learning_rate": 2.16327505544495e-07, | |
| "logits/chosen": -1.1429252624511719, | |
| "logits/rejected": -1.1522622108459473, | |
| "logps/chosen": -50.42318344116211, | |
| "logps/rejected": -66.82962036132812, | |
| "loss": 0.1438, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 1.0773425102233887, | |
| "rewards/margins": 4.845379829406738, | |
| "rewards/rejected": -3.7680368423461914, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1816899602513142, | |
| "grad_norm": 17.949048045246396, | |
| "learning_rate": 2.143305699933892e-07, | |
| "logits/chosen": -1.1755365133285522, | |
| "logits/rejected": -1.180452823638916, | |
| "logps/chosen": -43.959930419921875, | |
| "logps/rejected": -64.32354736328125, | |
| "loss": 0.2051, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.7747830748558044, | |
| "rewards/margins": 4.00773811340332, | |
| "rewards/rejected": -3.23295521736145, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.186305936658546, | |
| "grad_norm": 29.609702959008317, | |
| "learning_rate": 2.1233595492659382e-07, | |
| "logits/chosen": -1.0717233419418335, | |
| "logits/rejected": -1.0747785568237305, | |
| "logps/chosen": -56.3906135559082, | |
| "logps/rejected": -58.85280990600586, | |
| "loss": 0.1651, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.6519337892532349, | |
| "rewards/margins": 3.8963236808776855, | |
| "rewards/rejected": -3.244389772415161, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1909219130657775, | |
| "grad_norm": 20.918336765308602, | |
| "learning_rate": 2.1034379010432542e-07, | |
| "logits/chosen": -1.1730085611343384, | |
| "logits/rejected": -1.1727977991104126, | |
| "logps/chosen": -44.0880126953125, | |
| "logps/rejected": -56.85806655883789, | |
| "loss": 0.1976, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.4944622218608856, | |
| "rewards/margins": 3.6419970989227295, | |
| "rewards/rejected": -3.1475343704223633, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1955378894730093, | |
| "grad_norm": 25.288305102390122, | |
| "learning_rate": 2.0835420512739957e-07, | |
| "logits/chosen": -1.1438689231872559, | |
| "logits/rejected": -1.1519646644592285, | |
| "logps/chosen": -47.1004638671875, | |
| "logps/rejected": -83.05133819580078, | |
| "loss": 0.1839, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.2153804749250412, | |
| "rewards/margins": 5.248907089233398, | |
| "rewards/rejected": -5.03352689743042, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.200153865880241, | |
| "grad_norm": 19.59880042292574, | |
| "learning_rate": 2.0636732942879917e-07, | |
| "logits/chosen": -1.1264581680297852, | |
| "logits/rejected": -1.1307095289230347, | |
| "logps/chosen": -50.47024917602539, | |
| "logps/rejected": -64.73220825195312, | |
| "loss": 0.1619, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.3765060007572174, | |
| "rewards/margins": 4.3557209968566895, | |
| "rewards/rejected": -3.979214906692505, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.200153865880241, | |
| "eval_logits/chosen": -1.1257847547531128, | |
| "eval_logits/rejected": -1.1301593780517578, | |
| "eval_logps/chosen": -48.62382507324219, | |
| "eval_logps/rejected": -63.695838928222656, | |
| "eval_loss": 0.24041977524757385, | |
| "eval_rewards/accuracies": 0.8335253596305847, | |
| "eval_rewards/chosen": 0.004803389776498079, | |
| "eval_rewards/margins": 3.8061721324920654, | |
| "eval_rewards/rejected": -3.8013687133789062, | |
| "eval_runtime": 227.3215, | |
| "eval_samples_per_second": 7.628, | |
| "eval_steps_per_second": 1.909, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.2047698422874729, | |
| "grad_norm": 17.44277679097044, | |
| "learning_rate": 2.0438329226525415e-07, | |
| "logits/chosen": -1.144020915031433, | |
| "logits/rejected": -1.1453847885131836, | |
| "logps/chosen": -49.49299621582031, | |
| "logps/rejected": -53.57789611816406, | |
| "loss": 0.2025, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.42205584049224854, | |
| "rewards/margins": 3.364236831665039, | |
| "rewards/rejected": -2.942180871963501, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.2093858186947044, | |
| "grad_norm": 37.73486109040497, | |
| "learning_rate": 2.0240222270883288e-07, | |
| "logits/chosen": -1.1556731462478638, | |
| "logits/rejected": -1.1702880859375, | |
| "logps/chosen": -50.98395538330078, | |
| "logps/rejected": -76.1040267944336, | |
| "loss": 0.2225, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.07603338360786438, | |
| "rewards/margins": 4.920933246612549, | |
| "rewards/rejected": -4.9969658851623535, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.2140017951019362, | |
| "grad_norm": 25.377653028594498, | |
| "learning_rate": 2.0042424963854542e-07, | |
| "logits/chosen": -1.196961760520935, | |
| "logits/rejected": -1.2145916223526, | |
| "logps/chosen": -47.896732330322266, | |
| "logps/rejected": -83.96469116210938, | |
| "loss": 0.1463, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.26778745651245117, | |
| "rewards/margins": 5.1378092765808105, | |
| "rewards/rejected": -5.40559720993042, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.2186177715091677, | |
| "grad_norm": 21.037533939572622, | |
| "learning_rate": 1.9844950173195883e-07, | |
| "logits/chosen": -1.2031718492507935, | |
| "logits/rejected": -1.207302212715149, | |
| "logps/chosen": -48.16632843017578, | |
| "logps/rejected": -63.03504943847656, | |
| "loss": 0.1798, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.3718983829021454, | |
| "rewards/margins": 3.580104351043701, | |
| "rewards/rejected": -3.95200252532959, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.2232337479163995, | |
| "grad_norm": 22.283357312346677, | |
| "learning_rate": 1.964781074568265e-07, | |
| "logits/chosen": -1.2361119985580444, | |
| "logits/rejected": -1.2355589866638184, | |
| "logps/chosen": -48.786136627197266, | |
| "logps/rejected": -53.23166275024414, | |
| "loss": 0.1959, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.26044440269470215, | |
| "rewards/margins": 3.046844482421875, | |
| "rewards/rejected": -3.307288885116577, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.2278497243236313, | |
| "grad_norm": 32.696983725912126, | |
| "learning_rate": 1.9451019506273018e-07, | |
| "logits/chosen": -1.1622250080108643, | |
| "logits/rejected": -1.16159987449646, | |
| "logps/chosen": -42.64177322387695, | |
| "logps/rejected": -55.659339904785156, | |
| "loss": 0.2379, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.17224609851837158, | |
| "rewards/margins": 3.0260884761810303, | |
| "rewards/rejected": -3.1983346939086914, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.232465700730863, | |
| "grad_norm": 31.81561570512381, | |
| "learning_rate": 1.9254589257273712e-07, | |
| "logits/chosen": -1.1568024158477783, | |
| "logits/rejected": -1.1644660234451294, | |
| "logps/chosen": -43.20817565917969, | |
| "logps/rejected": -66.71856689453125, | |
| "loss": 0.1648, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.03500910475850105, | |
| "rewards/margins": 4.864539623260498, | |
| "rewards/rejected": -4.899548530578613, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.2370816771380946, | |
| "grad_norm": 28.042577352949, | |
| "learning_rate": 1.9058532777507141e-07, | |
| "logits/chosen": -1.1810351610183716, | |
| "logits/rejected": -1.186877727508545, | |
| "logps/chosen": -46.86735534667969, | |
| "logps/rejected": -58.00865173339844, | |
| "loss": 0.1946, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.07706524431705475, | |
| "rewards/margins": 3.6953649520874023, | |
| "rewards/rejected": -3.772430896759033, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.2416976535453264, | |
| "grad_norm": 31.06776524909045, | |
| "learning_rate": 1.886286282148002e-07, | |
| "logits/chosen": -1.1419155597686768, | |
| "logits/rejected": -1.1518969535827637, | |
| "logps/chosen": -48.36051559448242, | |
| "logps/rejected": -68.71621704101562, | |
| "loss": 0.2628, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": -0.3396787643432617, | |
| "rewards/margins": 4.181530952453613, | |
| "rewards/rejected": -4.521209716796875, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.246313629952558, | |
| "grad_norm": 25.267305267108494, | |
| "learning_rate": 1.8667592118553693e-07, | |
| "logits/chosen": -1.2024831771850586, | |
| "logits/rejected": -1.2076871395111084, | |
| "logps/chosen": -52.623023986816406, | |
| "logps/rejected": -61.89004898071289, | |
| "loss": 0.2083, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": -0.21523982286453247, | |
| "rewards/margins": 3.845079183578491, | |
| "rewards/rejected": -4.060319423675537, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.246313629952558, | |
| "eval_logits/chosen": -1.1086678504943848, | |
| "eval_logits/rejected": -1.11427903175354, | |
| "eval_logps/chosen": -48.652427673339844, | |
| "eval_logps/rejected": -63.7818717956543, | |
| "eval_loss": 0.2395094931125641, | |
| "eval_rewards/accuracies": 0.8323732614517212, | |
| "eval_rewards/chosen": -0.009500053711235523, | |
| "eval_rewards/margins": 3.8348822593688965, | |
| "eval_rewards/rejected": -3.8443822860717773, | |
| "eval_runtime": 227.3478, | |
| "eval_samples_per_second": 7.627, | |
| "eval_steps_per_second": 1.909, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.2509296063597897, | |
| "grad_norm": 29.79858133121253, | |
| "learning_rate": 1.8472733372115956e-07, | |
| "logits/chosen": -1.2199275493621826, | |
| "logits/rejected": -1.2279648780822754, | |
| "logps/chosen": -50.78293228149414, | |
| "logps/rejected": -72.13998413085938, | |
| "loss": 0.2006, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.18516860902309418, | |
| "rewards/margins": 4.692964553833008, | |
| "rewards/rejected": -4.878133773803711, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.2555455827670214, | |
| "grad_norm": 18.335692774068683, | |
| "learning_rate": 1.8278299258754692e-07, | |
| "logits/chosen": -1.1382191181182861, | |
| "logits/rejected": -1.1532717943191528, | |
| "logps/chosen": -51.21404266357422, | |
| "logps/rejected": -84.46864318847656, | |
| "loss": 0.2507, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.002586497226729989, | |
| "rewards/margins": 6.08302116394043, | |
| "rewards/rejected": -6.085607528686523, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2601615591742532, | |
| "grad_norm": 16.812848293352094, | |
| "learning_rate": 1.808430242743316e-07, | |
| "logits/chosen": -1.1635518074035645, | |
| "logits/rejected": -1.1678366661071777, | |
| "logps/chosen": -50.37785339355469, | |
| "logps/rejected": -63.68332290649414, | |
| "loss": 0.207, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.4088381230831146, | |
| "rewards/margins": 4.470311641693115, | |
| "rewards/rejected": -4.0614728927612305, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2647775355814848, | |
| "grad_norm": 15.038796138409147, | |
| "learning_rate": 1.7890755498667104e-07, | |
| "logits/chosen": -1.1664639711380005, | |
| "logits/rejected": -1.1719496250152588, | |
| "logps/chosen": -42.69700622558594, | |
| "logps/rejected": -64.51258850097656, | |
| "loss": 0.1614, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.17392092943191528, | |
| "rewards/margins": 4.139625549316406, | |
| "rewards/rejected": -3.9657046794891357, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2693935119887165, | |
| "grad_norm": 27.84348089734983, | |
| "learning_rate": 1.7697671063703756e-07, | |
| "logits/chosen": -1.1588454246520996, | |
| "logits/rejected": -1.1661314964294434, | |
| "logps/chosen": -44.9870719909668, | |
| "logps/rejected": -65.78500366210938, | |
| "loss": 0.2201, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.46526622772216797, | |
| "rewards/margins": 4.418604373931885, | |
| "rewards/rejected": -3.953338623046875, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.274009488395948, | |
| "grad_norm": 44.28075638457142, | |
| "learning_rate": 1.750506168370267e-07, | |
| "logits/chosen": -1.1754214763641357, | |
| "logits/rejected": -1.175520896911621, | |
| "logps/chosen": -46.97793960571289, | |
| "logps/rejected": -54.8778076171875, | |
| "loss": 0.2392, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.9450171589851379, | |
| "rewards/margins": 3.314897298812866, | |
| "rewards/rejected": -2.369880199432373, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2786254648031798, | |
| "grad_norm": 17.20774183339189, | |
| "learning_rate": 1.7312939888918594e-07, | |
| "logits/chosen": -1.1337149143218994, | |
| "logits/rejected": -1.1420766115188599, | |
| "logps/chosen": -49.699161529541016, | |
| "logps/rejected": -73.22956085205078, | |
| "loss": 0.1671, | |
| "rewards/accuracies": 0.9444444179534912, | |
| "rewards/chosen": 0.23295314610004425, | |
| "rewards/margins": 4.598634243011475, | |
| "rewards/rejected": -4.36568021774292, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2832414412104116, | |
| "grad_norm": 12.879730723232214, | |
| "learning_rate": 1.712131817788628e-07, | |
| "logits/chosen": -1.1121582984924316, | |
| "logits/rejected": -1.1107348203659058, | |
| "logps/chosen": -46.21583938598633, | |
| "logps/rejected": -56.827064514160156, | |
| "loss": 0.2081, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.3201579451560974, | |
| "rewards/margins": 3.4782567024230957, | |
| "rewards/rejected": -3.1580984592437744, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2878574176176434, | |
| "grad_norm": 18.379299257318767, | |
| "learning_rate": 1.693020901660738e-07, | |
| "logits/chosen": -1.1363815069198608, | |
| "logits/rejected": -1.1399273872375488, | |
| "logps/chosen": -53.33127212524414, | |
| "logps/rejected": -65.93392181396484, | |
| "loss": 0.1484, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.24407842755317688, | |
| "rewards/margins": 4.509576320648193, | |
| "rewards/rejected": -4.265497207641602, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.292473394024875, | |
| "grad_norm": 30.348807794181617, | |
| "learning_rate": 1.6739624837739518e-07, | |
| "logits/chosen": -1.1836738586425781, | |
| "logits/rejected": -1.188474416732788, | |
| "logps/chosen": -53.792484283447266, | |
| "logps/rejected": -60.70852279663086, | |
| "loss": 0.2063, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.07742541283369064, | |
| "rewards/margins": 3.257985830307007, | |
| "rewards/rejected": -3.335411310195923, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.292473394024875, | |
| "eval_logits/chosen": -1.1171412467956543, | |
| "eval_logits/rejected": -1.1220649480819702, | |
| "eval_logps/chosen": -48.55030059814453, | |
| "eval_logps/rejected": -63.84280014038086, | |
| "eval_loss": 0.2384917438030243, | |
| "eval_rewards/accuracies": 0.8329492807388306, | |
| "eval_rewards/chosen": 0.04156512767076492, | |
| "eval_rewards/margins": 3.9164135456085205, | |
| "eval_rewards/rejected": -3.8748483657836914, | |
| "eval_runtime": 227.4385, | |
| "eval_samples_per_second": 7.624, | |
| "eval_steps_per_second": 1.908, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2970893704321067, | |
| "grad_norm": 16.156045882880065, | |
| "learning_rate": 1.6549578039787434e-07, | |
| "logits/chosen": -1.174346923828125, | |
| "logits/rejected": -1.1770930290222168, | |
| "logps/chosen": -50.916481018066406, | |
| "logps/rejected": -73.8545913696289, | |
| "loss": 0.2379, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.09411442279815674, | |
| "rewards/margins": 4.1428680419921875, | |
| "rewards/rejected": -4.04875373840332, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.3017053468393383, | |
| "grad_norm": 16.73959305434838, | |
| "learning_rate": 1.6360080986296384e-07, | |
| "logits/chosen": -1.14383065700531, | |
| "logits/rejected": -1.1593358516693115, | |
| "logps/chosen": -43.3416862487793, | |
| "logps/rejected": -74.88224029541016, | |
| "loss": 0.1743, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.07543455064296722, | |
| "rewards/margins": 5.390232563018799, | |
| "rewards/rejected": -5.465667247772217, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.30632132324657, | |
| "grad_norm": 19.01188883701719, | |
| "learning_rate": 1.6171146005047894e-07, | |
| "logits/chosen": -1.1023046970367432, | |
| "logits/rejected": -1.1084368228912354, | |
| "logps/chosen": -55.277671813964844, | |
| "logps/rejected": -72.66205596923828, | |
| "loss": 0.1773, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.12563864886760712, | |
| "rewards/margins": 4.530452728271484, | |
| "rewards/rejected": -4.404813766479492, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.3109372996538018, | |
| "grad_norm": 21.87767395078719, | |
| "learning_rate": 1.5982785387257694e-07, | |
| "logits/chosen": -1.1128134727478027, | |
| "logits/rejected": -1.1113499402999878, | |
| "logps/chosen": -49.72315979003906, | |
| "logps/rejected": -56.22350311279297, | |
| "loss": 0.1993, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.21243299543857574, | |
| "rewards/margins": 3.1131999492645264, | |
| "rewards/rejected": -3.3256328105926514, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.3155532760610336, | |
| "grad_norm": 23.959413620362366, | |
| "learning_rate": 1.5795011386776159e-07, | |
| "logits/chosen": -1.2445893287658691, | |
| "logits/rejected": -1.2455319166183472, | |
| "logps/chosen": -49.374000549316406, | |
| "logps/rejected": -54.24154281616211, | |
| "loss": 0.2022, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.03143635019659996, | |
| "rewards/margins": 3.3321659564971924, | |
| "rewards/rejected": -3.300729751586914, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.320169252468265, | |
| "grad_norm": 16.120326025420926, | |
| "learning_rate": 1.560783621929113e-07, | |
| "logits/chosen": -1.216759204864502, | |
| "logits/rejected": -1.2203327417373657, | |
| "logps/chosen": -57.26462936401367, | |
| "logps/rejected": -62.25014877319336, | |
| "loss": 0.1895, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.36375537514686584, | |
| "rewards/margins": 3.7947163581848145, | |
| "rewards/rejected": -3.4309606552124023, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.3247852288754969, | |
| "grad_norm": 48.11745636864845, | |
| "learning_rate": 1.5421272061533177e-07, | |
| "logits/chosen": -1.1405613422393799, | |
| "logits/rejected": -1.1527469158172607, | |
| "logps/chosen": -43.10445785522461, | |
| "logps/rejected": -70.5496826171875, | |
| "loss": 0.3131, | |
| "rewards/accuracies": 0.7638888955116272, | |
| "rewards/chosen": 0.3498223125934601, | |
| "rewards/margins": 4.530541896820068, | |
| "rewards/rejected": -4.180719375610352, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.3294012052827284, | |
| "grad_norm": 36.29330680340726, | |
| "learning_rate": 1.5235331050483513e-07, | |
| "logits/chosen": -1.110296607017517, | |
| "logits/rejected": -1.1132102012634277, | |
| "logps/chosen": -50.03364944458008, | |
| "logps/rejected": -65.54788208007812, | |
| "loss": 0.2241, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.058246809989213943, | |
| "rewards/margins": 3.8965775966644287, | |
| "rewards/rejected": -3.954824209213257, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.3340171816899602, | |
| "grad_norm": 12.515890509319174, | |
| "learning_rate": 1.5050025282584327e-07, | |
| "logits/chosen": -1.1224780082702637, | |
| "logits/rejected": -1.1311124563217163, | |
| "logps/chosen": -56.89671325683594, | |
| "logps/rejected": -74.29792785644531, | |
| "loss": 0.1357, | |
| "rewards/accuracies": 0.9444444179534912, | |
| "rewards/chosen": 0.14432966709136963, | |
| "rewards/margins": 4.722050666809082, | |
| "rewards/rejected": -4.577720642089844, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.338633158097192, | |
| "grad_norm": 16.31324930298768, | |
| "learning_rate": 1.4865366812951921e-07, | |
| "logits/chosen": -1.0956053733825684, | |
| "logits/rejected": -1.0948615074157715, | |
| "logps/chosen": -44.66694641113281, | |
| "logps/rejected": -53.35417175292969, | |
| "loss": 0.186, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.01459770742803812, | |
| "rewards/margins": 3.5722241401672363, | |
| "rewards/rejected": -3.586822032928467, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.338633158097192, | |
| "eval_logits/chosen": -1.1196925640106201, | |
| "eval_logits/rejected": -1.1241984367370605, | |
| "eval_logps/chosen": -49.20341873168945, | |
| "eval_logps/rejected": -64.5632095336914, | |
| "eval_loss": 0.23692870140075684, | |
| "eval_rewards/accuracies": 0.8352534770965576, | |
| "eval_rewards/chosen": -0.28499341011047363, | |
| "eval_rewards/margins": 3.9500606060028076, | |
| "eval_rewards/rejected": -4.2350544929504395, | |
| "eval_runtime": 227.2284, | |
| "eval_samples_per_second": 7.631, | |
| "eval_steps_per_second": 1.91, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.3432491345044237, | |
| "grad_norm": 20.781517687408783, | |
| "learning_rate": 1.4681367654592446e-07, | |
| "logits/chosen": -1.1334997415542603, | |
| "logits/rejected": -1.1324316263198853, | |
| "logps/chosen": -51.77867889404297, | |
| "logps/rejected": -59.6033935546875, | |
| "loss": 0.1616, | |
| "rewards/accuracies": 0.9444444179534912, | |
| "rewards/chosen": -0.17863652110099792, | |
| "rewards/margins": 3.280036449432373, | |
| "rewards/rejected": -3.4586730003356934, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.3478651109116553, | |
| "grad_norm": 23.414987494186462, | |
| "learning_rate": 1.4498039777620353e-07, | |
| "logits/chosen": -1.1315072774887085, | |
| "logits/rejected": -1.1402992010116577, | |
| "logps/chosen": -56.817928314208984, | |
| "logps/rejected": -78.69415283203125, | |
| "loss": 0.1822, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.08201665431261063, | |
| "rewards/margins": 4.948797702789307, | |
| "rewards/rejected": -4.866781234741211, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.352481087318887, | |
| "grad_norm": 29.704044318217893, | |
| "learning_rate": 1.4315395108479728e-07, | |
| "logits/chosen": -1.170173168182373, | |
| "logits/rejected": -1.1773362159729004, | |
| "logps/chosen": -49.731021881103516, | |
| "logps/rejected": -67.60116577148438, | |
| "loss": 0.1818, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.5396385788917542, | |
| "rewards/margins": 3.7138872146606445, | |
| "rewards/rejected": -4.253525733947754, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3570970637261186, | |
| "grad_norm": 26.692588714788616, | |
| "learning_rate": 1.4133445529168365e-07, | |
| "logits/chosen": -1.1388497352600098, | |
| "logits/rejected": -1.1425156593322754, | |
| "logps/chosen": -54.95615005493164, | |
| "logps/rejected": -69.78382873535156, | |
| "loss": 0.1626, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.5585896968841553, | |
| "rewards/margins": 4.065035820007324, | |
| "rewards/rejected": -4.623625755310059, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3617130401333504, | |
| "grad_norm": 17.240579232471294, | |
| "learning_rate": 1.395220287646483e-07, | |
| "logits/chosen": -1.1489366292953491, | |
| "logits/rejected": -1.1541228294372559, | |
| "logps/chosen": -52.8204345703125, | |
| "logps/rejected": -65.34224700927734, | |
| "loss": 0.1726, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.6690115332603455, | |
| "rewards/margins": 3.6602673530578613, | |
| "rewards/rejected": -4.329278945922852, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3663290165405821, | |
| "grad_norm": 26.542897065871216, | |
| "learning_rate": 1.377167894115837e-07, | |
| "logits/chosen": -1.092912197113037, | |
| "logits/rejected": -1.1030445098876953, | |
| "logps/chosen": -45.3072509765625, | |
| "logps/rejected": -80.26785278320312, | |
| "loss": 0.1874, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.4750543236732483, | |
| "rewards/margins": 4.889781475067139, | |
| "rewards/rejected": -5.3648362159729, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.370944992947814, | |
| "grad_norm": 14.292615359684781, | |
| "learning_rate": 1.3591885467281877e-07, | |
| "logits/chosen": -1.2319241762161255, | |
| "logits/rejected": -1.2370344400405884, | |
| "logps/chosen": -47.54234313964844, | |
| "logps/rejected": -69.16310119628906, | |
| "loss": 0.175, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": -0.6050369739532471, | |
| "rewards/margins": 4.743907451629639, | |
| "rewards/rejected": -5.348944664001465, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3755609693550455, | |
| "grad_norm": 24.44530683273813, | |
| "learning_rate": 1.3412834151347896e-07, | |
| "logits/chosen": -1.1558417081832886, | |
| "logits/rejected": -1.1581149101257324, | |
| "logps/chosen": -51.461524963378906, | |
| "logps/rejected": -66.95028686523438, | |
| "loss": 0.1852, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.5921568274497986, | |
| "rewards/margins": 4.088617324829102, | |
| "rewards/rejected": -4.680773735046387, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3801769457622772, | |
| "grad_norm": 20.235949045232623, | |
| "learning_rate": 1.323453664158769e-07, | |
| "logits/chosen": -1.15675687789917, | |
| "logits/rejected": -1.1713188886642456, | |
| "logps/chosen": -47.177574157714844, | |
| "logps/rejected": -76.56927490234375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": -0.8099576234817505, | |
| "rewards/margins": 4.676289081573486, | |
| "rewards/rejected": -5.486246585845947, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3847929221695088, | |
| "grad_norm": 17.33444042750883, | |
| "learning_rate": 1.3057004537193422e-07, | |
| "logits/chosen": -1.178117036819458, | |
| "logits/rejected": -1.1799274682998657, | |
| "logps/chosen": -53.55020523071289, | |
| "logps/rejected": -62.39503860473633, | |
| "loss": 0.1845, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.5546784996986389, | |
| "rewards/margins": 4.285789966583252, | |
| "rewards/rejected": -4.840468883514404, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3847929221695088, | |
| "eval_logits/chosen": -1.1150095462799072, | |
| "eval_logits/rejected": -1.1200028657913208, | |
| "eval_logps/chosen": -49.0994873046875, | |
| "eval_logps/rejected": -64.55924987792969, | |
| "eval_loss": 0.23688365519046783, | |
| "eval_rewards/accuracies": 0.8312212228775024, | |
| "eval_rewards/chosen": -0.2330285757780075, | |
| "eval_rewards/margins": 4.000042915344238, | |
| "eval_rewards/rejected": -4.2330708503723145, | |
| "eval_runtime": 227.4951, | |
| "eval_samples_per_second": 7.622, | |
| "eval_steps_per_second": 1.908, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3894088985767405, | |
| "grad_norm": 37.7777346930926, | |
| "learning_rate": 1.2880249387563662e-07, | |
| "logits/chosen": -1.1187705993652344, | |
| "logits/rejected": -1.1262003183364868, | |
| "logps/chosen": -51.49440002441406, | |
| "logps/rejected": -74.16474914550781, | |
| "loss": 0.1865, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.5179623365402222, | |
| "rewards/margins": 4.877338409423828, | |
| "rewards/rejected": -5.395299911499023, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3940248749839723, | |
| "grad_norm": 9.235672719669628, | |
| "learning_rate": 1.2704282691551938e-07, | |
| "logits/chosen": -1.116797924041748, | |
| "logits/rejected": -1.1314423084259033, | |
| "logps/chosen": -47.851707458496094, | |
| "logps/rejected": -79.61589813232422, | |
| "loss": 0.1526, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.13351619243621826, | |
| "rewards/margins": 5.681245803833008, | |
| "rewards/rejected": -5.5477294921875, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.398640851391204, | |
| "grad_norm": 22.69345823971204, | |
| "learning_rate": 1.2529115896718714e-07, | |
| "logits/chosen": -1.1550525426864624, | |
| "logits/rejected": -1.158216118812561, | |
| "logps/chosen": -52.753231048583984, | |
| "logps/rejected": -61.50723648071289, | |
| "loss": 0.1924, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": -0.3491640090942383, | |
| "rewards/margins": 3.678175926208496, | |
| "rewards/rejected": -4.027340412139893, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.4032568277984356, | |
| "grad_norm": 18.832219757797976, | |
| "learning_rate": 1.2354760398586708e-07, | |
| "logits/chosen": -1.0966627597808838, | |
| "logits/rejected": -1.108534574508667, | |
| "logps/chosen": -55.49364471435547, | |
| "logps/rejected": -82.76998901367188, | |
| "loss": 0.1459, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": -0.10722073167562485, | |
| "rewards/margins": 5.6869659423828125, | |
| "rewards/rejected": -5.794186592102051, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.4078728042056674, | |
| "grad_norm": 14.9525121487636, | |
| "learning_rate": 1.2181227539899468e-07, | |
| "logits/chosen": -1.1296883821487427, | |
| "logits/rejected": -1.1345244646072388, | |
| "logps/chosen": -52.32283020019531, | |
| "logps/rejected": -67.40359497070312, | |
| "loss": 0.1857, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.009007109329104424, | |
| "rewards/margins": 4.094158172607422, | |
| "rewards/rejected": -4.103165626525879, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.412488780612899, | |
| "grad_norm": 14.336076086644082, | |
| "learning_rate": 1.2008528609883557e-07, | |
| "logits/chosen": -1.1148794889450073, | |
| "logits/rejected": -1.125455379486084, | |
| "logps/chosen": -54.79003143310547, | |
| "logps/rejected": -75.44340515136719, | |
| "loss": 0.1438, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.29885244369506836, | |
| "rewards/margins": 5.630979537963867, | |
| "rewards/rejected": -5.332127571105957, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.4171047570201307, | |
| "grad_norm": 19.871071765511616, | |
| "learning_rate": 1.1836674843514042e-07, | |
| "logits/chosen": -1.1470178365707397, | |
| "logits/rejected": -1.1546311378479004, | |
| "logps/chosen": -43.762474060058594, | |
| "logps/rejected": -64.04544830322266, | |
| "loss": 0.1737, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.18334037065505981, | |
| "rewards/margins": 4.691874027252197, | |
| "rewards/rejected": -4.508533477783203, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.4217207334273625, | |
| "grad_norm": 12.336389870346276, | |
| "learning_rate": 1.1665677420783671e-07, | |
| "logits/chosen": -1.1334505081176758, | |
| "logits/rejected": -1.1354079246520996, | |
| "logps/chosen": -48.85646057128906, | |
| "logps/rejected": -58.98961639404297, | |
| "loss": 0.1774, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.5762940645217896, | |
| "rewards/margins": 4.42154598236084, | |
| "rewards/rejected": -3.84525203704834, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.4263367098345943, | |
| "grad_norm": 26.99839545918975, | |
| "learning_rate": 1.149554746597553e-07, | |
| "logits/chosen": -1.153773546218872, | |
| "logits/rejected": -1.1617780923843384, | |
| "logps/chosen": -51.52302551269531, | |
| "logps/rejected": -71.93586730957031, | |
| "loss": 0.2209, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": -0.06947656720876694, | |
| "rewards/margins": 4.659074783325195, | |
| "rewards/rejected": -4.728551387786865, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.4309526862418258, | |
| "grad_norm": 23.120670434819147, | |
| "learning_rate": 1.1326296046939333e-07, | |
| "logits/chosen": -1.2297728061676025, | |
| "logits/rejected": -1.2327196598052979, | |
| "logps/chosen": -46.15282440185547, | |
| "logps/rejected": -57.13591766357422, | |
| "loss": 0.2511, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.22867631912231445, | |
| "rewards/margins": 3.8101589679718018, | |
| "rewards/rejected": -3.5814828872680664, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.4309526862418258, | |
| "eval_logits/chosen": -1.1074106693267822, | |
| "eval_logits/rejected": -1.1129966974258423, | |
| "eval_logps/chosen": -48.4957389831543, | |
| "eval_logps/rejected": -64.15252685546875, | |
| "eval_loss": 0.23753519356250763, | |
| "eval_rewards/accuracies": 0.8341013789176941, | |
| "eval_rewards/chosen": 0.06884526461362839, | |
| "eval_rewards/margins": 4.098559379577637, | |
| "eval_rewards/rejected": -4.029714107513428, | |
| "eval_runtime": 227.1817, | |
| "eval_samples_per_second": 7.633, | |
| "eval_steps_per_second": 1.91, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.4355686626490576, | |
| "grad_norm": 19.372274896588006, | |
| "learning_rate": 1.1157934174371413e-07, | |
| "logits/chosen": -1.1408151388168335, | |
| "logits/rejected": -1.15198814868927, | |
| "logps/chosen": -50.5245361328125, | |
| "logps/rejected": -73.73757934570312, | |
| "loss": 0.1935, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.12286674976348877, | |
| "rewards/margins": 4.926203727722168, | |
| "rewards/rejected": -4.803337574005127, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.4401846390562894, | |
| "grad_norm": 17.679599039225316, | |
| "learning_rate": 1.0990472801098419e-07, | |
| "logits/chosen": -1.1729627847671509, | |
| "logits/rejected": -1.1764029264450073, | |
| "logps/chosen": -45.59742736816406, | |
| "logps/rejected": -67.27173614501953, | |
| "loss": 0.1504, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.4040929973125458, | |
| "rewards/margins": 4.669921398162842, | |
| "rewards/rejected": -4.265828609466553, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.444800615463521, | |
| "grad_norm": 21.626789388307476, | |
| "learning_rate": 1.0823922821364795e-07, | |
| "logits/chosen": -1.0937749147415161, | |
| "logits/rejected": -1.0970505475997925, | |
| "logps/chosen": -57.45598602294922, | |
| "logps/rejected": -64.92636108398438, | |
| "loss": 0.1922, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.20599400997161865, | |
| "rewards/margins": 4.274693489074707, | |
| "rewards/rejected": -4.068699836730957, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.4494165918707527, | |
| "grad_norm": 22.846453164687816, | |
| "learning_rate": 1.0658295070124026e-07, | |
| "logits/chosen": -1.1624855995178223, | |
| "logits/rejected": -1.1643033027648926, | |
| "logps/chosen": -54.79329299926758, | |
| "logps/rejected": -61.6580810546875, | |
| "loss": 0.2111, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.43274542689323425, | |
| "rewards/margins": 4.024546146392822, | |
| "rewards/rejected": -3.5918006896972656, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.4540325682779844, | |
| "grad_norm": 35.15148920852289, | |
| "learning_rate": 1.0493600322333762e-07, | |
| "logits/chosen": -1.1498773097991943, | |
| "logits/rejected": -1.1634445190429688, | |
| "logps/chosen": -50.85679626464844, | |
| "logps/rejected": -84.39616394042969, | |
| "loss": 0.167, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": -0.023858733475208282, | |
| "rewards/margins": 6.123683929443359, | |
| "rewards/rejected": -6.147542953491211, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.458648544685216, | |
| "grad_norm": 30.044775332799237, | |
| "learning_rate": 1.0329849292254883e-07, | |
| "logits/chosen": -1.0466785430908203, | |
| "logits/rejected": -1.0532869100570679, | |
| "logps/chosen": -50.80934143066406, | |
| "logps/rejected": -70.91539001464844, | |
| "loss": 0.2205, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.08874227106571198, | |
| "rewards/margins": 4.631314277648926, | |
| "rewards/rejected": -4.542571544647217, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4632645210924478, | |
| "grad_norm": 27.395602840016966, | |
| "learning_rate": 1.0167052632754458e-07, | |
| "logits/chosen": -1.1735262870788574, | |
| "logits/rejected": -1.1718860864639282, | |
| "logps/chosen": -46.975406646728516, | |
| "logps/rejected": -56.96874237060547, | |
| "loss": 0.2108, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.012903611175715923, | |
| "rewards/margins": 3.0641860961914062, | |
| "rewards/rejected": -3.0512828826904297, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.4678804974996795, | |
| "grad_norm": 19.703963008444365, | |
| "learning_rate": 1.0005220934612713e-07, | |
| "logits/chosen": -1.0792059898376465, | |
| "logits/rejected": -1.0779961347579956, | |
| "logps/chosen": -54.955718994140625, | |
| "logps/rejected": -60.88299560546875, | |
| "loss": 0.1733, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.10138123482465744, | |
| "rewards/margins": 3.779219627380371, | |
| "rewards/rejected": -3.6778385639190674, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.472496473906911, | |
| "grad_norm": 26.332520125277515, | |
| "learning_rate": 9.844364725834056e-08, | |
| "logits/chosen": -1.1881197690963745, | |
| "logits/rejected": -1.201986312866211, | |
| "logps/chosen": -51.59233093261719, | |
| "logps/rejected": -88.54263305664062, | |
| "loss": 0.1426, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.19230936467647552, | |
| "rewards/margins": 6.799825668334961, | |
| "rewards/rejected": -6.607515811920166, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4771124503141428, | |
| "grad_norm": 28.45092893361144, | |
| "learning_rate": 9.68449447096217e-08, | |
| "logits/chosen": -1.2595468759536743, | |
| "logits/rejected": -1.2635498046875, | |
| "logps/chosen": -45.43921661376953, | |
| "logps/rejected": -59.12704849243164, | |
| "loss": 0.3573, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": 0.18244637548923492, | |
| "rewards/margins": 3.4541211128234863, | |
| "rewards/rejected": -3.271674394607544, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4771124503141428, | |
| "eval_logits/chosen": -1.1004152297973633, | |
| "eval_logits/rejected": -1.1063543558120728, | |
| "eval_logps/chosen": -48.19259262084961, | |
| "eval_logps/rejected": -63.969642639160156, | |
| "eval_loss": 0.23769782483577728, | |
| "eval_rewards/accuracies": 0.8346773982048035, | |
| "eval_rewards/chosen": 0.22041727602481842, | |
| "eval_rewards/margins": 4.15868616104126, | |
| "eval_rewards/rejected": -3.9382688999176025, | |
| "eval_runtime": 227.6304, | |
| "eval_samples_per_second": 7.618, | |
| "eval_steps_per_second": 1.907, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4817284267213746, | |
| "grad_norm": 24.31958981085731, | |
| "learning_rate": 9.525620570399259e-08, | |
| "logits/chosen": -1.1648564338684082, | |
| "logits/rejected": -1.1766667366027832, | |
| "logps/chosen": -50.69769287109375, | |
| "logps/rejected": -73.31265258789062, | |
| "loss": 0.138, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.1818230152130127, | |
| "rewards/margins": 4.770647048950195, | |
| "rewards/rejected": -4.5888237953186035, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4863444031286062, | |
| "grad_norm": 16.147422378148644, | |
| "learning_rate": 9.36775335972943e-08, | |
| "logits/chosen": -1.1746997833251953, | |
| "logits/rejected": -1.2103776931762695, | |
| "logps/chosen": -45.329200744628906, | |
| "logps/rejected": -116.44515228271484, | |
| "loss": 0.166, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.2069842666387558, | |
| "rewards/margins": 9.259431838989258, | |
| "rewards/rejected": -9.052447319030762, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.490960379535838, | |
| "grad_norm": 26.623831420383585, | |
| "learning_rate": 9.210903109046284e-08, | |
| "logits/chosen": -1.186785340309143, | |
| "logits/rejected": -1.1978414058685303, | |
| "logps/chosen": -49.64828872680664, | |
| "logps/rejected": -75.10488891601562, | |
| "loss": 0.1953, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": -0.27270856499671936, | |
| "rewards/margins": 5.708976745605469, | |
| "rewards/rejected": -5.981686115264893, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4955763559430697, | |
| "grad_norm": 20.261642357856545, | |
| "learning_rate": 9.05508002228485e-08, | |
| "logits/chosen": -1.1485164165496826, | |
| "logits/rejected": -1.1560351848602295, | |
| "logps/chosen": -43.76812744140625, | |
| "logps/rejected": -61.48163604736328, | |
| "loss": 0.2165, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.30232900381088257, | |
| "rewards/margins": 4.651912689208984, | |
| "rewards/rejected": -4.349584579467773, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.5001923323503012, | |
| "grad_norm": 16.434029065803497, | |
| "learning_rate": 8.900294236557707e-08, | |
| "logits/chosen": -1.1660001277923584, | |
| "logits/rejected": -1.1689536571502686, | |
| "logps/chosen": -43.022518157958984, | |
| "logps/rejected": -55.89129638671875, | |
| "loss": 0.2104, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.252067506313324, | |
| "rewards/margins": 3.607164144515991, | |
| "rewards/rejected": -3.3550968170166016, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.504808308757533, | |
| "grad_norm": 17.153074627752197, | |
| "learning_rate": 8.746555821495561e-08, | |
| "logits/chosen": -1.1337089538574219, | |
| "logits/rejected": -1.149064540863037, | |
| "logps/chosen": -49.65050506591797, | |
| "logps/rejected": -73.29889678955078, | |
| "loss": 0.168, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.3097386956214905, | |
| "rewards/margins": 5.331449031829834, | |
| "rewards/rejected": -5.0217108726501465, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.5094242851647648, | |
| "grad_norm": 23.56988440345321, | |
| "learning_rate": 8.593874778592122e-08, | |
| "logits/chosen": -1.1784387826919556, | |
| "logits/rejected": -1.1783757209777832, | |
| "logps/chosen": -43.02362823486328, | |
| "logps/rejected": -56.035400390625, | |
| "loss": 0.1716, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.2998148500919342, | |
| "rewards/margins": 3.706061840057373, | |
| "rewards/rejected": -3.4062466621398926, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.5140402615719966, | |
| "grad_norm": 24.44217268761521, | |
| "learning_rate": 8.442261040553472e-08, | |
| "logits/chosen": -1.1735872030258179, | |
| "logits/rejected": -1.1768840551376343, | |
| "logps/chosen": -50.59683609008789, | |
| "logps/rejected": -56.11674880981445, | |
| "loss": 0.1561, | |
| "rewards/accuracies": 0.9444444179534912, | |
| "rewards/chosen": 0.28288352489471436, | |
| "rewards/margins": 3.881596088409424, | |
| "rewards/rejected": -3.59871244430542, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.518656237979228, | |
| "grad_norm": 32.02127603424348, | |
| "learning_rate": 8.291724470651903e-08, | |
| "logits/chosen": -1.1492629051208496, | |
| "logits/rejected": -1.1569753885269165, | |
| "logps/chosen": -51.28215026855469, | |
| "logps/rejected": -65.28805541992188, | |
| "loss": 0.2696, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.23993118107318878, | |
| "rewards/margins": 3.802645444869995, | |
| "rewards/rejected": -4.042576313018799, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.5232722143864597, | |
| "grad_norm": 16.1030974632108, | |
| "learning_rate": 8.14227486208423e-08, | |
| "logits/chosen": -1.2377209663391113, | |
| "logits/rejected": -1.2406808137893677, | |
| "logps/chosen": -45.322593688964844, | |
| "logps/rejected": -61.13237762451172, | |
| "loss": 0.1727, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.44821304082870483, | |
| "rewards/margins": 4.551301956176758, | |
| "rewards/rejected": -4.103089332580566, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.5232722143864597, | |
| "eval_logits/chosen": -1.106886386871338, | |
| "eval_logits/rejected": -1.1123414039611816, | |
| "eval_logps/chosen": -48.28040313720703, | |
| "eval_logps/rejected": -64.126708984375, | |
| "eval_loss": 0.23655745387077332, | |
| "eval_rewards/accuracies": 0.8346773982048035, | |
| "eval_rewards/chosen": 0.17651285231113434, | |
| "eval_rewards/margins": 4.193314552307129, | |
| "eval_rewards/rejected": -4.016801834106445, | |
| "eval_runtime": 227.3008, | |
| "eval_samples_per_second": 7.629, | |
| "eval_steps_per_second": 1.909, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.5278881907936914, | |
| "grad_norm": 26.89012407667588, | |
| "learning_rate": 7.993921937334716e-08, | |
| "logits/chosen": -1.1547244787216187, | |
| "logits/rejected": -1.1549021005630493, | |
| "logps/chosen": -48.07485580444336, | |
| "logps/rejected": -55.9273567199707, | |
| "loss": 0.2437, | |
| "rewards/accuracies": 0.8055555820465088, | |
| "rewards/chosen": 0.16911821067333221, | |
| "rewards/margins": 3.7287042140960693, | |
| "rewards/rejected": -3.5595860481262207, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.5325041672009232, | |
| "grad_norm": 9.277145884274647, | |
| "learning_rate": 7.846675347542578e-08, | |
| "logits/chosen": -1.0987221002578735, | |
| "logits/rejected": -1.0982441902160645, | |
| "logps/chosen": -44.44068908691406, | |
| "logps/rejected": -55.658538818359375, | |
| "loss": 0.1239, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.7777649760246277, | |
| "rewards/margins": 4.145383834838867, | |
| "rewards/rejected": -3.3676185607910156, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.537120143608155, | |
| "grad_norm": 23.47472041668218, | |
| "learning_rate": 7.700544671874079e-08, | |
| "logits/chosen": -1.1225018501281738, | |
| "logits/rejected": -1.1206145286560059, | |
| "logps/chosen": -55.324520111083984, | |
| "logps/rejected": -59.72848892211914, | |
| "loss": 0.1971, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.16201752424240112, | |
| "rewards/margins": 3.718522071838379, | |
| "rewards/rejected": -3.556504487991333, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.5417361200153867, | |
| "grad_norm": 36.079809765281745, | |
| "learning_rate": 7.555539416899437e-08, | |
| "logits/chosen": -1.1977320909500122, | |
| "logits/rejected": -1.2057994604110718, | |
| "logps/chosen": -42.88581848144531, | |
| "logps/rejected": -62.64973831176758, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.011536385864019394, | |
| "rewards/margins": 4.496410369873047, | |
| "rewards/rejected": -4.484873294830322, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.5463520964226183, | |
| "grad_norm": 24.671849788858164, | |
| "learning_rate": 7.41166901597429e-08, | |
| "logits/chosen": -1.115236520767212, | |
| "logits/rejected": -1.117641806602478, | |
| "logps/chosen": -48.77861785888672, | |
| "logps/rejected": -63.02436065673828, | |
| "loss": 0.1757, | |
| "rewards/accuracies": 0.9027777910232544, | |
| "rewards/chosen": 0.31542646884918213, | |
| "rewards/margins": 4.368900775909424, | |
| "rewards/rejected": -4.053474426269531, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.5509680728298498, | |
| "grad_norm": 22.124151348706462, | |
| "learning_rate": 7.268942828626046e-08, | |
| "logits/chosen": -1.201170802116394, | |
| "logits/rejected": -1.2061718702316284, | |
| "logps/chosen": -46.17606735229492, | |
| "logps/rejected": -60.99459457397461, | |
| "loss": 0.2017, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.4047514796257019, | |
| "rewards/margins": 4.501495361328125, | |
| "rewards/rejected": -4.096743583679199, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.5555840492370816, | |
| "grad_norm": 24.405076004524442, | |
| "learning_rate": 7.127370139945018e-08, | |
| "logits/chosen": -1.1625523567199707, | |
| "logits/rejected": -1.1654243469238281, | |
| "logps/chosen": -48.26844024658203, | |
| "logps/rejected": -65.458740234375, | |
| "loss": 0.1711, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.4592117667198181, | |
| "rewards/margins": 4.522068500518799, | |
| "rewards/rejected": -4.062856674194336, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.5602000256443134, | |
| "grad_norm": 18.612456705571446, | |
| "learning_rate": 6.986960159980326e-08, | |
| "logits/chosen": -1.1604725122451782, | |
| "logits/rejected": -1.16167414188385, | |
| "logps/chosen": -50.374324798583984, | |
| "logps/rejected": -61.40808868408203, | |
| "loss": 0.1842, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.5715973377227783, | |
| "rewards/margins": 3.9924135208129883, | |
| "rewards/rejected": -3.42081618309021, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5648160020515451, | |
| "grad_norm": 43.48667538563028, | |
| "learning_rate": 6.847722023140776e-08, | |
| "logits/chosen": -1.2388062477111816, | |
| "logits/rejected": -1.234206199645996, | |
| "logps/chosen": -44.29112243652344, | |
| "logps/rejected": -51.78242492675781, | |
| "loss": 0.2001, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.6832877993583679, | |
| "rewards/margins": 3.618730306625366, | |
| "rewards/rejected": -2.9354422092437744, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.569431978458777, | |
| "grad_norm": 28.795198035373833, | |
| "learning_rate": 6.709664787600616e-08, | |
| "logits/chosen": -1.209147572517395, | |
| "logits/rejected": -1.2093759775161743, | |
| "logps/chosen": -42.70883560180664, | |
| "logps/rejected": -50.379886627197266, | |
| "loss": 0.2779, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.21883456408977509, | |
| "rewards/margins": 2.795126438140869, | |
| "rewards/rejected": -2.576291799545288, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.569431978458777, | |
| "eval_logits/chosen": -1.118242621421814, | |
| "eval_logits/rejected": -1.1229368448257446, | |
| "eval_logps/chosen": -47.97200012207031, | |
| "eval_logps/rejected": -63.929046630859375, | |
| "eval_loss": 0.2367607206106186, | |
| "eval_rewards/accuracies": 0.8312212228775024, | |
| "eval_rewards/chosen": 0.33071503043174744, | |
| "eval_rewards/margins": 4.248687744140625, | |
| "eval_rewards/rejected": -3.9179720878601074, | |
| "eval_runtime": 227.4731, | |
| "eval_samples_per_second": 7.623, | |
| "eval_steps_per_second": 1.908, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5740479548660085, | |
| "grad_norm": 22.618944291226985, | |
| "learning_rate": 6.572797434710219e-08, | |
| "logits/chosen": -1.2124552726745605, | |
| "logits/rejected": -1.2253483533859253, | |
| "logps/chosen": -45.591331481933594, | |
| "logps/rejected": -77.16690063476562, | |
| "loss": 0.1948, | |
| "rewards/accuracies": 0.8611111044883728, | |
| "rewards/chosen": 0.740224301815033, | |
| "rewards/margins": 5.656050682067871, | |
| "rewards/rejected": -4.91582727432251, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.57866393127324, | |
| "grad_norm": 17.206603905842275, | |
| "learning_rate": 6.437128868411856e-08, | |
| "logits/chosen": -1.1554473638534546, | |
| "logits/rejected": -1.1552696228027344, | |
| "logps/chosen": -45.49040222167969, | |
| "logps/rejected": -53.40934753417969, | |
| "loss": 0.2204, | |
| "rewards/accuracies": 0.8194444179534912, | |
| "rewards/chosen": 0.516470193862915, | |
| "rewards/margins": 3.7008235454559326, | |
| "rewards/rejected": -3.1843535900115967, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5832799076804718, | |
| "grad_norm": 14.245504042199357, | |
| "learning_rate": 6.302667914660384e-08, | |
| "logits/chosen": -1.1610139608383179, | |
| "logits/rejected": -1.1691855192184448, | |
| "logps/chosen": -41.98570251464844, | |
| "logps/rejected": -61.45694351196289, | |
| "loss": 0.2166, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.34538733959198, | |
| "rewards/margins": 4.076152324676514, | |
| "rewards/rejected": -3.7307653427124023, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5878958840877035, | |
| "grad_norm": 25.659568066978697, | |
| "learning_rate": 6.169423320849112e-08, | |
| "logits/chosen": -1.139572024345398, | |
| "logits/rejected": -1.1325372457504272, | |
| "logps/chosen": -52.09668731689453, | |
| "logps/rejected": -52.88841247558594, | |
| "loss": 0.1753, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.5496838092803955, | |
| "rewards/margins": 3.8007943630218506, | |
| "rewards/rejected": -3.251110553741455, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5925118604949353, | |
| "grad_norm": 23.501661963559606, | |
| "learning_rate": 6.037403755240748e-08, | |
| "logits/chosen": -1.1798467636108398, | |
| "logits/rejected": -1.1862027645111084, | |
| "logps/chosen": -51.716487884521484, | |
| "logps/rejected": -67.60077667236328, | |
| "loss": 0.175, | |
| "rewards/accuracies": 0.9305555820465088, | |
| "rewards/chosen": 0.23453055322170258, | |
| "rewards/margins": 4.55530309677124, | |
| "rewards/rejected": -4.320772171020508, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.597127836902167, | |
| "grad_norm": 17.275740083868094, | |
| "learning_rate": 5.9066178064034326e-08, | |
| "logits/chosen": -1.1528249979019165, | |
| "logits/rejected": -1.1694761514663696, | |
| "logps/chosen": -38.17667770385742, | |
| "logps/rejected": -83.85364532470703, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.28412505984306335, | |
| "rewards/margins": 6.359869003295898, | |
| "rewards/rejected": -6.075743675231934, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.6017438133093986, | |
| "grad_norm": 26.873806928959727, | |
| "learning_rate": 5.777073982652064e-08, | |
| "logits/chosen": -1.1307650804519653, | |
| "logits/rejected": -1.132928490638733, | |
| "logps/chosen": -41.0783576965332, | |
| "logps/rejected": -60.499267578125, | |
| "loss": 0.2291, | |
| "rewards/accuracies": 0.8472222089767456, | |
| "rewards/chosen": 0.24884945154190063, | |
| "rewards/margins": 4.286365509033203, | |
| "rewards/rejected": -4.037516117095947, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.6063597897166302, | |
| "grad_norm": 32.76563481878735, | |
| "learning_rate": 5.6487807114947325e-08, | |
| "logits/chosen": -1.133086085319519, | |
| "logits/rejected": -1.149233102798462, | |
| "logps/chosen": -49.21199035644531, | |
| "logps/rejected": -82.91252899169922, | |
| "loss": 0.1834, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.2540854215621948, | |
| "rewards/margins": 5.426255226135254, | |
| "rewards/rejected": -5.172169208526611, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.610975766123862, | |
| "grad_norm": 30.041135081079663, | |
| "learning_rate": 5.521746339084532e-08, | |
| "logits/chosen": -1.1215559244155884, | |
| "logits/rejected": -1.130250334739685, | |
| "logps/chosen": -54.02631759643555, | |
| "logps/rejected": -67.99839782714844, | |
| "loss": 0.2581, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.2139756679534912, | |
| "rewards/margins": 4.321435928344727, | |
| "rewards/rejected": -4.107460021972656, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.6155917425310937, | |
| "grad_norm": 21.854488700327504, | |
| "learning_rate": 5.39597912967652e-08, | |
| "logits/chosen": -1.131272315979004, | |
| "logits/rejected": -1.1449000835418701, | |
| "logps/chosen": -45.570159912109375, | |
| "logps/rejected": -73.73873901367188, | |
| "loss": 0.1902, | |
| "rewards/accuracies": 0.8888888955116272, | |
| "rewards/chosen": 0.329825758934021, | |
| "rewards/margins": 5.084650039672852, | |
| "rewards/rejected": -4.754825115203857, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.6155917425310937, | |
| "eval_logits/chosen": -1.1056500673294067, | |
| "eval_logits/rejected": -1.1112511157989502, | |
| "eval_logps/chosen": -48.541831970214844, | |
| "eval_logps/rejected": -64.42839050292969, | |
| "eval_loss": 0.23584917187690735, | |
| "eval_rewards/accuracies": 0.8323732614517212, | |
| "eval_rewards/chosen": 0.04579799994826317, | |
| "eval_rewards/margins": 4.213436603546143, | |
| "eval_rewards/rejected": -4.167638778686523, | |
| "eval_runtime": 227.3674, | |
| "eval_samples_per_second": 7.626, | |
| "eval_steps_per_second": 1.909, | |
| "step": 700 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 866, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |