{ "best_metric": 0.44274672865867615, "best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/saa-200/checkpoint-100", "epoch": 9.777777777777779, "eval_steps": 50, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8888888888888888, "grad_norm": 7.16807222366333, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -0.41954341530799866, "logits/rejected": -0.4998183846473694, "logps/chosen": -1.7371799945831299, "logps/rejected": -2.1413609981536865, "loss": 1.7918, "odds_ratio_loss": 15.833269119262695, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.17371799051761627, "rewards/margins": 0.040418121963739395, "rewards/rejected": -0.21413612365722656, "sft_loss": 0.208452507853508, "step": 10 }, { "epoch": 1.7777777777777777, "grad_norm": 5.526947975158691, "learning_rate": 4.8987324340362445e-06, "logits/chosen": -0.40991297364234924, "logits/rejected": -0.5038880109786987, "logps/chosen": -1.6078698635101318, "logps/rejected": -2.1200649738311768, "loss": 1.6582, "odds_ratio_loss": 14.647314071655273, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.16078701615333557, "rewards/margins": 0.05121947452425957, "rewards/rejected": -0.21200647950172424, "sft_loss": 0.1935155689716339, "step": 20 }, { "epoch": 2.6666666666666665, "grad_norm": 5.686792850494385, "learning_rate": 4.559191453574582e-06, "logits/chosen": -0.3767138719558716, "logits/rejected": -0.4611291289329529, "logps/chosen": -1.556265115737915, "logps/rejected": -1.9789321422576904, "loss": 1.6106, "odds_ratio_loss": 14.2000732421875, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15562652051448822, "rewards/margins": 0.04226671904325485, "rewards/rejected": -0.19789323210716248, "sft_loss": 0.19055578112602234, "step": 30 }, { "epoch": 3.5555555555555554, "grad_norm": 4.273503303527832, "learning_rate": 4.014024217844167e-06, "logits/chosen": -0.3970526158809662, "logits/rejected": -0.4816371500492096, "logps/chosen": -1.2386114597320557, "logps/rejected": -1.6725366115570068, "loss": 1.2891, "odds_ratio_loss": 11.406962394714355, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.12386113405227661, "rewards/margins": 0.043392546474933624, "rewards/rejected": -0.16725368797779083, "sft_loss": 0.14835821092128754, "step": 40 }, { "epoch": 4.444444444444445, "grad_norm": 4.05686616897583, "learning_rate": 3.3176699082935546e-06, "logits/chosen": -0.4154660701751709, "logits/rejected": -0.5044783353805542, "logps/chosen": -0.9328486323356628, "logps/rejected": -1.4596943855285645, "loss": 0.978, "odds_ratio_loss": 8.69324016571045, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09328486770391464, "rewards/margins": 0.05268458276987076, "rewards/rejected": -0.1459694653749466, "sft_loss": 0.10864432901144028, "step": 50 }, { "epoch": 4.444444444444445, "eval_logits/chosen": -0.38087520003318787, "eval_logits/rejected": -0.4789901673793793, "eval_logps/chosen": -0.800963282585144, "eval_logps/rejected": -1.4605594873428345, "eval_loss": 0.8368849754333496, "eval_odds_ratio_loss": 7.350626468658447, "eval_rewards/accuracies": 0.949999988079071, "eval_rewards/chosen": -0.08009632676839828, "eval_rewards/margins": 0.06595960259437561, "eval_rewards/rejected": -0.1460559368133545, "eval_runtime": 0.853, "eval_samples_per_second": 23.447, "eval_sft_loss": 0.10182241350412369, "eval_steps_per_second": 11.723, "step": 50 }, { "epoch": 5.333333333333333, "grad_norm": 4.957231521606445, "learning_rate": 2.53966490958702e-06, "logits/chosen": -0.38898375630378723, "logits/rejected": -0.4743614196777344, "logps/chosen": -0.871505618095398, "logps/rejected": -1.3147119283676147, "loss": 0.9207, "odds_ratio_loss": 8.203462600708008, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08715056627988815, "rewards/margins": 0.04432063177227974, "rewards/rejected": -0.1314712017774582, "sft_loss": 0.1003449410200119, "step": 60 }, { "epoch": 6.222222222222222, "grad_norm": 3.597581148147583, "learning_rate": 1.7576990616793139e-06, "logits/chosen": -0.3860725462436676, "logits/rejected": -0.486691415309906, "logps/chosen": -0.6816452145576477, "logps/rejected": -1.1765670776367188, "loss": 0.7275, "odds_ratio_loss": 6.550581455230713, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06816451996564865, "rewards/margins": 0.04949219152331352, "rewards/rejected": -0.11765670776367188, "sft_loss": 0.07243607193231583, "step": 70 }, { "epoch": 7.111111111111111, "grad_norm": 2.979541540145874, "learning_rate": 1.049857726072005e-06, "logits/chosen": -0.38262826204299927, "logits/rejected": -0.4803852140903473, "logps/chosen": -0.5585015416145325, "logps/rejected": -1.091475248336792, "loss": 0.6002, "odds_ratio_loss": 5.424932956695557, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05585014820098877, "rewards/margins": 0.05329737812280655, "rewards/rejected": -0.10914752632379532, "sft_loss": 0.057719189673662186, "step": 80 }, { "epoch": 8.0, "grad_norm": 3.6893234252929688, "learning_rate": 4.868243561723535e-07, "logits/chosen": -0.3842761516571045, "logits/rejected": -0.4856538772583008, "logps/chosen": -0.5467501878738403, "logps/rejected": -1.051489233970642, "loss": 0.5909, "odds_ratio_loss": 5.326608657836914, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.05467502027750015, "rewards/margins": 0.050473909825086594, "rewards/rejected": -0.10514893382787704, "sft_loss": 0.058202050626277924, "step": 90 }, { "epoch": 8.88888888888889, "grad_norm": 3.0479986667633057, "learning_rate": 1.2482220564763669e-07, "logits/chosen": -0.4013673663139343, "logits/rejected": -0.4972509741783142, "logps/chosen": -0.5072588324546814, "logps/rejected": -1.0165048837661743, "loss": 0.5508, "odds_ratio_loss": 4.971884727478027, "rewards/accuracies": 0.78125, "rewards/chosen": -0.05072588473558426, "rewards/margins": 0.05092460662126541, "rewards/rejected": -0.10165047645568848, "sft_loss": 0.05361882969737053, "step": 100 }, { "epoch": 8.88888888888889, "eval_logits/chosen": -0.36703231930732727, "eval_logits/rejected": -0.46598997712135315, "eval_logps/chosen": -0.4136781096458435, "eval_logps/rejected": -1.1247258186340332, "eval_loss": 0.44274672865867615, "eval_odds_ratio_loss": 3.9738361835479736, "eval_rewards/accuracies": 0.8999999761581421, "eval_rewards/chosen": -0.04136781021952629, "eval_rewards/margins": 0.07110477983951569, "eval_rewards/rejected": -0.11247257888317108, "eval_runtime": 0.8395, "eval_samples_per_second": 23.823, "eval_sft_loss": 0.04536313936114311, "eval_steps_per_second": 11.912, "step": 100 }, { "epoch": 9.777777777777779, "grad_norm": 3.0762059688568115, "learning_rate": 0.0, "logits/chosen": -0.3940914571285248, "logits/rejected": -0.48527806997299194, "logps/chosen": -0.5214211940765381, "logps/rejected": -1.0149188041687012, "loss": 0.5639, "odds_ratio_loss": 5.071834087371826, "rewards/accuracies": 0.78125, "rewards/chosen": -0.05214212089776993, "rewards/margins": 0.049349766224622726, "rewards/rejected": -0.10149188339710236, "sft_loss": 0.056711114943027496, "step": 110 }, { "epoch": 9.777777777777779, "step": 110, "total_flos": 2.0090891645485056e+16, "train_loss": 1.0255976850336248, "train_runtime": 677.2548, "train_samples_per_second": 2.658, "train_steps_per_second": 0.162 } ], "logging_steps": 10, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0090891645485056e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }