xaobai's picture
End of training
c4d1abe verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 280.6875,
"epoch": 0.1,
"grad_norm": 53.05036163330078,
"kl": 0.0,
"learning_rate": 4.965903258506806e-07,
"loss": -0.0,
"reward": 3.6584795396775007,
"reward_std": 0.5753003612917382,
"rewards/concensus_correctness_reward_func": 1.011624988168478,
"rewards/consensus_reward_func": 1.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.656604582327418,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.171875,
"rewards/xmlcount_reward_func": 0.7558750007301569,
"step": 2
},
{
"completion_length": 171.03125,
"epoch": 0.2,
"grad_norm": 169.1420135498047,
"kl": 10.232354334380943,
"learning_rate": 4.698684378016222e-07,
"loss": 0.0102,
"reward": 6.453376889228821,
"reward_std": 0.14471963373944163,
"rewards/concensus_correctness_reward_func": 1.8348749801516533,
"rewards/consensus_reward_func": 2.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.9452207088470459,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.34375,
"rewards/xmlcount_reward_func": 1.2045312523841858,
"step": 4
},
{
"completion_length": 167.6875,
"epoch": 0.3,
"grad_norm": 273064.90625,
"kl": 468753.35245687794,
"learning_rate": 4.193203929064353e-07,
"loss": 468.7533,
"reward": 6.525232225656509,
"reward_std": 0.3649999493500218,
"rewards/concensus_correctness_reward_func": 2.006375003606081,
"rewards/consensus_reward_func": 1.8125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.8971072547137737,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.359375,
"rewards/xmlcount_reward_func": 1.1998750008642673,
"step": 6
},
{
"completion_length": 160.8125,
"epoch": 0.4,
"grad_norm": 167036304.0,
"kl": 3727724.481420556,
"learning_rate": 3.5042385616324236e-07,
"loss": 3727.7244,
"reward": 7.081181287765503,
"reward_std": 0.10486791450239252,
"rewards/concensus_correctness_reward_func": 2.170249997638166,
"rewards/consensus_reward_func": 2.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.375,
"rewards/question_recreation_reward_func": 0.9093687888234854,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.40625,
"rewards/xmlcount_reward_func": 1.2203124985098839,
"step": 8
},
{
"completion_length": 147.0625,
"epoch": 0.5,
"grad_norm": 4871.97412109375,
"kl": 539.2818314363249,
"learning_rate": 2.706448363680831e-07,
"loss": 0.5393,
"reward": 6.877287954092026,
"reward_std": 0.44364456451148726,
"rewards/concensus_correctness_reward_func": 2.1652499809861183,
"rewards/consensus_reward_func": 1.9375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.375,
"rewards/question_recreation_reward_func": 0.8499442553147674,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.328125,
"rewards/xmlcount_reward_func": 1.2214687392115593,
"step": 10
},
{
"completion_length": 175.53125,
"epoch": 0.6,
"grad_norm": 50525.3671875,
"kl": 2825.989122052677,
"learning_rate": 1.886286282148002e-07,
"loss": 2.826,
"reward": 6.0079357624053955,
"reward_std": 0.43696475913748145,
"rewards/concensus_correctness_reward_func": 1.7586249820888042,
"rewards/consensus_reward_func": 1.9375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.7420295961201191,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.359375,
"rewards/xmlcount_reward_func": 1.210406243801117,
"step": 12
},
{
"completion_length": 177.8125,
"epoch": 0.7,
"grad_norm": 505.6527404785156,
"kl": 1061.56522446312,
"learning_rate": 1.1326296046939333e-07,
"loss": 1.0616,
"reward": 6.597704261541367,
"reward_std": 0.4343329582188744,
"rewards/concensus_correctness_reward_func": 2.0284374840557575,
"rewards/consensus_reward_func": 1.8125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.8877355121076107,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.375,
"rewards/xmlcount_reward_func": 1.2440312504768372,
"step": 14
},
{
"completion_length": 144.71875,
"epoch": 0.8,
"grad_norm": 49.680084228515625,
"kl": 12.081052286550403,
"learning_rate": 5.271487265090163e-08,
"loss": 0.0121,
"reward": 6.665501236915588,
"reward_std": 0.5225161880371161,
"rewards/concensus_correctness_reward_func": 2.029374983161688,
"rewards/consensus_reward_func": 1.9375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.8862824384123087,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.390625,
"rewards/xmlcount_reward_func": 1.234218753874302,
"step": 16
},
{
"completion_length": 147.0625,
"epoch": 0.9,
"grad_norm": 730.200927734375,
"kl": 86.71727948682383,
"learning_rate": 1.3545689574841341e-08,
"loss": 0.0867,
"reward": 6.578728884458542,
"reward_std": 0.26941974822693737,
"rewards/concensus_correctness_reward_func": 2.021124981343746,
"rewards/consensus_reward_func": 1.75,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3125,
"rewards/question_recreation_reward_func": 0.9322289191186428,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.359375,
"rewards/xmlcount_reward_func": 1.2034999914467335,
"step": 18
},
{
"completion_length": 142.25,
"epoch": 1.0,
"grad_norm": 13.476627349853516,
"kl": 7.980941329384223,
"learning_rate": 0.0,
"loss": 0.008,
"reward": 6.966006487607956,
"reward_std": 0.08144115762843285,
"rewards/concensus_correctness_reward_func": 2.1567499935626984,
"rewards/consensus_reward_func": 2.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.9232877704780549,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.421875,
"rewards/xmlcount_reward_func": 1.2140937522053719,
"step": 20
},
{
"epoch": 1.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 420.1021536417305,
"train_runtime": 134.2834,
"train_samples_per_second": 2.383,
"train_steps_per_second": 0.149
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}