| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.06802721088435375, | |
| "eval_steps": 500, | |
| "global_step": 20, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 136.75, | |
| "epoch": 0.006802721088435374, | |
| "grad_norm": 11.705904006958008, | |
| "kl": -1.4783778645721668e-08, | |
| "learning_rate": 4.965903258506806e-07, | |
| "loss": -0.0, | |
| "reward": 0.01928214728832245, | |
| "reward_std": 0.007542446808656678, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.01928214728832245, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 80.25, | |
| "epoch": 0.013605442176870748, | |
| "grad_norm": 36.70332717895508, | |
| "kl": 0.0010297351400367916, | |
| "learning_rate": 4.698684378016222e-07, | |
| "loss": 0.0, | |
| "reward": 0.13169971853494644, | |
| "reward_std": 0.055807461962103844, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.13169971853494644, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 377.0, | |
| "epoch": 0.02040816326530612, | |
| "grad_norm": 44.01548385620117, | |
| "kl": 0.004113578732358292, | |
| "learning_rate": 4.193203929064353e-07, | |
| "loss": 0.0, | |
| "reward": 0.02155294083058834, | |
| "reward_std": 0.011597397737205029, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.02155294083058834, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 71.0, | |
| "epoch": 0.027210884353741496, | |
| "grad_norm": 16.23434066772461, | |
| "kl": 0.015674981172196567, | |
| "learning_rate": 3.5042385616324236e-07, | |
| "loss": 0.0, | |
| "reward": 0.15787642821669579, | |
| "reward_std": 0.03962698602117598, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.15787642821669579, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 40.0, | |
| "epoch": 0.034013605442176874, | |
| "grad_norm": 35.57640838623047, | |
| "kl": 0.06608534790575504, | |
| "learning_rate": 2.706448363680831e-07, | |
| "loss": 0.0001, | |
| "reward": 0.07618423458188772, | |
| "reward_std": 0.04003768414258957, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.07618423458188772, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 64.0, | |
| "epoch": 0.04081632653061224, | |
| "grad_norm": 20.184534072875977, | |
| "kl": 0.0030925535247661173, | |
| "learning_rate": 1.886286282148002e-07, | |
| "loss": 0.0, | |
| "reward": 0.050030073150992393, | |
| "reward_std": 0.02135868975892663, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.050030073150992393, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 14.75, | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 56.291439056396484, | |
| "kl": 0.10641674045473337, | |
| "learning_rate": 1.1326296046939333e-07, | |
| "loss": 0.0001, | |
| "reward": 0.1033460614271462, | |
| "reward_std": 0.06610357528552413, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.1033460614271462, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 56.0, | |
| "epoch": 0.05442176870748299, | |
| "grad_norm": 47.39197540283203, | |
| "kl": 0.011849562637507915, | |
| "learning_rate": 5.271487265090163e-08, | |
| "loss": 0.0, | |
| "reward": 0.056912059895694256, | |
| "reward_std": 0.045097901951521635, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.02666206005960703, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.030249999836087227, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 99.75, | |
| "epoch": 0.061224489795918366, | |
| "grad_norm": 28.913740158081055, | |
| "kl": 0.010408955160528421, | |
| "learning_rate": 1.3545689574841341e-08, | |
| "loss": 0.0, | |
| "reward": 0.09544003661721945, | |
| "reward_std": 0.024005468003451824, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.09544003661721945, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 121.5, | |
| "epoch": 0.06802721088435375, | |
| "grad_norm": 14.753103256225586, | |
| "kl": 0.006665305700153112, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 0.08100416325032711, | |
| "reward_std": 0.018220748752355576, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.08100416325032711, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06802721088435375, | |
| "step": 20, | |
| "total_flos": 0.0, | |
| "train_loss": 2.250373363494873e-05, | |
| "train_runtime": 473.0259, | |
| "train_samples_per_second": 0.085, | |
| "train_steps_per_second": 0.042 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 20, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |