| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 405, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04938271604938271, | |
| "grad_norm": 0.24764642119407654, | |
| "learning_rate": 1.9984268150178167e-06, | |
| "loss": 0.4454, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09876543209876543, | |
| "grad_norm": 0.16292472183704376, | |
| "learning_rate": 1.976679926364567e-06, | |
| "loss": 0.4486, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 0.23981738090515137, | |
| "learning_rate": 1.9298944480495172e-06, | |
| "loss": 0.4176, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.19753086419753085, | |
| "grad_norm": 0.25884947180747986, | |
| "learning_rate": 1.8592697928608701e-06, | |
| "loss": 0.4354, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.24691358024691357, | |
| "grad_norm": 0.24895012378692627, | |
| "learning_rate": 1.7666165250516003e-06, | |
| "loss": 0.4689, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 0.2755189836025238, | |
| "learning_rate": 1.6543099439299843e-06, | |
| "loss": 0.4721, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.345679012345679, | |
| "grad_norm": 0.24769575893878937, | |
| "learning_rate": 1.5252291896576213e-06, | |
| "loss": 0.4476, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3950617283950617, | |
| "grad_norm": 0.2432471662759781, | |
| "learning_rate": 1.3826834323650898e-06, | |
| "loss": 0.4631, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.2844541072845459, | |
| "learning_rate": 1.2303270368330437e-06, | |
| "loss": 0.4249, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.49382716049382713, | |
| "grad_norm": 0.30305707454681396, | |
| "learning_rate": 1.0720658776137296e-06, | |
| "loss": 0.4358, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5432098765432098, | |
| "grad_norm": 0.2243204116821289, | |
| "learning_rate": 9.119572063390548e-07, | |
| "loss": 0.4057, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 0.2798885405063629, | |
| "learning_rate": 7.541056382602656e-07, | |
| "loss": 0.46, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6419753086419753, | |
| "grad_norm": 0.2705991566181183, | |
| "learning_rate": 6.025579245533673e-07, | |
| "loss": 0.4226, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.691358024691358, | |
| "grad_norm": 0.30190983414649963, | |
| "learning_rate": 4.611992080530396e-07, | |
| "loss": 0.4285, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 0.3848233222961426, | |
| "learning_rate": 3.3365342204799606e-07, | |
| "loss": 0.4546, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7901234567901234, | |
| "grad_norm": 0.3378937244415283, | |
| "learning_rate": 2.231903855574101e-07, | |
| "loss": 0.4314, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8395061728395061, | |
| "grad_norm": 0.3310483396053314, | |
| "learning_rate": 1.3264197683414912e-07, | |
| "loss": 0.4168, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.3315976560115814, | |
| "learning_rate": 6.432953410719677e-08, | |
| "loss": 0.4389, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9382716049382716, | |
| "grad_norm": 0.29953354597091675, | |
| "learning_rate": 2.0004344749277036e-08, | |
| "loss": 0.4036, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 0.32311904430389404, | |
| "learning_rate": 8.027485151603786e-10, | |
| "loss": 0.4419, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 405, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 135, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.753047941650842e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |