{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.977777777777778,
  "eval_steps": 500,
  "global_step": 336,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 7.167409910611822,
      "learning_rate": 2.647058823529412e-06,
      "loss": 1.2231,
      "step": 10
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 4.696067274221341,
      "learning_rate": 5.588235294117647e-06,
      "loss": 0.5355,
      "step": 20
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 4.647866634012019,
      "learning_rate": 8.529411764705883e-06,
      "loss": 0.3681,
      "step": 30
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 3.502678393313082,
      "learning_rate": 9.993238112138584e-06,
      "loss": 0.2883,
      "step": 40
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 3.136719993091253,
      "learning_rate": 9.93925269528518e-06,
      "loss": 0.2551,
      "step": 50
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 2.896729495452708,
      "learning_rate": 9.831865535973103e-06,
      "loss": 0.2418,
      "step": 60
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 2.7145713950099917,
      "learning_rate": 9.672237672476506e-06,
      "loss": 0.251,
      "step": 70
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 4.448604511868027,
      "learning_rate": 9.4620949541458e-06,
      "loss": 0.2291,
      "step": 80
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.2372832664069597,
      "learning_rate": 9.203709382033814e-06,
      "loss": 0.2617,
      "step": 90
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 2.1165080061598265,
      "learning_rate": 8.899874544690921e-06,
      "loss": 0.2207,
      "step": 100
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 2.273219832997235,
      "learning_rate": 8.553875414710088e-06,
      "loss": 0.2215,
      "step": 110
    },
    {
      "epoch": 1.0622222222222222,
      "grad_norm": 2.3667909348240332,
      "learning_rate": 8.169452832572676e-06,
      "loss": 0.1973,
      "step": 120
    },
    {
      "epoch": 1.1511111111111112,
      "grad_norm": 2.4119505173940334,
      "learning_rate": 7.750763061785139e-06,
      "loss": 0.1824,
      "step": 130
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5077284221392526,
      "learning_rate": 7.302332852584619e-06,
      "loss": 0.1728,
      "step": 140
    },
    {
      "epoch": 1.3288888888888888,
      "grad_norm": 2.2547864542466503,
      "learning_rate": 6.829010500051319e-06,
      "loss": 0.1775,
      "step": 150
    },
    {
      "epoch": 1.4177777777777778,
      "grad_norm": 3.0525194314032964,
      "learning_rate": 6.335913425772926e-06,
      "loss": 0.1841,
      "step": 160
    },
    {
      "epoch": 1.5066666666666668,
      "grad_norm": 1.985208876851575,
      "learning_rate": 5.8283728497926865e-06,
      "loss": 0.1564,
      "step": 170
    },
    {
      "epoch": 1.5955555555555554,
      "grad_norm": 1.8499220041240245,
      "learning_rate": 5.311876151031642e-06,
      "loss": 0.1661,
      "step": 180
    },
    {
      "epoch": 1.6844444444444444,
      "grad_norm": 2.068162867729939,
      "learning_rate": 4.792007539367198e-06,
      "loss": 0.1271,
      "step": 190
    },
    {
      "epoch": 1.7733333333333334,
      "grad_norm": 2.8138396138923163,
      "learning_rate": 4.274387680803936e-06,
      "loss": 0.1558,
      "step": 200
    },
    {
      "epoch": 1.8622222222222222,
      "grad_norm": 2.06726257518442,
      "learning_rate": 3.7646129284915754e-06,
      "loss": 0.1541,
      "step": 210
    },
    {
      "epoch": 1.951111111111111,
      "grad_norm": 1.7962393241311936,
      "learning_rate": 3.268194816606305e-06,
      "loss": 0.1447,
      "step": 220
    },
    {
      "epoch": 2.0355555555555553,
      "grad_norm": 1.5863025772045418,
      "learning_rate": 2.7905004712698646e-06,
      "loss": 0.1151,
      "step": 230
    },
    {
      "epoch": 2.1244444444444444,
      "grad_norm": 3.0774709211924045,
      "learning_rate": 2.336694582765857e-06,
      "loss": 0.0788,
      "step": 240
    },
    {
      "epoch": 2.2133333333333334,
      "grad_norm": 6.3588167262879685,
      "learning_rate": 1.911683566432633e-06,
      "loss": 0.0703,
      "step": 250
    },
    {
      "epoch": 2.3022222222222224,
      "grad_norm": 1.7862325542205226,
      "learning_rate": 1.5200625159486322e-06,
      "loss": 0.0755,
      "step": 260
    },
    {
      "epoch": 2.391111111111111,
      "grad_norm": 2.6293630067021145,
      "learning_rate": 1.1660655225356533e-06,
      "loss": 0.07,
      "step": 270
    },
    {
      "epoch": 2.48,
      "grad_norm": 1.6062684508069938,
      "learning_rate": 8.535198972141295e-07,
      "loss": 0.0656,
      "step": 280
    },
    {
      "epoch": 2.568888888888889,
      "grad_norm": 2.0533821248411703,
      "learning_rate": 5.858047910459109e-07,
      "loss": 0.0727,
      "step": 290
    },
    {
      "epoch": 2.6577777777777776,
      "grad_norm": 1.2932378848400818,
      "learning_rate": 3.6581466075023443e-07,
      "loss": 0.0677,
      "step": 300
    },
    {
      "epoch": 2.7466666666666666,
      "grad_norm": 1.5090556664534756,
      "learning_rate": 1.959279746919057e-07,
      "loss": 0.0837,
      "step": 310
    },
    {
      "epoch": 2.8355555555555556,
      "grad_norm": 2.273555820523572,
      "learning_rate": 7.79814975832649e-08,
      "loss": 0.0739,
      "step": 320
    },
    {
      "epoch": 2.924444444444444,
      "grad_norm": 2.5157500546795726,
      "learning_rate": 1.3250431926197793e-08,
      "loss": 0.072,
      "step": 330
    },
    {
      "epoch": 2.977777777777778,
      "step": 336,
      "total_flos": 6652356526080.0,
      "train_loss": 0.20261573587499915,
      "train_runtime": 2122.1593,
      "train_samples_per_second": 1.272,
      "train_steps_per_second": 0.158
    }
  ],
  "logging_steps": 10,
  "max_steps": 336,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6652356526080.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}