{
  "best_metric": 0.23472924530506134,
  "best_model_checkpoint": "gbert-base-coherence/checkpoint-1501",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 15010,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3331112591605596,
      "grad_norm": 28.719581604003906,
      "learning_rate": 1.933377748167888e-05,
      "loss": 0.3923,
      "step": 500
    },
    {
      "epoch": 0.6662225183211192,
      "grad_norm": 13.917981147766113,
      "learning_rate": 1.8667554963357762e-05,
      "loss": 0.2865,
      "step": 1000
    },
    {
      "epoch": 0.9993337774816788,
      "grad_norm": 2.144211530685425,
      "learning_rate": 1.8001332445036644e-05,
      "loss": 0.2602,
      "step": 1500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.931022992335888,
      "eval_loss": 0.23472924530506134,
      "eval_runtime": 79.2839,
      "eval_samples_per_second": 37.851,
      "eval_steps_per_second": 4.742,
      "step": 1501
    },
    {
      "epoch": 1.3324450366422385,
      "grad_norm": 13.009034156799316,
      "learning_rate": 1.7335109926715526e-05,
      "loss": 0.1572,
      "step": 2000
    },
    {
      "epoch": 1.6655562958027983,
      "grad_norm": 18.67208480834961,
      "learning_rate": 1.6668887408394405e-05,
      "loss": 0.1596,
      "step": 2500
    },
    {
      "epoch": 1.9986675549633577,
      "grad_norm": 0.15106837451457977,
      "learning_rate": 1.6002664890073287e-05,
      "loss": 0.1786,
      "step": 3000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9326891036321227,
      "eval_loss": 0.33763834834098816,
      "eval_runtime": 79.2133,
      "eval_samples_per_second": 37.885,
      "eval_steps_per_second": 4.747,
      "step": 3002
    },
    {
      "epoch": 2.3317788141239175,
      "grad_norm": 0.12679459154605865,
      "learning_rate": 1.533644237175217e-05,
      "loss": 0.1087,
      "step": 3500
    },
    {
      "epoch": 2.664890073284477,
      "grad_norm": 0.04688257351517677,
      "learning_rate": 1.4670219853431047e-05,
      "loss": 0.0959,
      "step": 4000
    },
    {
      "epoch": 2.9980013324450367,
      "grad_norm": 0.05462646484375,
      "learning_rate": 1.4003997335109927e-05,
      "loss": 0.1304,
      "step": 4500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9416861046317894,
      "eval_loss": 0.2770719528198242,
      "eval_runtime": 79.0122,
      "eval_samples_per_second": 37.981,
      "eval_steps_per_second": 4.759,
      "step": 4503
    },
    {
      "epoch": 3.331112591605596,
      "grad_norm": 0.023221973329782486,
      "learning_rate": 1.333777481678881e-05,
      "loss": 0.0541,
      "step": 5000
    },
    {
      "epoch": 3.664223850766156,
      "grad_norm": 0.03963194414973259,
      "learning_rate": 1.2671552298467688e-05,
      "loss": 0.094,
      "step": 5500
    },
    {
      "epoch": 3.9973351099267154,
      "grad_norm": 0.017608487978577614,
      "learning_rate": 1.2005329780146572e-05,
      "loss": 0.0714,
      "step": 6000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9373542152615795,
      "eval_loss": 0.3465879261493683,
      "eval_runtime": 79.2311,
      "eval_samples_per_second": 37.877,
      "eval_steps_per_second": 4.746,
      "step": 6004
    },
    {
      "epoch": 4.330446369087275,
      "grad_norm": 0.09371895343065262,
      "learning_rate": 1.133910726182545e-05,
      "loss": 0.0605,
      "step": 6500
    },
    {
      "epoch": 4.663557628247835,
      "grad_norm": 0.012564590200781822,
      "learning_rate": 1.067288474350433e-05,
      "loss": 0.0507,
      "step": 7000
    },
    {
      "epoch": 4.996668887408394,
      "grad_norm": 0.006542444694787264,
      "learning_rate": 1.0006662225183212e-05,
      "loss": 0.0522,
      "step": 7500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9346884371876041,
      "eval_loss": 0.4178318381309509,
      "eval_runtime": 80.538,
      "eval_samples_per_second": 37.262,
      "eval_steps_per_second": 4.669,
      "step": 7505
    },
    {
      "epoch": 5.329780146568954,
      "grad_norm": 0.0024416144005954266,
      "learning_rate": 9.340439706862093e-06,
      "loss": 0.0333,
      "step": 8000
    },
    {
      "epoch": 5.662891405729514,
      "grad_norm": 0.0012970881070941687,
      "learning_rate": 8.674217188540973e-06,
      "loss": 0.0382,
      "step": 8500
    },
    {
      "epoch": 5.9960026648900735,
      "grad_norm": 35.38675308227539,
      "learning_rate": 8.007994670219855e-06,
      "loss": 0.0412,
      "step": 9000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9410196601132955,
      "eval_loss": 0.41738784313201904,
      "eval_runtime": 81.7142,
      "eval_samples_per_second": 36.726,
      "eval_steps_per_second": 4.601,
      "step": 9006
    },
    {
      "epoch": 6.329113924050633,
      "grad_norm": 0.0018868366023525596,
      "learning_rate": 7.341772151898735e-06,
      "loss": 0.0203,
      "step": 9500
    },
    {
      "epoch": 6.662225183211192,
      "grad_norm": 0.006074848584830761,
      "learning_rate": 6.675549633577616e-06,
      "loss": 0.0254,
      "step": 10000
    },
    {
      "epoch": 6.995336442371752,
      "grad_norm": 0.002733612433075905,
      "learning_rate": 6.0093271152564956e-06,
      "loss": 0.0257,
      "step": 10500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9386871042985672,
      "eval_loss": 0.4337184727191925,
      "eval_runtime": 79.6612,
      "eval_samples_per_second": 37.672,
      "eval_steps_per_second": 4.72,
      "step": 10507
    },
    {
      "epoch": 7.328447701532312,
      "grad_norm": 0.0014540842967107892,
      "learning_rate": 5.343104596935377e-06,
      "loss": 0.0133,
      "step": 11000
    },
    {
      "epoch": 7.661558960692871,
      "grad_norm": 0.0015297214267775416,
      "learning_rate": 4.676882078614258e-06,
      "loss": 0.0185,
      "step": 11500
    },
    {
      "epoch": 7.994670219853431,
      "grad_norm": 0.002298834966495633,
      "learning_rate": 4.010659560293138e-06,
      "loss": 0.0242,
      "step": 12000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9386871042985672,
      "eval_loss": 0.4563041627407074,
      "eval_runtime": 80.0558,
      "eval_samples_per_second": 37.486,
      "eval_steps_per_second": 4.697,
      "step": 12008
    },
    {
      "epoch": 8.327781479013991,
      "grad_norm": 0.005631518550217152,
      "learning_rate": 3.344437041972019e-06,
      "loss": 0.0116,
      "step": 12500
    },
    {
      "epoch": 8.66089273817455,
      "grad_norm": 0.0021366027649492025,
      "learning_rate": 2.6782145236508994e-06,
      "loss": 0.0115,
      "step": 13000
    },
    {
      "epoch": 8.99400399733511,
      "grad_norm": 0.004204094875603914,
      "learning_rate": 2.0119920053297805e-06,
      "loss": 0.0123,
      "step": 13500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9430189936687771,
      "eval_loss": 0.45750224590301514,
      "eval_runtime": 80.6273,
      "eval_samples_per_second": 37.221,
      "eval_steps_per_second": 4.663,
      "step": 13509
    },
    {
      "epoch": 9.32711525649567,
      "grad_norm": 0.0010383282788097858,
      "learning_rate": 1.345769487008661e-06,
      "loss": 0.0059,
      "step": 14000
    },
    {
      "epoch": 9.660226515656229,
      "grad_norm": 0.0029374095611274242,
      "learning_rate": 6.795469686875417e-07,
      "loss": 0.004,
      "step": 14500
    },
    {
      "epoch": 9.993337774816789,
      "grad_norm": 0.0018527992069721222,
      "learning_rate": 1.3324450366422387e-08,
      "loss": 0.0059,
      "step": 15000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.943352215928024,
      "eval_loss": 0.48842695355415344,
      "eval_runtime": 81.3925,
      "eval_samples_per_second": 36.871,
      "eval_steps_per_second": 4.62,
      "step": 15010
    }
  ],
  "logging_steps": 500,
  "max_steps": 15010,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.15812199748608e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}