{ "best_metric": 0.23472924530506134, "best_model_checkpoint": "gbert-base-coherence/checkpoint-1501", "epoch": 10.0, "eval_steps": 500, "global_step": 15010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3331112591605596, "grad_norm": 28.719581604003906, "learning_rate": 1.933377748167888e-05, "loss": 0.3923, "step": 500 }, { "epoch": 0.6662225183211192, "grad_norm": 13.917981147766113, "learning_rate": 1.8667554963357762e-05, "loss": 0.2865, "step": 1000 }, { "epoch": 0.9993337774816788, "grad_norm": 2.144211530685425, "learning_rate": 1.8001332445036644e-05, "loss": 0.2602, "step": 1500 }, { "epoch": 1.0, "eval_accuracy": 0.931022992335888, "eval_loss": 0.23472924530506134, "eval_runtime": 79.2839, "eval_samples_per_second": 37.851, "eval_steps_per_second": 4.742, "step": 1501 }, { "epoch": 1.3324450366422385, "grad_norm": 13.009034156799316, "learning_rate": 1.7335109926715526e-05, "loss": 0.1572, "step": 2000 }, { "epoch": 1.6655562958027983, "grad_norm": 18.67208480834961, "learning_rate": 1.6668887408394405e-05, "loss": 0.1596, "step": 2500 }, { "epoch": 1.9986675549633577, "grad_norm": 0.15106837451457977, "learning_rate": 1.6002664890073287e-05, "loss": 0.1786, "step": 3000 }, { "epoch": 2.0, "eval_accuracy": 0.9326891036321227, "eval_loss": 0.33763834834098816, "eval_runtime": 79.2133, "eval_samples_per_second": 37.885, "eval_steps_per_second": 4.747, "step": 3002 }, { "epoch": 2.3317788141239175, "grad_norm": 0.12679459154605865, "learning_rate": 1.533644237175217e-05, "loss": 0.1087, "step": 3500 }, { "epoch": 2.664890073284477, "grad_norm": 0.04688257351517677, "learning_rate": 1.4670219853431047e-05, "loss": 0.0959, "step": 4000 }, { "epoch": 2.9980013324450367, "grad_norm": 0.05462646484375, "learning_rate": 1.4003997335109927e-05, "loss": 0.1304, "step": 4500 }, { "epoch": 3.0, "eval_accuracy": 0.9416861046317894, "eval_loss": 0.2770719528198242, "eval_runtime": 79.0122, "eval_samples_per_second": 37.981, "eval_steps_per_second": 4.759, "step": 4503 }, { "epoch": 3.331112591605596, "grad_norm": 0.023221973329782486, "learning_rate": 1.333777481678881e-05, "loss": 0.0541, "step": 5000 }, { "epoch": 3.664223850766156, "grad_norm": 0.03963194414973259, "learning_rate": 1.2671552298467688e-05, "loss": 0.094, "step": 5500 }, { "epoch": 3.9973351099267154, "grad_norm": 0.017608487978577614, "learning_rate": 1.2005329780146572e-05, "loss": 0.0714, "step": 6000 }, { "epoch": 4.0, "eval_accuracy": 0.9373542152615795, "eval_loss": 0.3465879261493683, "eval_runtime": 79.2311, "eval_samples_per_second": 37.877, "eval_steps_per_second": 4.746, "step": 6004 }, { "epoch": 4.330446369087275, "grad_norm": 0.09371895343065262, "learning_rate": 1.133910726182545e-05, "loss": 0.0605, "step": 6500 }, { "epoch": 4.663557628247835, "grad_norm": 0.012564590200781822, "learning_rate": 1.067288474350433e-05, "loss": 0.0507, "step": 7000 }, { "epoch": 4.996668887408394, "grad_norm": 0.006542444694787264, "learning_rate": 1.0006662225183212e-05, "loss": 0.0522, "step": 7500 }, { "epoch": 5.0, "eval_accuracy": 0.9346884371876041, "eval_loss": 0.4178318381309509, "eval_runtime": 80.538, "eval_samples_per_second": 37.262, "eval_steps_per_second": 4.669, "step": 7505 }, { "epoch": 5.329780146568954, "grad_norm": 0.0024416144005954266, "learning_rate": 9.340439706862093e-06, "loss": 0.0333, "step": 8000 }, { "epoch": 5.662891405729514, "grad_norm": 0.0012970881070941687, "learning_rate": 8.674217188540973e-06, "loss": 0.0382, "step": 8500 }, { "epoch": 5.9960026648900735, "grad_norm": 35.38675308227539, "learning_rate": 8.007994670219855e-06, "loss": 0.0412, "step": 9000 }, { "epoch": 6.0, "eval_accuracy": 0.9410196601132955, "eval_loss": 0.41738784313201904, "eval_runtime": 81.7142, "eval_samples_per_second": 36.726, "eval_steps_per_second": 4.601, "step": 9006 }, { "epoch": 6.329113924050633, "grad_norm": 0.0018868366023525596, "learning_rate": 7.341772151898735e-06, "loss": 0.0203, "step": 9500 }, { "epoch": 6.662225183211192, "grad_norm": 0.006074848584830761, "learning_rate": 6.675549633577616e-06, "loss": 0.0254, "step": 10000 }, { "epoch": 6.995336442371752, "grad_norm": 0.002733612433075905, "learning_rate": 6.0093271152564956e-06, "loss": 0.0257, "step": 10500 }, { "epoch": 7.0, "eval_accuracy": 0.9386871042985672, "eval_loss": 0.4337184727191925, "eval_runtime": 79.6612, "eval_samples_per_second": 37.672, "eval_steps_per_second": 4.72, "step": 10507 }, { "epoch": 7.328447701532312, "grad_norm": 0.0014540842967107892, "learning_rate": 5.343104596935377e-06, "loss": 0.0133, "step": 11000 }, { "epoch": 7.661558960692871, "grad_norm": 0.0015297214267775416, "learning_rate": 4.676882078614258e-06, "loss": 0.0185, "step": 11500 }, { "epoch": 7.994670219853431, "grad_norm": 0.002298834966495633, "learning_rate": 4.010659560293138e-06, "loss": 0.0242, "step": 12000 }, { "epoch": 8.0, "eval_accuracy": 0.9386871042985672, "eval_loss": 0.4563041627407074, "eval_runtime": 80.0558, "eval_samples_per_second": 37.486, "eval_steps_per_second": 4.697, "step": 12008 }, { "epoch": 8.327781479013991, "grad_norm": 0.005631518550217152, "learning_rate": 3.344437041972019e-06, "loss": 0.0116, "step": 12500 }, { "epoch": 8.66089273817455, "grad_norm": 0.0021366027649492025, "learning_rate": 2.6782145236508994e-06, "loss": 0.0115, "step": 13000 }, { "epoch": 8.99400399733511, "grad_norm": 0.004204094875603914, "learning_rate": 2.0119920053297805e-06, "loss": 0.0123, "step": 13500 }, { "epoch": 9.0, "eval_accuracy": 0.9430189936687771, "eval_loss": 0.45750224590301514, "eval_runtime": 80.6273, "eval_samples_per_second": 37.221, "eval_steps_per_second": 4.663, "step": 13509 }, { "epoch": 9.32711525649567, "grad_norm": 0.0010383282788097858, "learning_rate": 1.345769487008661e-06, "loss": 0.0059, "step": 14000 }, { "epoch": 9.660226515656229, "grad_norm": 0.0029374095611274242, "learning_rate": 6.795469686875417e-07, "loss": 0.004, "step": 14500 }, { "epoch": 9.993337774816789, "grad_norm": 0.0018527992069721222, "learning_rate": 1.3324450366422387e-08, "loss": 0.0059, "step": 15000 }, { "epoch": 10.0, "eval_accuracy": 0.943352215928024, "eval_loss": 0.48842695355415344, "eval_runtime": 81.3925, "eval_samples_per_second": 36.871, "eval_steps_per_second": 4.62, "step": 15010 } ], "logging_steps": 500, "max_steps": 15010, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.15812199748608e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }