punctuation-nilc-bert-large / trainer_state.json
Tiago Barbosa de Lima
End of training
4b586d2
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 5860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.43,
"learning_rate": 4.573378839590444e-05,
"loss": 0.1077,
"step": 500
},
{
"epoch": 0.85,
"learning_rate": 4.1467576791808876e-05,
"loss": 0.0742,
"step": 1000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9741838326251815,
"eval_f1": 0.8941176470588236,
"eval_loss": 0.06532363593578339,
"eval_precision": 0.9193548387096774,
"eval_recall": 0.8702290076335878,
"eval_runtime": 10.9317,
"eval_samples_per_second": 95.228,
"eval_steps_per_second": 11.983,
"step": 1172
},
{
"epoch": 1.28,
"learning_rate": 3.720136518771331e-05,
"loss": 0.0506,
"step": 1500
},
{
"epoch": 1.71,
"learning_rate": 3.293515358361775e-05,
"loss": 0.0396,
"step": 2000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9747754531275211,
"eval_f1": 0.8959139784946236,
"eval_loss": 0.07732317596673965,
"eval_precision": 0.9088132635253054,
"eval_recall": 0.8833757421543681,
"eval_runtime": 11.1179,
"eval_samples_per_second": 93.632,
"eval_steps_per_second": 11.783,
"step": 2344
},
{
"epoch": 2.13,
"learning_rate": 2.8668941979522183e-05,
"loss": 0.0325,
"step": 2500
},
{
"epoch": 2.56,
"learning_rate": 2.4402730375426623e-05,
"loss": 0.0156,
"step": 3000
},
{
"epoch": 2.99,
"learning_rate": 2.013651877133106e-05,
"loss": 0.0153,
"step": 3500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9738611305329963,
"eval_f1": 0.8905547226386806,
"eval_loss": 0.11707163602113724,
"eval_precision": 0.8996105581999134,
"eval_recall": 0.8816793893129771,
"eval_runtime": 10.7972,
"eval_samples_per_second": 96.414,
"eval_steps_per_second": 12.133,
"step": 3516
},
{
"epoch": 3.41,
"learning_rate": 1.5870307167235497e-05,
"loss": 0.0058,
"step": 4000
},
{
"epoch": 3.84,
"learning_rate": 1.1604095563139932e-05,
"loss": 0.0059,
"step": 4500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9746678857634594,
"eval_f1": 0.8941074146553598,
"eval_loss": 0.13896839320659637,
"eval_precision": 0.9174475680499777,
"eval_recall": 0.8719253604749788,
"eval_runtime": 10.8916,
"eval_samples_per_second": 95.579,
"eval_steps_per_second": 12.028,
"step": 4688
},
{
"epoch": 4.27,
"learning_rate": 7.337883959044369e-06,
"loss": 0.0038,
"step": 5000
},
{
"epoch": 4.69,
"learning_rate": 3.0716723549488057e-06,
"loss": 0.0024,
"step": 5500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9755284246759534,
"eval_f1": 0.8987612131567707,
"eval_loss": 0.15846052765846252,
"eval_precision": 0.9053356282271945,
"eval_recall": 0.8922815945716709,
"eval_runtime": 11.3812,
"eval_samples_per_second": 91.467,
"eval_steps_per_second": 11.51,
"step": 5860
},
{
"epoch": 5.0,
"step": 5860,
"total_flos": 3598808026592814.0,
"train_loss": 0.030279771266536908,
"train_runtime": 2342.7606,
"train_samples_per_second": 20.0,
"train_steps_per_second": 2.501
}
],
"max_steps": 5860,
"num_train_epochs": 5,
"total_flos": 3598808026592814.0,
"trial_name": null,
"trial_params": null
}