{ "best_global_step": 1143, "best_metric": 1.135260820388794, "best_model_checkpoint": "./mt5_base/indic_loss/mlm/checkpoint-1143", "epoch": 5.0, "eval_steps": 500, "global_step": 1905, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 8.4002, "eval_gen_len": 19.9018, "eval_loss": 1.3717433214187622, "eval_runtime": 43.2354, "eval_samples_per_second": 22.135, "eval_steps_per_second": 2.776, "eval_wer": 0.6943, "step": 381 }, { "epoch": 1.3123359580052494, "grad_norm": 2.9348294734954834, "learning_rate": 4.67257217847769e-05, "loss": 2.0859, "step": 500 }, { "epoch": 2.0, "eval_bleu": 9.9408, "eval_gen_len": 19.9739, "eval_loss": 1.248307704925537, "eval_runtime": 43.1078, "eval_samples_per_second": 22.2, "eval_steps_per_second": 2.784, "eval_wer": 0.6486, "step": 762 }, { "epoch": 2.6246719160104988, "grad_norm": 2.1083076000213623, "learning_rate": 4.3444881889763784e-05, "loss": 0.6856, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 10.3503, "eval_gen_len": 19.9739, "eval_loss": 1.135260820388794, "eval_runtime": 43.0868, "eval_samples_per_second": 22.211, "eval_steps_per_second": 2.785, "eval_wer": 0.6484, "step": 1143 }, { "epoch": 3.937007874015748, "grad_norm": 2.1635806560516357, "learning_rate": 4.016404199475066e-05, "loss": 0.5485, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 10.3966, "eval_gen_len": 19.9739, "eval_loss": 1.1630462408065796, "eval_runtime": 43.0721, "eval_samples_per_second": 22.219, "eval_steps_per_second": 2.786, "eval_wer": 0.6476, "step": 1524 }, { "epoch": 5.0, "eval_bleu": 10.585, "eval_gen_len": 19.9739, "eval_loss": 1.1526703834533691, "eval_runtime": 42.8826, "eval_samples_per_second": 22.317, "eval_steps_per_second": 2.798, "eval_wer": 0.6481, "step": 1905 }, { "epoch": 5.0, "step": 1905, "total_flos": 2480404906856448.0, "train_loss": 0.9748498230781455, "train_runtime": 626.9519, "train_samples_per_second": 97.073, "train_steps_per_second": 12.154 } ], "logging_steps": 500, "max_steps": 7620, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 50000.0, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2480404906856448.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }