| { | |
| "best_global_step": 160, | |
| "best_metric": 0.7282191492717808, | |
| "best_model_checkpoint": "runs/router-mmBERT-small-text-only-v3/checkpoint-160", | |
| "epoch": 2.0, | |
| "eval_steps": 20, | |
| "global_step": 176, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_accuracy": 0.4431818181818182, | |
| "eval_f1": 0.45373205741626793, | |
| "eval_loss": 1.5113513469696045, | |
| "eval_precision": 0.48037190082644626, | |
| "eval_recall": 0.4431818181818182, | |
| "eval_runtime": 4.7496, | |
| "eval_samples_per_second": 37.056, | |
| "eval_steps_per_second": 1.263, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.056818181818181816, | |
| "grad_norm": 12.686948776245117, | |
| "learning_rate": 9.987260573051269e-05, | |
| "loss": 3.2334, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.11363636363636363, | |
| "grad_norm": 104.65008544921875, | |
| "learning_rate": 9.935617890443557e-05, | |
| "loss": 2.5264, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.17045454545454544, | |
| "grad_norm": 77.57421112060547, | |
| "learning_rate": 9.844686508907537e-05, | |
| "loss": 0.9934, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 25.431455612182617, | |
| "learning_rate": 9.715190263989561e-05, | |
| "loss": 0.7931, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "eval_accuracy": 0.6647727272727273, | |
| "eval_f1": 0.5783962367355869, | |
| "eval_loss": 0.9312057495117188, | |
| "eval_precision": 0.6848484848484849, | |
| "eval_recall": 0.6647727272727273, | |
| "eval_runtime": 0.302, | |
| "eval_samples_per_second": 582.771, | |
| "eval_steps_per_second": 19.867, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2840909090909091, | |
| "grad_norm": 59.058265686035156, | |
| "learning_rate": 9.548159976772592e-05, | |
| "loss": 0.9107, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.3409090909090909, | |
| "grad_norm": 47.96800231933594, | |
| "learning_rate": 9.344925248293837e-05, | |
| "loss": 0.708, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.3977272727272727, | |
| "grad_norm": 23.16752815246582, | |
| "learning_rate": 9.107103875602459e-05, | |
| "loss": 0.5048, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 66.16763305664062, | |
| "learning_rate": 8.836588973708129e-05, | |
| "loss": 0.7752, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "eval_accuracy": 0.48863636363636365, | |
| "eval_f1": 0.43204905618558603, | |
| "eval_loss": 0.9223223328590393, | |
| "eval_precision": 0.7377156177156178, | |
| "eval_recall": 0.48863636363636365, | |
| "eval_runtime": 0.321, | |
| "eval_samples_per_second": 548.336, | |
| "eval_steps_per_second": 18.693, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5113636363636364, | |
| "grad_norm": 9.133078575134277, | |
| "learning_rate": 8.535533905932738e-05, | |
| "loss": 0.6457, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.5681818181818182, | |
| "grad_norm": 6.507447719573975, | |
| "learning_rate": 8.206335142623305e-05, | |
| "loss": 0.8555, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 7.759437561035156, | |
| "learning_rate": 7.85161318467482e-05, | |
| "loss": 0.5602, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 3.4017348289489746, | |
| "learning_rate": 7.474191703716339e-05, | |
| "loss": 0.5251, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "eval_accuracy": 0.6988636363636364, | |
| "eval_f1": 0.6658946623722376, | |
| "eval_loss": 0.5345972180366516, | |
| "eval_precision": 0.6939832136717565, | |
| "eval_recall": 0.6988636363636364, | |
| "eval_runtime": 0.4317, | |
| "eval_samples_per_second": 407.665, | |
| "eval_steps_per_second": 13.898, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7386363636363636, | |
| "grad_norm": 38.210453033447266, | |
| "learning_rate": 7.077075065009433e-05, | |
| "loss": 0.7065, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.7954545454545454, | |
| "grad_norm": 10.191884994506836, | |
| "learning_rate": 6.663424411982121e-05, | |
| "loss": 0.6372, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8522727272727273, | |
| "grad_norm": 20.093486785888672, | |
| "learning_rate": 6.236532502771078e-05, | |
| "loss": 0.5907, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 8.867379188537598, | |
| "learning_rate": 5.799797499079301e-05, | |
| "loss": 0.5975, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "eval_accuracy": 0.6534090909090909, | |
| "eval_f1": 0.6573467315982285, | |
| "eval_loss": 0.5975217223167419, | |
| "eval_precision": 0.7275870824215018, | |
| "eval_recall": 0.6534090909090909, | |
| "eval_runtime": 0.3987, | |
| "eval_samples_per_second": 441.426, | |
| "eval_steps_per_second": 15.049, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9659090909090909, | |
| "grad_norm": 10.983565330505371, | |
| "learning_rate": 5.3566959159961615e-05, | |
| "loss": 0.6915, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.0227272727272727, | |
| "grad_norm": 5.20986795425415, | |
| "learning_rate": 4.9107549481057696e-05, | |
| "loss": 0.5815, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0795454545454546, | |
| "grad_norm": 4.2207183837890625, | |
| "learning_rate": 4.4655243921744374e-05, | |
| "loss": 0.4689, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 17.133651733398438, | |
| "learning_rate": 4.0245483899193595e-05, | |
| "loss": 0.551, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "eval_accuracy": 0.7272727272727273, | |
| "eval_f1": 0.7090006767426121, | |
| "eval_loss": 0.5679962635040283, | |
| "eval_precision": 0.7224598930481284, | |
| "eval_recall": 0.7272727272727273, | |
| "eval_runtime": 0.3507, | |
| "eval_samples_per_second": 501.913, | |
| "eval_steps_per_second": 17.111, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1931818181818181, | |
| "grad_norm": 6.083935737609863, | |
| "learning_rate": 3.591337215792852e-05, | |
| "loss": 0.5319, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 37.91518020629883, | |
| "learning_rate": 3.1693393343581044e-05, | |
| "loss": 0.571, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.3068181818181819, | |
| "grad_norm": 10.03105640411377, | |
| "learning_rate": 2.7619139496864378e-05, | |
| "loss": 0.5283, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 3.3100969791412354, | |
| "learning_rate": 2.3723042652894362e-05, | |
| "loss": 0.5093, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "eval_accuracy": 0.7045454545454546, | |
| "eval_f1": 0.6950933483441223, | |
| "eval_loss": 0.5871580243110657, | |
| "eval_precision": 0.6951515151515152, | |
| "eval_recall": 0.7045454545454546, | |
| "eval_runtime": 0.3979, | |
| "eval_samples_per_second": 442.333, | |
| "eval_steps_per_second": 15.08, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.4204545454545454, | |
| "grad_norm": 16.1977596282959, | |
| "learning_rate": 2.0036116674432654e-05, | |
| "loss": 0.5697, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.4772727272727273, | |
| "grad_norm": 4.914312362670898, | |
| "learning_rate": 1.6587710374121203e-05, | |
| "loss": 0.4587, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.5340909090909092, | |
| "grad_norm": 6.9654645919799805, | |
| "learning_rate": 1.340527389091374e-05, | |
| "loss": 0.7103, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 8.979105949401855, | |
| "learning_rate": 1.0514140180404204e-05, | |
| "loss": 0.5315, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "eval_accuracy": 0.75, | |
| "eval_f1": 0.72649847340511, | |
| "eval_loss": 0.5397942066192627, | |
| "eval_precision": 0.7593206296603149, | |
| "eval_recall": 0.75, | |
| "eval_runtime": 0.3718, | |
| "eval_samples_per_second": 473.373, | |
| "eval_steps_per_second": 16.138, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6477272727272727, | |
| "grad_norm": 31.318988800048828, | |
| "learning_rate": 7.937323358440935e-06, | |
| "loss": 0.5135, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.7045454545454546, | |
| "grad_norm": 2.2285165786743164, | |
| "learning_rate": 5.69533550325988e-06, | |
| "loss": 0.481, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.7613636363636362, | |
| "grad_norm": 16.231529235839844, | |
| "learning_rate": 3.8060233744356633e-06, | |
| "loss": 0.5126, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 31.868896484375, | |
| "learning_rate": 2.2844263484068096e-06, | |
| "loss": 0.5231, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "eval_accuracy": 0.7443181818181818, | |
| "eval_f1": 0.7282191492717808, | |
| "eval_loss": 0.5264365673065186, | |
| "eval_precision": 0.7421696641208836, | |
| "eval_recall": 0.7443181818181818, | |
| "eval_runtime": 0.3604, | |
| "eval_samples_per_second": 488.355, | |
| "eval_steps_per_second": 16.648, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 26.505346298217773, | |
| "learning_rate": 1.1426567014420297e-06, | |
| "loss": 0.5943, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.9318181818181817, | |
| "grad_norm": 23.811979293823242, | |
| "learning_rate": 3.8980319302407977e-07, | |
| "loss": 0.5122, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.9886363636363638, | |
| "grad_norm": 7.987723350524902, | |
| "learning_rate": 3.185871715041255e-08, | |
| "loss": 0.5237, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 176, | |
| "total_flos": 315928015988940.0, | |
| "train_loss": 0.7388186820528724, | |
| "train_runtime": 87.4213, | |
| "train_samples_per_second": 32.189, | |
| "train_steps_per_second": 2.013 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 176, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 315928015988940.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
