{ "best_metric": 0.3089325428009033, "best_model_checkpoint": "./xlam_lora_new_ete_over_size_3epoch_multi_mix/checkpoint-205", "epoch": 0.33372335554764204, "eval_steps": 205, "global_step": 205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016279188075494735, "grad_norm": 0.7572630643844604, "learning_rate": 2.688172043010753e-06, "loss": 0.5223, "step": 10 }, { "epoch": 0.03255837615098947, "grad_norm": 0.417061984539032, "learning_rate": 5.376344086021506e-06, "loss": 0.4858, "step": 20 }, { "epoch": 0.048837564226484206, "grad_norm": 0.3718095123767853, "learning_rate": 8.064516129032258e-06, "loss": 0.4246, "step": 30 }, { "epoch": 0.06511675230197894, "grad_norm": 0.2949349582195282, "learning_rate": 1.0752688172043012e-05, "loss": 0.4405, "step": 40 }, { "epoch": 0.08139594037747368, "grad_norm": 0.3159159719944, "learning_rate": 1.3440860215053763e-05, "loss": 0.4148, "step": 50 }, { "epoch": 0.09767512845296841, "grad_norm": 0.4167034327983856, "learning_rate": 1.6129032258064517e-05, "loss": 0.3393, "step": 60 }, { "epoch": 0.11395431652846315, "grad_norm": 0.39410400390625, "learning_rate": 1.881720430107527e-05, "loss": 0.2464, "step": 70 }, { "epoch": 0.13023350460395788, "grad_norm": 0.3644021153450012, "learning_rate": 2.1505376344086024e-05, "loss": 0.2294, "step": 80 }, { "epoch": 0.1465126926794526, "grad_norm": 0.30372634530067444, "learning_rate": 2.4193548387096777e-05, "loss": 0.2315, "step": 90 }, { "epoch": 0.16279188075494735, "grad_norm": 0.2586315870285034, "learning_rate": 2.4999011923655086e-05, "loss": 0.1932, "step": 100 }, { "epoch": 0.17907106883044208, "grad_norm": 0.37825971841812134, "learning_rate": 2.4994172742085852e-05, "loss": 0.2204, "step": 110 }, { "epoch": 0.19535025690593683, "grad_norm": 0.21422357857227325, "learning_rate": 2.4985302531208654e-05, "loss": 0.1795, "step": 120 }, { "epoch": 0.21162944498143155, "grad_norm": 0.2566869854927063, "learning_rate": 2.4972404152844008e-05, "loss": 0.1668, "step": 130 }, { "epoch": 0.2279086330569263, "grad_norm": 0.28194501996040344, "learning_rate": 2.49554817684312e-05, "loss": 0.1476, "step": 140 }, { "epoch": 0.24418782113242102, "grad_norm": 0.24139340221881866, "learning_rate": 2.4934540837685647e-05, "loss": 0.1609, "step": 150 }, { "epoch": 0.26046700920791577, "grad_norm": 0.3306334614753723, "learning_rate": 2.490958811683741e-05, "loss": 0.1638, "step": 160 }, { "epoch": 0.2767461972834105, "grad_norm": 0.27301114797592163, "learning_rate": 2.4880631656451447e-05, "loss": 0.1494, "step": 170 }, { "epoch": 0.2930253853589052, "grad_norm": 0.34037259221076965, "learning_rate": 2.484768079883018e-05, "loss": 0.1534, "step": 180 }, { "epoch": 0.30930457343439993, "grad_norm": 0.2306762933731079, "learning_rate": 2.4810746174999418e-05, "loss": 0.1749, "step": 190 }, { "epoch": 0.3255837615098947, "grad_norm": 0.3183388113975525, "learning_rate": 2.476983970127841e-05, "loss": 0.1482, "step": 200 }, { "epoch": 0.33372335554764204, "eval_loss": 0.3089325428009033, "eval_runtime": 34.8769, "eval_samples_per_second": 5.419, "eval_steps_per_second": 5.419, "step": 205 } ], "logging_steps": 10, "max_steps": 1842, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 205, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7404062066376704e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }