{
  "best_metric": 0.04320823773741722,
  "best_model_checkpoint": "./xlam_lora_new_1024_2_delete_over_size_3epoch/checkpoint-789",
  "epoch": 1.000316906987799,
  "eval_steps": 263,
  "global_step": 789,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02535255902392648,
      "grad_norm": 0.515038788318634,
      "learning_rate": 5.633802816901409e-06,
      "loss": 0.4826,
      "step": 20
    },
    {
      "epoch": 0.05070511804785296,
      "grad_norm": 1.2124691009521484,
      "learning_rate": 1.1267605633802819e-05,
      "loss": 0.337,
      "step": 40
    },
    {
      "epoch": 0.07605767707177943,
      "grad_norm": 0.562520444393158,
      "learning_rate": 1.6901408450704228e-05,
      "loss": 0.1167,
      "step": 60
    },
    {
      "epoch": 0.10141023609570592,
      "grad_norm": 0.1530018150806427,
      "learning_rate": 1.9999239776734535e-05,
      "loss": 0.0444,
      "step": 80
    },
    {
      "epoch": 0.1267627951196324,
      "grad_norm": 0.43701356649398804,
      "learning_rate": 1.9992107756032603e-05,
      "loss": 0.0941,
      "step": 100
    },
    {
      "epoch": 0.15211535414355887,
      "grad_norm": 0.26956745982170105,
      "learning_rate": 1.997747365756147e-05,
      "loss": 0.0722,
      "step": 120
    },
    {
      "epoch": 0.17746791316748534,
      "grad_norm": 0.38304299116134644,
      "learning_rate": 1.995534846860705e-05,
      "loss": 0.0664,
      "step": 140
    },
    {
      "epoch": 0.20282047219141183,
      "grad_norm": 0.24561983346939087,
      "learning_rate": 1.9925748800768444e-05,
      "loss": 0.0474,
      "step": 160
    },
    {
      "epoch": 0.2281730312153383,
      "grad_norm": 0.26263824105262756,
      "learning_rate": 1.9888696877485937e-05,
      "loss": 0.0364,
      "step": 180
    },
    {
      "epoch": 0.2535255902392648,
      "grad_norm": 0.2551981210708618,
      "learning_rate": 1.984422051735564e-05,
      "loss": 0.0276,
      "step": 200
    },
    {
      "epoch": 0.27887814926319127,
      "grad_norm": 0.16999328136444092,
      "learning_rate": 1.979235311324328e-05,
      "loss": 0.039,
      "step": 220
    },
    {
      "epoch": 0.30423070828711773,
      "grad_norm": 0.2080262005329132,
      "learning_rate": 1.973313360721281e-05,
      "loss": 0.0353,
      "step": 240
    },
    {
      "epoch": 0.3295832673110442,
      "grad_norm": 0.19375990331172943,
      "learning_rate": 1.9666606461288728e-05,
      "loss": 0.0509,
      "step": 260
    },
    {
      "epoch": 0.33338615116463316,
      "eval_loss": 0.04866104573011398,
      "eval_runtime": 144.4235,
      "eval_samples_per_second": 4.618,
      "eval_steps_per_second": 2.313,
      "step": 263
    },
    {
      "epoch": 0.35493582633497067,
      "grad_norm": 0.3100409209728241,
      "learning_rate": 1.959282162407392e-05,
      "loss": 0.0317,
      "step": 280
    },
    {
      "epoch": 0.38028838535889714,
      "grad_norm": 0.49380865693092346,
      "learning_rate": 1.9511834493248303e-05,
      "loss": 0.0513,
      "step": 300
    },
    {
      "epoch": 0.40564094438282366,
      "grad_norm": 0.18333816528320312,
      "learning_rate": 1.9423705873976156e-05,
      "loss": 0.0449,
      "step": 320
    },
    {
      "epoch": 0.43099350340675013,
      "grad_norm": 0.3562432825565338,
      "learning_rate": 1.9328501933253643e-05,
      "loss": 0.028,
      "step": 340
    },
    {
      "epoch": 0.4563460624306766,
      "grad_norm": 0.341875821352005,
      "learning_rate": 1.9226294150230573e-05,
      "loss": 0.0361,
      "step": 360
    },
    {
      "epoch": 0.48169862145460307,
      "grad_norm": 0.259799987077713,
      "learning_rate": 1.9117159262543844e-05,
      "loss": 0.0454,
      "step": 380
    },
    {
      "epoch": 0.5070511804785296,
      "grad_norm": 0.4206746518611908,
      "learning_rate": 1.9001179208702812e-05,
      "loss": 0.0333,
      "step": 400
    },
    {
      "epoch": 0.532403739502456,
      "grad_norm": 0.3466087877750397,
      "learning_rate": 1.88784410665698e-05,
      "loss": 0.0292,
      "step": 420
    },
    {
      "epoch": 0.5577562985263825,
      "grad_norm": 0.5302783846855164,
      "learning_rate": 1.8749036987982047e-05,
      "loss": 0.0319,
      "step": 440
    },
    {
      "epoch": 0.583108857550309,
      "grad_norm": 0.3487597107887268,
      "learning_rate": 1.861306412956404e-05,
      "loss": 0.037,
      "step": 460
    },
    {
      "epoch": 0.6084614165742355,
      "grad_norm": 0.4753687381744385,
      "learning_rate": 1.8470624579782338e-05,
      "loss": 0.029,
      "step": 480
    },
    {
      "epoch": 0.633813975598162,
      "grad_norm": 0.20097890496253967,
      "learning_rate": 1.8321825282297467e-05,
      "loss": 0.0432,
      "step": 500
    },
    {
      "epoch": 0.6591665346220884,
      "grad_norm": 0.33282265067100525,
      "learning_rate": 1.8166777955670616e-05,
      "loss": 0.0414,
      "step": 520
    },
    {
      "epoch": 0.6667723023292663,
      "eval_loss": 0.045147210359573364,
      "eval_runtime": 144.4317,
      "eval_samples_per_second": 4.618,
      "eval_steps_per_second": 2.313,
      "step": 526
    },
    {
      "epoch": 0.6845190936460149,
      "grad_norm": 0.17276500165462494,
      "learning_rate": 1.800559900948529e-05,
      "loss": 0.046,
      "step": 540
    },
    {
      "epoch": 0.7098716526699413,
      "grad_norm": 0.17020255327224731,
      "learning_rate": 1.7838409456946973e-05,
      "loss": 0.0273,
      "step": 560
    },
    {
      "epoch": 0.7352242116938679,
      "grad_norm": 0.09968056529760361,
      "learning_rate": 1.7665334824026358e-05,
      "loss": 0.0265,
      "step": 580
    },
    {
      "epoch": 0.7605767707177943,
      "grad_norm": 0.3242018520832062,
      "learning_rate": 1.7486505055214405e-05,
      "loss": 0.0269,
      "step": 600
    },
    {
      "epoch": 0.7859293297417208,
      "grad_norm": 0.2690919041633606,
      "learning_rate": 1.7302054415959994e-05,
      "loss": 0.0247,
      "step": 620
    },
    {
      "epoch": 0.8112818887656473,
      "grad_norm": 0.5220848321914673,
      "learning_rate": 1.7112121391863367e-05,
      "loss": 0.0389,
      "step": 640
    },
    {
      "epoch": 0.8366344477895737,
      "grad_norm": 0.26270976662635803,
      "learning_rate": 1.6916848584701092e-05,
      "loss": 0.0281,
      "step": 660
    },
    {
      "epoch": 0.8619870068135003,
      "grad_norm": 0.32137855887413025,
      "learning_rate": 1.671638260536062e-05,
      "loss": 0.0365,
      "step": 680
    },
    {
      "epoch": 0.8873395658374267,
      "grad_norm": 0.5726165175437927,
      "learning_rate": 1.6510873963764754e-05,
      "loss": 0.0468,
      "step": 700
    },
    {
      "epoch": 0.9126921248613532,
      "grad_norm": 0.7382823824882507,
      "learning_rate": 1.6300476955868785e-05,
      "loss": 0.0365,
      "step": 720
    },
    {
      "epoch": 0.9380446838852796,
      "grad_norm": 0.3500744104385376,
      "learning_rate": 1.6085349547815002e-05,
      "loss": 0.027,
      "step": 740
    },
    {
      "epoch": 0.9633972429092061,
      "grad_norm": 0.2678811252117157,
      "learning_rate": 1.5865653257331697e-05,
      "loss": 0.0483,
      "step": 760
    },
    {
      "epoch": 0.9887498019331327,
      "grad_norm": 0.22900326550006866,
      "learning_rate": 1.564155303246556e-05,
      "loss": 0.0508,
      "step": 780
    },
    {
      "epoch": 1.000316906987799,
      "eval_loss": 0.04320823773741722,
      "eval_runtime": 144.4268,
      "eval_samples_per_second": 4.618,
      "eval_steps_per_second": 2.313,
      "step": 789
    }
  ],
  "logging_steps": 20,
  "max_steps": 2364,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 263,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.876643611330478e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}