{ "best_metric": 0.01622922532260418, "best_model_checkpoint": "saves/Custom/lora/train_2024-03-13-19-46-19/checkpoint-200", "epoch": 5.555555555555555, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 7.693007469177246, "learning_rate": 4.997620553954645e-05, "loss": 1.7543, "step": 5 }, { "epoch": 0.28, "grad_norm": 5.394199371337891, "learning_rate": 4.990486745229364e-05, "loss": 1.0206, "step": 10 }, { "epoch": 0.42, "grad_norm": 2.683964729309082, "learning_rate": 4.9786121534345265e-05, "loss": 0.5826, "step": 15 }, { "epoch": 0.56, "grad_norm": 1.8806061744689941, "learning_rate": 4.962019382530521e-05, "loss": 0.4099, "step": 20 }, { "epoch": 0.69, "grad_norm": 1.9219516515731812, "learning_rate": 4.940740017799833e-05, "loss": 0.3052, "step": 25 }, { "epoch": 0.83, "grad_norm": 1.6164277791976929, "learning_rate": 4.914814565722671e-05, "loss": 0.244, "step": 30 }, { "epoch": 0.97, "grad_norm": 1.8378784656524658, "learning_rate": 4.884292376870567e-05, "loss": 0.1924, "step": 35 }, { "epoch": 1.11, "grad_norm": 2.151557445526123, "learning_rate": 4.849231551964771e-05, "loss": 0.1655, "step": 40 }, { "epoch": 1.25, "grad_norm": 1.2347593307495117, "learning_rate": 4.8096988312782174e-05, "loss": 0.1203, "step": 45 }, { "epoch": 1.39, "grad_norm": 2.1008129119873047, "learning_rate": 4.765769467591625e-05, "loss": 0.1188, "step": 50 }, { "epoch": 1.53, "grad_norm": 1.351984977722168, "learning_rate": 4.717527082945554e-05, "loss": 0.0852, "step": 55 }, { "epoch": 1.67, "grad_norm": 1.8272274732589722, "learning_rate": 4.665063509461097e-05, "loss": 0.0854, "step": 60 }, { "epoch": 1.81, "grad_norm": 1.3354312181472778, "learning_rate": 4.608478614532215e-05, "loss": 0.0672, "step": 65 }, { "epoch": 1.94, "grad_norm": 1.7812442779541016, "learning_rate": 4.54788011072248e-05, "loss": 0.0455, "step": 70 }, { "epoch": 2.08, "grad_norm": 1.6480517387390137, "learning_rate": 4.4833833507280884e-05, "loss": 0.0366, "step": 75 }, { "epoch": 2.22, "grad_norm": 1.5116844177246094, "learning_rate": 4.415111107797445e-05, "loss": 0.0503, "step": 80 }, { "epoch": 2.36, "grad_norm": 1.792582631111145, "learning_rate": 4.34319334202531e-05, "loss": 0.0429, "step": 85 }, { "epoch": 2.5, "grad_norm": 2.4267895221710205, "learning_rate": 4.267766952966369e-05, "loss": 0.0482, "step": 90 }, { "epoch": 2.64, "grad_norm": 0.7215550541877747, "learning_rate": 4.188975519039151e-05, "loss": 0.0317, "step": 95 }, { "epoch": 2.78, "grad_norm": 1.289726972579956, "learning_rate": 4.1069690242163484e-05, "loss": 0.0372, "step": 100 }, { "epoch": 2.78, "eval_loss": 0.03319624438881874, "eval_runtime": 2.4078, "eval_samples_per_second": 50.252, "eval_steps_per_second": 3.322, "step": 100 }, { "epoch": 2.92, "grad_norm": 1.3112704753875732, "learning_rate": 4.021903572521802e-05, "loss": 0.0245, "step": 105 }, { "epoch": 3.06, "grad_norm": 1.9846090078353882, "learning_rate": 3.933941090877615e-05, "loss": 0.0333, "step": 110 }, { "epoch": 3.19, "grad_norm": 2.320976734161377, "learning_rate": 3.84324902086706e-05, "loss": 0.0351, "step": 115 }, { "epoch": 3.33, "grad_norm": 2.042772054672241, "learning_rate": 3.7500000000000003e-05, "loss": 0.0261, "step": 120 }, { "epoch": 3.47, "grad_norm": 2.495635747909546, "learning_rate": 3.654371533087586e-05, "loss": 0.0204, "step": 125 }, { "epoch": 3.61, "grad_norm": 0.8619056344032288, "learning_rate": 3.556545654351749e-05, "loss": 0.0122, "step": 130 }, { "epoch": 3.75, "grad_norm": 1.8066715002059937, "learning_rate": 3.456708580912725e-05, "loss": 0.0288, "step": 135 }, { "epoch": 3.89, "grad_norm": 3.5700998306274414, "learning_rate": 3.355050358314172e-05, "loss": 0.0222, "step": 140 }, { "epoch": 4.03, "grad_norm": 1.5312789678573608, "learning_rate": 3.251764498760683e-05, "loss": 0.0249, "step": 145 }, { "epoch": 4.17, "grad_norm": 0.45907583832740784, "learning_rate": 3.147047612756302e-05, "loss": 0.0227, "step": 150 }, { "epoch": 4.31, "grad_norm": 1.6312272548675537, "learning_rate": 3.0410990348452573e-05, "loss": 0.0171, "step": 155 }, { "epoch": 4.44, "grad_norm": 2.005723476409912, "learning_rate": 2.9341204441673266e-05, "loss": 0.0169, "step": 160 }, { "epoch": 4.58, "grad_norm": 1.4385231733322144, "learning_rate": 2.8263154805501297e-05, "loss": 0.0192, "step": 165 }, { "epoch": 4.72, "grad_norm": 1.0173711776733398, "learning_rate": 2.717889356869146e-05, "loss": 0.0136, "step": 170 }, { "epoch": 4.86, "grad_norm": 1.2668229341506958, "learning_rate": 2.6090484684133404e-05, "loss": 0.0122, "step": 175 }, { "epoch": 5.0, "grad_norm": 1.913548469543457, "learning_rate": 2.5e-05, "loss": 0.012, "step": 180 }, { "epoch": 5.14, "grad_norm": 2.155971050262451, "learning_rate": 2.3909515315866605e-05, "loss": 0.0128, "step": 185 }, { "epoch": 5.28, "grad_norm": 1.0692299604415894, "learning_rate": 2.2821106431308544e-05, "loss": 0.0127, "step": 190 }, { "epoch": 5.42, "grad_norm": 0.9680432081222534, "learning_rate": 2.173684519449872e-05, "loss": 0.0158, "step": 195 }, { "epoch": 5.56, "grad_norm": 1.0661903619766235, "learning_rate": 2.0658795558326743e-05, "loss": 0.0125, "step": 200 }, { "epoch": 5.56, "eval_loss": 0.01622922532260418, "eval_runtime": 2.4123, "eval_samples_per_second": 50.161, "eval_steps_per_second": 3.316, "step": 200 } ], "logging_steps": 5, "max_steps": 360, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 4.092376135473562e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }