Llama-3.1-8B-Instruct-SAA-600 / trainer_log.jsonl
chchen's picture
Model save
f190058 verified
{"current_steps": 10, "total_steps": 330, "loss": 1.7946, "accuracy": 0.7749999761581421, "learning_rate": 1.5151515151515152e-06, "epoch": 0.2962962962962963, "percentage": 3.03, "elapsed_time": "0:00:20", "remaining_time": "0:11:03"}
{"current_steps": 20, "total_steps": 330, "loss": 1.8133, "accuracy": 0.699999988079071, "learning_rate": 3.0303030303030305e-06, "epoch": 0.5925925925925926, "percentage": 6.06, "elapsed_time": "0:00:42", "remaining_time": "0:11:02"}
{"current_steps": 30, "total_steps": 330, "loss": 1.725, "accuracy": 0.78125, "learning_rate": 4.5454545454545455e-06, "epoch": 0.8888888888888888, "percentage": 9.09, "elapsed_time": "0:01:02", "remaining_time": "0:10:28"}
{"current_steps": 40, "total_steps": 330, "loss": 1.5344, "accuracy": 0.731249988079071, "learning_rate": 4.993149937871306e-06, "epoch": 1.1851851851851851, "percentage": 12.12, "elapsed_time": "0:01:23", "remaining_time": "0:10:06"}
{"current_steps": 50, "total_steps": 330, "loss": 1.3352, "accuracy": 0.7250000238418579, "learning_rate": 4.959688949822748e-06, "epoch": 1.4814814814814814, "percentage": 15.15, "elapsed_time": "0:01:44", "remaining_time": "0:09:44"}
{"current_steps": 50, "total_steps": 330, "eval_loss": 1.0316624641418457, "epoch": 1.4814814814814814, "percentage": 15.15, "elapsed_time": "0:01:46", "remaining_time": "0:09:57"}
{"current_steps": 60, "total_steps": 330, "loss": 0.9359, "accuracy": 0.824999988079071, "learning_rate": 4.8987324340362445e-06, "epoch": 1.7777777777777777, "percentage": 18.18, "elapsed_time": "0:02:11", "remaining_time": "0:09:52"}
{"current_steps": 70, "total_steps": 330, "loss": 0.625, "accuracy": 0.800000011920929, "learning_rate": 4.810961790316731e-06, "epoch": 2.074074074074074, "percentage": 21.21, "elapsed_time": "0:02:31", "remaining_time": "0:09:24"}
{"current_steps": 80, "total_steps": 330, "loss": 0.4878, "accuracy": 0.793749988079071, "learning_rate": 4.697358159051549e-06, "epoch": 2.3703703703703702, "percentage": 24.24, "elapsed_time": "0:02:52", "remaining_time": "0:09:00"}
{"current_steps": 90, "total_steps": 330, "loss": 0.332, "accuracy": 0.7875000238418579, "learning_rate": 4.559191453574582e-06, "epoch": 2.6666666666666665, "percentage": 27.27, "elapsed_time": "0:03:14", "remaining_time": "0:08:38"}
{"current_steps": 100, "total_steps": 330, "loss": 0.2371, "accuracy": 0.7875000238418579, "learning_rate": 4.398006164494358e-06, "epoch": 2.962962962962963, "percentage": 30.3, "elapsed_time": "0:03:35", "remaining_time": "0:08:14"}
{"current_steps": 100, "total_steps": 330, "eval_loss": 0.16552023589611053, "epoch": 2.962962962962963, "percentage": 30.3, "elapsed_time": "0:03:37", "remaining_time": "0:08:19"}
{"current_steps": 110, "total_steps": 330, "loss": 0.1933, "accuracy": 0.7875000238418579, "learning_rate": 4.215604094671835e-06, "epoch": 3.259259259259259, "percentage": 33.33, "elapsed_time": "0:04:01", "remaining_time": "0:08:02"}
{"current_steps": 120, "total_steps": 330, "loss": 0.1679, "accuracy": 0.856249988079071, "learning_rate": 4.014024217844167e-06, "epoch": 3.5555555555555554, "percentage": 36.36, "elapsed_time": "0:04:22", "remaining_time": "0:07:39"}
{"current_steps": 130, "total_steps": 330, "loss": 0.1434, "accuracy": 0.7749999761581421, "learning_rate": 3.7955198860439892e-06, "epoch": 3.851851851851852, "percentage": 39.39, "elapsed_time": "0:04:42", "remaining_time": "0:07:14"}
{"current_steps": 140, "total_steps": 330, "loss": 0.1289, "accuracy": 0.824999988079071, "learning_rate": 3.5625336406000752e-06, "epoch": 4.148148148148148, "percentage": 42.42, "elapsed_time": "0:05:02", "remaining_time": "0:06:50"}
{"current_steps": 150, "total_steps": 330, "loss": 0.1421, "accuracy": 0.8062499761581421, "learning_rate": 3.3176699082935546e-06, "epoch": 4.444444444444445, "percentage": 45.45, "elapsed_time": "0:05:23", "remaining_time": "0:06:28"}
{"current_steps": 150, "total_steps": 330, "eval_loss": 0.10104309767484665, "epoch": 4.444444444444445, "percentage": 45.45, "elapsed_time": "0:05:25", "remaining_time": "0:06:30"}
{"current_steps": 160, "total_steps": 330, "loss": 0.1333, "accuracy": 0.824999988079071, "learning_rate": 3.0636658878845116e-06, "epoch": 4.7407407407407405, "percentage": 48.48, "elapsed_time": "0:05:49", "remaining_time": "0:06:10"}
{"current_steps": 170, "total_steps": 330, "loss": 0.1229, "accuracy": 0.7875000238418579, "learning_rate": 2.803360952452705e-06, "epoch": 5.037037037037037, "percentage": 51.52, "elapsed_time": "0:06:10", "remaining_time": "0:05:48"}
{"current_steps": 180, "total_steps": 330, "loss": 0.1321, "accuracy": 0.84375, "learning_rate": 2.53966490958702e-06, "epoch": 5.333333333333333, "percentage": 54.55, "elapsed_time": "0:06:32", "remaining_time": "0:05:26"}
{"current_steps": 190, "total_steps": 330, "loss": 0.118, "accuracy": 0.831250011920929, "learning_rate": 2.275525474225771e-06, "epoch": 5.62962962962963, "percentage": 57.58, "elapsed_time": "0:06:52", "remaining_time": "0:05:03"}
{"current_steps": 200, "total_steps": 330, "loss": 0.1291, "accuracy": 0.78125, "learning_rate": 2.013895317751323e-06, "epoch": 5.925925925925926, "percentage": 60.61, "elapsed_time": "0:07:13", "remaining_time": "0:04:41"}
{"current_steps": 200, "total_steps": 330, "eval_loss": 0.09843841940164566, "epoch": 5.925925925925926, "percentage": 60.61, "elapsed_time": "0:07:16", "remaining_time": "0:04:43"}
{"current_steps": 210, "total_steps": 330, "loss": 0.0946, "accuracy": 0.856249988079071, "learning_rate": 1.7576990616793139e-06, "epoch": 6.222222222222222, "percentage": 63.64, "elapsed_time": "0:07:39", "remaining_time": "0:04:22"}
{"current_steps": 220, "total_steps": 330, "loss": 0.1333, "accuracy": 0.824999988079071, "learning_rate": 1.509800584902108e-06, "epoch": 6.518518518518518, "percentage": 66.67, "elapsed_time": "0:08:00", "remaining_time": "0:04:00"}
{"current_steps": 230, "total_steps": 330, "loss": 0.0972, "accuracy": 0.8687499761581421, "learning_rate": 1.2729710099410802e-06, "epoch": 6.814814814814815, "percentage": 69.7, "elapsed_time": "0:08:21", "remaining_time": "0:03:38"}
{"current_steps": 240, "total_steps": 330, "loss": 0.1338, "accuracy": 0.831250011920929, "learning_rate": 1.049857726072005e-06, "epoch": 7.111111111111111, "percentage": 72.73, "elapsed_time": "0:08:42", "remaining_time": "0:03:16"}
{"current_steps": 250, "total_steps": 330, "loss": 0.1246, "accuracy": 0.800000011920929, "learning_rate": 8.4295479559726e-07, "epoch": 7.407407407407407, "percentage": 75.76, "elapsed_time": "0:09:02", "remaining_time": "0:02:53"}
{"current_steps": 250, "total_steps": 330, "eval_loss": 0.09428545832633972, "epoch": 7.407407407407407, "percentage": 75.76, "elapsed_time": "0:09:05", "remaining_time": "0:02:54"}
{"current_steps": 260, "total_steps": 330, "loss": 0.0906, "accuracy": 0.84375, "learning_rate": 6.545750740770338e-07, "epoch": 7.703703703703704, "percentage": 78.79, "elapsed_time": "0:09:28", "remaining_time": "0:02:32"}
{"current_steps": 270, "total_steps": 330, "loss": 0.1201, "accuracy": 0.862500011920929, "learning_rate": 4.868243561723535e-07, "epoch": 8.0, "percentage": 81.82, "elapsed_time": "0:09:49", "remaining_time": "0:02:11"}
{"current_steps": 280, "total_steps": 330, "loss": 0.1153, "accuracy": 0.831250011920929, "learning_rate": 3.4157783610952263e-07, "epoch": 8.296296296296296, "percentage": 84.85, "elapsed_time": "0:10:10", "remaining_time": "0:01:49"}
{"current_steps": 290, "total_steps": 330, "loss": 0.1022, "accuracy": 0.84375, "learning_rate": 2.2045914590165252e-07, "epoch": 8.592592592592592, "percentage": 87.88, "elapsed_time": "0:10:31", "remaining_time": "0:01:27"}
{"current_steps": 300, "total_steps": 330, "loss": 0.1045, "accuracy": 0.8374999761581421, "learning_rate": 1.2482220564763669e-07, "epoch": 8.88888888888889, "percentage": 90.91, "elapsed_time": "0:10:52", "remaining_time": "0:01:05"}
{"current_steps": 300, "total_steps": 330, "eval_loss": 0.09481088072061539, "epoch": 8.88888888888889, "percentage": 90.91, "elapsed_time": "0:10:54", "remaining_time": "0:01:05"}
{"current_steps": 310, "total_steps": 330, "loss": 0.1254, "accuracy": 0.8125, "learning_rate": 5.573608879422876e-08, "epoch": 9.185185185185185, "percentage": 93.94, "elapsed_time": "0:11:16", "remaining_time": "0:00:43"}
{"current_steps": 320, "total_steps": 330, "loss": 0.098, "accuracy": 0.8812500238418579, "learning_rate": 1.3973071544233219e-08, "epoch": 9.481481481481481, "percentage": 96.97, "elapsed_time": "0:11:37", "remaining_time": "0:00:21"}
{"current_steps": 330, "total_steps": 330, "loss": 0.1206, "accuracy": 0.824999988079071, "learning_rate": 0.0, "epoch": 9.777777777777779, "percentage": 100.0, "elapsed_time": "0:11:58", "remaining_time": "0:00:00"}
{"current_steps": 330, "total_steps": 330, "epoch": 9.777777777777779, "percentage": 100.0, "elapsed_time": "0:12:01", "remaining_time": "0:00:00"}