| { | |
| "best_metric": 0.8043478260869565, | |
| "best_model_checkpoint": "swiftformer-xs-ve-U13-b-80c\\checkpoint-175", | |
| "epoch": 73.84615384615384, | |
| "eval_steps": 500, | |
| "global_step": 480, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.2391304347826087, | |
| "eval_loss": 1.3860037326812744, | |
| "eval_runtime": 0.6497, | |
| "eval_samples_per_second": 70.799, | |
| "eval_steps_per_second": 3.078, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 1.3859, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.30434782608695654, | |
| "eval_loss": 1.3843744993209839, | |
| "eval_runtime": 0.6508, | |
| "eval_samples_per_second": 70.684, | |
| "eval_steps_per_second": 3.073, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 0.1956521739130435, | |
| "eval_loss": 1.3819622993469238, | |
| "eval_runtime": 0.6238, | |
| "eval_samples_per_second": 73.745, | |
| "eval_steps_per_second": 3.206, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 5.555555555555556e-05, | |
| "loss": 1.381, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.17391304347826086, | |
| "eval_loss": 1.3745858669281006, | |
| "eval_runtime": 0.6174, | |
| "eval_samples_per_second": 74.504, | |
| "eval_steps_per_second": 3.239, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 1.3573, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "eval_accuracy": 0.1956521739130435, | |
| "eval_loss": 1.3642687797546387, | |
| "eval_runtime": 0.5993, | |
| "eval_samples_per_second": 76.759, | |
| "eval_steps_per_second": 3.337, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.15217391304347827, | |
| "eval_loss": 1.3561071157455444, | |
| "eval_runtime": 0.6092, | |
| "eval_samples_per_second": 75.505, | |
| "eval_steps_per_second": 3.283, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 0.00011111111111111112, | |
| "loss": 1.2692, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "eval_accuracy": 0.15217391304347827, | |
| "eval_loss": 1.3582805395126343, | |
| "eval_runtime": 0.6066, | |
| "eval_samples_per_second": 75.83, | |
| "eval_steps_per_second": 3.297, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 0.0001388888888888889, | |
| "loss": 1.1682, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.17391304347826086, | |
| "eval_loss": 1.3623026609420776, | |
| "eval_runtime": 0.6189, | |
| "eval_samples_per_second": 74.33, | |
| "eval_steps_per_second": 3.232, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "eval_accuracy": 0.2608695652173913, | |
| "eval_loss": 1.3296467065811157, | |
| "eval_runtime": 0.6038, | |
| "eval_samples_per_second": 76.178, | |
| "eval_steps_per_second": 3.312, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 1.1005, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.391304347826087, | |
| "eval_loss": 1.266342282295227, | |
| "eval_runtime": 0.6296, | |
| "eval_samples_per_second": 73.06, | |
| "eval_steps_per_second": 3.177, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "learning_rate": 0.00019444444444444446, | |
| "loss": 0.9884, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "eval_accuracy": 0.3695652173913043, | |
| "eval_loss": 1.3159973621368408, | |
| "eval_runtime": 0.6014, | |
| "eval_samples_per_second": 76.491, | |
| "eval_steps_per_second": 3.326, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.1806195974349976, | |
| "eval_runtime": 0.6259, | |
| "eval_samples_per_second": 73.491, | |
| "eval_steps_per_second": 3.195, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "learning_rate": 0.000196078431372549, | |
| "loss": 0.9111, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.15597403049469, | |
| "eval_runtime": 0.6243, | |
| "eval_samples_per_second": 73.68, | |
| "eval_steps_per_second": 3.203, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "learning_rate": 0.0001911764705882353, | |
| "loss": 0.8464, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1349503993988037, | |
| "eval_runtime": 0.6226, | |
| "eval_samples_per_second": 73.889, | |
| "eval_steps_per_second": 3.213, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.0768355131149292, | |
| "eval_runtime": 0.6092, | |
| "eval_samples_per_second": 75.507, | |
| "eval_steps_per_second": 3.283, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 0.00018627450980392157, | |
| "loss": 0.7768, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9706609845161438, | |
| "eval_runtime": 0.6399, | |
| "eval_samples_per_second": 71.887, | |
| "eval_steps_per_second": 3.126, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "learning_rate": 0.00018137254901960786, | |
| "loss": 0.6754, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9544328451156616, | |
| "eval_runtime": 0.5992, | |
| "eval_samples_per_second": 76.775, | |
| "eval_steps_per_second": 3.338, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.9885360598564148, | |
| "eval_runtime": 0.6261, | |
| "eval_samples_per_second": 73.474, | |
| "eval_steps_per_second": 3.195, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 18.46, | |
| "learning_rate": 0.00017647058823529413, | |
| "loss": 0.657, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 18.92, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.85777348279953, | |
| "eval_runtime": 0.6125, | |
| "eval_samples_per_second": 75.103, | |
| "eval_steps_per_second": 3.265, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.0001715686274509804, | |
| "loss": 0.5408, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.7794063687324524, | |
| "eval_runtime": 0.5998, | |
| "eval_samples_per_second": 76.697, | |
| "eval_steps_per_second": 3.335, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8072043657302856, | |
| "eval_runtime": 0.6119, | |
| "eval_samples_per_second": 75.172, | |
| "eval_steps_per_second": 3.268, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 21.54, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 0.5094, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.7917114496231079, | |
| "eval_runtime": 0.6088, | |
| "eval_samples_per_second": 75.563, | |
| "eval_steps_per_second": 3.285, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 22.92, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.7974965572357178, | |
| "eval_runtime": 0.6475, | |
| "eval_samples_per_second": 71.039, | |
| "eval_steps_per_second": 3.089, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "learning_rate": 0.00016176470588235295, | |
| "loss": 0.4546, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.758342981338501, | |
| "eval_runtime": 0.6038, | |
| "eval_samples_per_second": 76.184, | |
| "eval_steps_per_second": 3.312, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 24.62, | |
| "learning_rate": 0.00015686274509803922, | |
| "loss": 0.3722, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 24.92, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.7073833346366882, | |
| "eval_runtime": 0.6225, | |
| "eval_samples_per_second": 73.896, | |
| "eval_steps_per_second": 3.213, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.6909225583076477, | |
| "eval_runtime": 0.6068, | |
| "eval_samples_per_second": 75.807, | |
| "eval_steps_per_second": 3.296, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 26.15, | |
| "learning_rate": 0.00015196078431372549, | |
| "loss": 0.3494, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 26.92, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.7032170295715332, | |
| "eval_runtime": 0.6101, | |
| "eval_samples_per_second": 75.403, | |
| "eval_steps_per_second": 3.278, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 27.69, | |
| "learning_rate": 0.00014705882352941178, | |
| "loss": 0.3092, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.8148682713508606, | |
| "eval_runtime": 0.6369, | |
| "eval_samples_per_second": 72.229, | |
| "eval_steps_per_second": 3.14, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 28.92, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.7898127436637878, | |
| "eval_runtime": 0.6193, | |
| "eval_samples_per_second": 74.278, | |
| "eval_steps_per_second": 3.229, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 29.23, | |
| "learning_rate": 0.00014215686274509804, | |
| "loss": 0.2643, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.7311907410621643, | |
| "eval_runtime": 0.6043, | |
| "eval_samples_per_second": 76.119, | |
| "eval_steps_per_second": 3.31, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "learning_rate": 0.0001372549019607843, | |
| "loss": 0.2659, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 30.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.7597643733024597, | |
| "eval_runtime": 0.6226, | |
| "eval_samples_per_second": 73.885, | |
| "eval_steps_per_second": 3.212, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.7530593276023865, | |
| "eval_runtime": 0.5971, | |
| "eval_samples_per_second": 77.044, | |
| "eval_steps_per_second": 3.35, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 32.31, | |
| "learning_rate": 0.0001323529411764706, | |
| "loss": 0.2298, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 32.92, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.6876691579818726, | |
| "eval_runtime": 0.6354, | |
| "eval_samples_per_second": 72.394, | |
| "eval_steps_per_second": 3.148, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 33.85, | |
| "learning_rate": 0.00012745098039215687, | |
| "loss": 0.2147, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.6864094734191895, | |
| "eval_runtime": 0.6204, | |
| "eval_samples_per_second": 74.15, | |
| "eval_steps_per_second": 3.224, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 34.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.7655704617500305, | |
| "eval_runtime": 0.623, | |
| "eval_samples_per_second": 73.835, | |
| "eval_steps_per_second": 3.21, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 35.38, | |
| "learning_rate": 0.00012254901960784316, | |
| "loss": 0.2457, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8493983149528503, | |
| "eval_runtime": 0.6074, | |
| "eval_samples_per_second": 75.734, | |
| "eval_steps_per_second": 3.293, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "learning_rate": 0.00011764705882352942, | |
| "loss": 0.1905, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.7319269776344299, | |
| "eval_runtime": 0.6155, | |
| "eval_samples_per_second": 74.733, | |
| "eval_steps_per_second": 3.249, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8289902806282043, | |
| "eval_runtime": 0.6141, | |
| "eval_samples_per_second": 74.91, | |
| "eval_steps_per_second": 3.257, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 38.46, | |
| "learning_rate": 0.0001127450980392157, | |
| "loss": 0.2073, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 38.92, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.7963315844535828, | |
| "eval_runtime": 0.6558, | |
| "eval_samples_per_second": 70.149, | |
| "eval_steps_per_second": 3.05, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.00010784313725490196, | |
| "loss": 0.1603, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8692622780799866, | |
| "eval_runtime": 0.5964, | |
| "eval_samples_per_second": 77.127, | |
| "eval_steps_per_second": 3.353, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 40.92, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.7137989401817322, | |
| "eval_runtime": 0.6168, | |
| "eval_samples_per_second": 74.577, | |
| "eval_steps_per_second": 3.242, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 41.54, | |
| "learning_rate": 0.00010294117647058823, | |
| "loss": 0.1852, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.7274174690246582, | |
| "eval_runtime": 0.6771, | |
| "eval_samples_per_second": 67.936, | |
| "eval_steps_per_second": 2.954, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 42.92, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.8352705240249634, | |
| "eval_runtime": 0.6424, | |
| "eval_samples_per_second": 71.602, | |
| "eval_steps_per_second": 3.113, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 43.08, | |
| "learning_rate": 9.80392156862745e-05, | |
| "loss": 0.1641, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.9382135272026062, | |
| "eval_runtime": 0.6048, | |
| "eval_samples_per_second": 76.054, | |
| "eval_steps_per_second": 3.307, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 44.62, | |
| "learning_rate": 9.313725490196079e-05, | |
| "loss": 0.1568, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 44.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8655094504356384, | |
| "eval_runtime": 0.6326, | |
| "eval_samples_per_second": 72.716, | |
| "eval_steps_per_second": 3.162, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.7620847821235657, | |
| "eval_runtime": 0.6048, | |
| "eval_samples_per_second": 76.056, | |
| "eval_steps_per_second": 3.307, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 46.15, | |
| "learning_rate": 8.823529411764706e-05, | |
| "loss": 0.1498, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 46.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.794407069683075, | |
| "eval_runtime": 0.6439, | |
| "eval_samples_per_second": 71.44, | |
| "eval_steps_per_second": 3.106, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 47.69, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.1563, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8432682752609253, | |
| "eval_runtime": 0.6319, | |
| "eval_samples_per_second": 72.801, | |
| "eval_steps_per_second": 3.165, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 48.92, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.8633100986480713, | |
| "eval_runtime": 0.6452, | |
| "eval_samples_per_second": 71.293, | |
| "eval_steps_per_second": 3.1, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 49.23, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 0.1554, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8542913198471069, | |
| "eval_runtime": 0.6393, | |
| "eval_samples_per_second": 71.95, | |
| "eval_steps_per_second": 3.128, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 50.77, | |
| "learning_rate": 7.352941176470589e-05, | |
| "loss": 0.1316, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 50.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9127072095870972, | |
| "eval_runtime": 0.6018, | |
| "eval_samples_per_second": 76.431, | |
| "eval_steps_per_second": 3.323, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.9248411059379578, | |
| "eval_runtime": 0.6005, | |
| "eval_samples_per_second": 76.606, | |
| "eval_steps_per_second": 3.331, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 52.31, | |
| "learning_rate": 6.862745098039216e-05, | |
| "loss": 0.1264, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 52.92, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.9349088072776794, | |
| "eval_runtime": 0.5946, | |
| "eval_samples_per_second": 77.368, | |
| "eval_steps_per_second": 3.364, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 53.85, | |
| "learning_rate": 6.372549019607843e-05, | |
| "loss": 0.1082, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.978458046913147, | |
| "eval_runtime": 0.6614, | |
| "eval_samples_per_second": 69.548, | |
| "eval_steps_per_second": 3.024, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 54.92, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.0165393352508545, | |
| "eval_runtime": 0.6054, | |
| "eval_samples_per_second": 75.984, | |
| "eval_steps_per_second": 3.304, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 55.38, | |
| "learning_rate": 5.882352941176471e-05, | |
| "loss": 0.1366, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8369129300117493, | |
| "eval_runtime": 0.6396, | |
| "eval_samples_per_second": 71.916, | |
| "eval_steps_per_second": 3.127, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 56.92, | |
| "learning_rate": 5.392156862745098e-05, | |
| "loss": 0.1546, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 56.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8372054696083069, | |
| "eval_runtime": 0.6015, | |
| "eval_samples_per_second": 76.478, | |
| "eval_steps_per_second": 3.325, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8595665097236633, | |
| "eval_runtime": 0.6434, | |
| "eval_samples_per_second": 71.491, | |
| "eval_steps_per_second": 3.108, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 58.46, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 0.1218, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 58.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8054145574569702, | |
| "eval_runtime": 0.6049, | |
| "eval_samples_per_second": 76.042, | |
| "eval_steps_per_second": 3.306, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 4.411764705882353e-05, | |
| "loss": 0.1162, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.7963301539421082, | |
| "eval_runtime": 0.6236, | |
| "eval_samples_per_second": 73.763, | |
| "eval_steps_per_second": 3.207, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 60.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.795264720916748, | |
| "eval_runtime": 0.6138, | |
| "eval_samples_per_second": 74.945, | |
| "eval_steps_per_second": 3.258, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 61.54, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 0.0876, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8229031562805176, | |
| "eval_runtime": 0.6005, | |
| "eval_samples_per_second": 76.603, | |
| "eval_steps_per_second": 3.331, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 62.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8364757895469666, | |
| "eval_runtime": 0.626, | |
| "eval_samples_per_second": 73.487, | |
| "eval_steps_per_second": 3.195, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 63.08, | |
| "learning_rate": 3.431372549019608e-05, | |
| "loss": 0.1032, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.8161815404891968, | |
| "eval_runtime": 0.6165, | |
| "eval_samples_per_second": 74.611, | |
| "eval_steps_per_second": 3.244, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 64.62, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 0.0825, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 64.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8645689487457275, | |
| "eval_runtime": 0.608, | |
| "eval_samples_per_second": 75.66, | |
| "eval_steps_per_second": 3.29, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.9134983420372009, | |
| "eval_runtime": 0.7022, | |
| "eval_samples_per_second": 65.511, | |
| "eval_steps_per_second": 2.848, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 66.15, | |
| "learning_rate": 2.4509803921568626e-05, | |
| "loss": 0.1119, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 66.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.9164416193962097, | |
| "eval_runtime": 0.6197, | |
| "eval_samples_per_second": 74.228, | |
| "eval_steps_per_second": 3.227, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 67.69, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 0.0949, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.9232246279716492, | |
| "eval_runtime": 0.6325, | |
| "eval_samples_per_second": 72.724, | |
| "eval_steps_per_second": 3.162, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 68.92, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.9380779266357422, | |
| "eval_runtime": 0.5791, | |
| "eval_samples_per_second": 79.439, | |
| "eval_steps_per_second": 3.454, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 69.23, | |
| "learning_rate": 1.4705882352941177e-05, | |
| "loss": 0.1227, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8997882604598999, | |
| "eval_runtime": 0.606, | |
| "eval_samples_per_second": 75.907, | |
| "eval_steps_per_second": 3.3, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 70.77, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 0.0872, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 70.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9632061123847961, | |
| "eval_runtime": 0.5942, | |
| "eval_samples_per_second": 77.419, | |
| "eval_steps_per_second": 3.366, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8566347360610962, | |
| "eval_runtime": 0.6595, | |
| "eval_samples_per_second": 69.753, | |
| "eval_steps_per_second": 3.033, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 72.31, | |
| "learning_rate": 4.901960784313726e-06, | |
| "loss": 0.1033, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 72.92, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8909047842025757, | |
| "eval_runtime": 0.6783, | |
| "eval_samples_per_second": 67.817, | |
| "eval_steps_per_second": 2.949, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "learning_rate": 0.0, | |
| "loss": 0.0876, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.8869202733039856, | |
| "eval_runtime": 0.6277, | |
| "eval_samples_per_second": 73.284, | |
| "eval_steps_per_second": 3.186, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "step": 480, | |
| "total_flos": 1.6581977329862246e+17, | |
| "train_loss": 0.40781121912101903, | |
| "train_runtime": 277.6094, | |
| "train_samples_per_second": 236.015, | |
| "train_steps_per_second": 1.729 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 480, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 80, | |
| "save_steps": 500, | |
| "total_flos": 1.6581977329862246e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |