| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "global_step": 21600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.953703703703704e-05, | |
| "loss": 1.1352, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.7482928037643433, | |
| "eval_loss": 0.6091228127479553, | |
| "eval_runtime": 1960.8351, | |
| "eval_samples_per_second": 17.625, | |
| "eval_steps_per_second": 4.406, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.907407407407407e-05, | |
| "loss": 0.5497, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.9043981432914734, | |
| "eval_loss": 0.2794453501701355, | |
| "eval_runtime": 2070.7116, | |
| "eval_samples_per_second": 16.69, | |
| "eval_steps_per_second": 4.172, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.861111111111112e-05, | |
| "loss": 0.4001, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.9633391499519348, | |
| "eval_loss": 0.1039256900548935, | |
| "eval_runtime": 2051.9615, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.211, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.814814814814815e-05, | |
| "loss": 0.2967, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.9760127067565918, | |
| "eval_loss": 0.0726834237575531, | |
| "eval_runtime": 1976.3465, | |
| "eval_samples_per_second": 17.487, | |
| "eval_steps_per_second": 4.372, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.768518518518519e-05, | |
| "loss": 0.2572, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.9752025604248047, | |
| "eval_loss": 0.07836401462554932, | |
| "eval_runtime": 1953.8972, | |
| "eval_samples_per_second": 17.688, | |
| "eval_steps_per_second": 4.422, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.722222222222223e-05, | |
| "loss": 0.1858, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.987442135810852, | |
| "eval_loss": 0.03908771649003029, | |
| "eval_runtime": 1933.1285, | |
| "eval_samples_per_second": 17.878, | |
| "eval_steps_per_second": 4.469, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.675925925925926e-05, | |
| "loss": 0.1776, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.9870080947875977, | |
| "eval_loss": 0.046012409031391144, | |
| "eval_runtime": 2073.2574, | |
| "eval_samples_per_second": 16.669, | |
| "eval_steps_per_second": 4.167, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.62962962962963e-05, | |
| "loss": 0.1253, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.987442135810852, | |
| "eval_loss": 0.04302794486284256, | |
| "eval_runtime": 2100.3436, | |
| "eval_samples_per_second": 16.454, | |
| "eval_steps_per_second": 4.114, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.583333333333334e-05, | |
| "loss": 0.1509, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.9821469783782959, | |
| "eval_loss": 0.06500900536775589, | |
| "eval_runtime": 2096.1889, | |
| "eval_samples_per_second": 16.487, | |
| "eval_steps_per_second": 4.122, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.537037037037038e-05, | |
| "loss": 0.1574, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.9847221970558167, | |
| "eval_loss": 0.059933874756097794, | |
| "eval_runtime": 2109.7765, | |
| "eval_samples_per_second": 16.381, | |
| "eval_steps_per_second": 4.095, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 9.490740740740742e-05, | |
| "loss": 0.1506, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.9896122813224792, | |
| "eval_loss": 0.034695032984018326, | |
| "eval_runtime": 2117.6815, | |
| "eval_samples_per_second": 16.32, | |
| "eval_steps_per_second": 4.08, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.444444444444444e-05, | |
| "loss": 0.118, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.9911168813705444, | |
| "eval_loss": 0.03316599503159523, | |
| "eval_runtime": 2107.0764, | |
| "eval_samples_per_second": 16.402, | |
| "eval_steps_per_second": 4.1, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 9.398148148148148e-05, | |
| "loss": 0.0885, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.9947627186775208, | |
| "eval_loss": 0.019724205136299133, | |
| "eval_runtime": 2100.2147, | |
| "eval_samples_per_second": 16.455, | |
| "eval_steps_per_second": 4.114, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.351851851851852e-05, | |
| "loss": 0.0967, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.9936053156852722, | |
| "eval_loss": 0.022701723501086235, | |
| "eval_runtime": 2110.6117, | |
| "eval_samples_per_second": 16.374, | |
| "eval_steps_per_second": 4.094, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 9.305555555555556e-05, | |
| "loss": 0.0882, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.992274284362793, | |
| "eval_loss": 0.02855427749454975, | |
| "eval_runtime": 2115.7419, | |
| "eval_samples_per_second": 16.335, | |
| "eval_steps_per_second": 4.084, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.25925925925926e-05, | |
| "loss": 0.1056, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.9962384104728699, | |
| "eval_loss": 0.015638431534171104, | |
| "eval_runtime": 2093.1202, | |
| "eval_samples_per_second": 16.511, | |
| "eval_steps_per_second": 4.128, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.212962962962963e-05, | |
| "loss": 0.1124, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.9942708611488342, | |
| "eval_loss": 0.023519381880760193, | |
| "eval_runtime": 2715.3988, | |
| "eval_samples_per_second": 12.727, | |
| "eval_steps_per_second": 3.182, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 9.166666666666667e-05, | |
| "loss": 0.0813, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.995341420173645, | |
| "eval_loss": 0.017750833183526993, | |
| "eval_runtime": 2099.7025, | |
| "eval_samples_per_second": 16.459, | |
| "eval_steps_per_second": 4.115, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 9.120370370370371e-05, | |
| "loss": 0.0609, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.9971932768821716, | |
| "eval_loss": 0.011351389810442924, | |
| "eval_runtime": 2142.3716, | |
| "eval_samples_per_second": 16.132, | |
| "eval_steps_per_second": 4.033, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 9.074074074074075e-05, | |
| "loss": 0.0891, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.9973379373550415, | |
| "eval_loss": 0.012310467660427094, | |
| "eval_runtime": 2095.1245, | |
| "eval_samples_per_second": 16.495, | |
| "eval_steps_per_second": 4.124, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 9.027777777777779e-05, | |
| "loss": 0.0424, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.9985821843147278, | |
| "eval_loss": 0.00660862447693944, | |
| "eval_runtime": 2101.8862, | |
| "eval_samples_per_second": 16.442, | |
| "eval_steps_per_second": 4.111, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 8.981481481481481e-05, | |
| "loss": 0.0546, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.9950520992279053, | |
| "eval_loss": 0.021980540826916695, | |
| "eval_runtime": 2121.7281, | |
| "eval_samples_per_second": 16.289, | |
| "eval_steps_per_second": 4.072, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 8.935185185185185e-05, | |
| "loss": 0.146, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.9940393567085266, | |
| "eval_loss": 0.02473669871687889, | |
| "eval_runtime": 2062.8449, | |
| "eval_samples_per_second": 16.754, | |
| "eval_steps_per_second": 4.188, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 0.1174, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.9958622455596924, | |
| "eval_loss": 0.01570066250860691, | |
| "eval_runtime": 2017.5974, | |
| "eval_samples_per_second": 17.129, | |
| "eval_steps_per_second": 4.282, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.842592592592593e-05, | |
| "loss": 0.0848, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.9978298544883728, | |
| "eval_loss": 0.008064490742981434, | |
| "eval_runtime": 2005.1771, | |
| "eval_samples_per_second": 17.235, | |
| "eval_steps_per_second": 4.309, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.796296296296297e-05, | |
| "loss": 0.0792, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.9986110925674438, | |
| "eval_loss": 0.004222337622195482, | |
| "eval_runtime": 1999.0902, | |
| "eval_samples_per_second": 17.288, | |
| "eval_steps_per_second": 4.322, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 8.75e-05, | |
| "loss": 0.0482, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.9971354007720947, | |
| "eval_loss": 0.01219157688319683, | |
| "eval_runtime": 2001.1288, | |
| "eval_samples_per_second": 17.27, | |
| "eval_steps_per_second": 4.318, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 8.703703703703704e-05, | |
| "loss": 0.0697, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.9931133985519409, | |
| "eval_loss": 0.027987554669380188, | |
| "eval_runtime": 1997.1851, | |
| "eval_samples_per_second": 17.304, | |
| "eval_steps_per_second": 4.326, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 8.657407407407408e-05, | |
| "loss": 0.106, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.9977430701255798, | |
| "eval_loss": 0.008220946416258812, | |
| "eval_runtime": 2609.175, | |
| "eval_samples_per_second": 13.246, | |
| "eval_steps_per_second": 3.311, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 8.611111111111112e-05, | |
| "loss": 0.052, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.9971932768821716, | |
| "eval_loss": 0.01051583793014288, | |
| "eval_runtime": 1971.9035, | |
| "eval_samples_per_second": 17.526, | |
| "eval_steps_per_second": 4.382, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.564814814814816e-05, | |
| "loss": 0.047, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.9978588223457336, | |
| "eval_loss": 0.009094738401472569, | |
| "eval_runtime": 1980.5023, | |
| "eval_samples_per_second": 17.45, | |
| "eval_steps_per_second": 4.363, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 8.518518518518518e-05, | |
| "loss": 0.0495, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.998466432094574, | |
| "eval_loss": 0.006100042257457972, | |
| "eval_runtime": 2070.7502, | |
| "eval_samples_per_second": 16.69, | |
| "eval_steps_per_second": 4.172, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 8.472222222222222e-05, | |
| "loss": 0.0979, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.9978588223457336, | |
| "eval_loss": 0.009109850972890854, | |
| "eval_runtime": 1970.0999, | |
| "eval_samples_per_second": 17.542, | |
| "eval_steps_per_second": 4.386, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 8.425925925925926e-05, | |
| "loss": 0.0381, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.9951099753379822, | |
| "eval_loss": 0.021163903176784515, | |
| "eval_runtime": 1977.6331, | |
| "eval_samples_per_second": 17.475, | |
| "eval_steps_per_second": 4.369, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 8.379629629629629e-05, | |
| "loss": 0.0268, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.9980034828186035, | |
| "eval_loss": 0.008532223291695118, | |
| "eval_runtime": 1971.1529, | |
| "eval_samples_per_second": 17.533, | |
| "eval_steps_per_second": 4.383, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.073, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.9961516261100769, | |
| "eval_loss": 0.017610933631658554, | |
| "eval_runtime": 1957.2329, | |
| "eval_samples_per_second": 17.658, | |
| "eval_steps_per_second": 4.414, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 8.287037037037037e-05, | |
| "loss": 0.0585, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.9971354007720947, | |
| "eval_loss": 0.011580849066376686, | |
| "eval_runtime": 1962.4064, | |
| "eval_samples_per_second": 17.611, | |
| "eval_steps_per_second": 4.403, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.240740740740741e-05, | |
| "loss": 0.0868, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.9994502067565918, | |
| "eval_loss": 0.00212017516605556, | |
| "eval_runtime": 1982.4259, | |
| "eval_samples_per_second": 17.433, | |
| "eval_steps_per_second": 4.358, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 8.194444444444445e-05, | |
| "loss": 0.0496, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.9978877305984497, | |
| "eval_loss": 0.008284298703074455, | |
| "eval_runtime": 1983.4898, | |
| "eval_samples_per_second": 17.424, | |
| "eval_steps_per_second": 4.356, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 8.148148148148148e-05, | |
| "loss": 0.0641, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.9967592358589172, | |
| "eval_loss": 0.013520145788788795, | |
| "eval_runtime": 1998.2946, | |
| "eval_samples_per_second": 17.295, | |
| "eval_steps_per_second": 4.324, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 8.101851851851853e-05, | |
| "loss": 0.0858, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.9989872574806213, | |
| "eval_loss": 0.003793817013502121, | |
| "eval_runtime": 2125.8264, | |
| "eval_samples_per_second": 16.257, | |
| "eval_steps_per_second": 4.064, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.055555555555556e-05, | |
| "loss": 0.0483, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.9978588223457336, | |
| "eval_loss": 0.009265501983463764, | |
| "eval_runtime": 2260.2096, | |
| "eval_samples_per_second": 15.291, | |
| "eval_steps_per_second": 3.823, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.00925925925926e-05, | |
| "loss": 0.1115, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9990162253379822, | |
| "eval_loss": 0.003752070013433695, | |
| "eval_runtime": 1992.5237, | |
| "eval_samples_per_second": 17.345, | |
| "eval_steps_per_second": 4.336, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.962962962962964e-05, | |
| "loss": 0.0486, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_accuracy": 0.9991897940635681, | |
| "eval_loss": 0.0031358152627944946, | |
| "eval_runtime": 1985.6758, | |
| "eval_samples_per_second": 17.405, | |
| "eval_steps_per_second": 4.351, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.916666666666666e-05, | |
| "loss": 0.0166, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_accuracy": 0.9995370507240295, | |
| "eval_loss": 0.002144153229892254, | |
| "eval_runtime": 2034.5738, | |
| "eval_samples_per_second": 16.986, | |
| "eval_steps_per_second": 4.247, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 7.870370370370372e-05, | |
| "loss": 0.0084, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_accuracy": 0.9986979365348816, | |
| "eval_loss": 0.006190824322402477, | |
| "eval_runtime": 2022.3112, | |
| "eval_samples_per_second": 17.089, | |
| "eval_steps_per_second": 4.272, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 7.824074074074074e-05, | |
| "loss": 0.0205, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 0.999160885810852, | |
| "eval_loss": 0.0034529021941125393, | |
| "eval_runtime": 2036.3231, | |
| "eval_samples_per_second": 16.972, | |
| "eval_steps_per_second": 4.243, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 7.777777777777778e-05, | |
| "loss": 0.0217, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_accuracy": 0.9973379373550415, | |
| "eval_loss": 0.012433897703886032, | |
| "eval_runtime": 2054.4934, | |
| "eval_samples_per_second": 16.822, | |
| "eval_steps_per_second": 4.205, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 7.731481481481482e-05, | |
| "loss": 0.0407, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_accuracy": 0.9991030097007751, | |
| "eval_loss": 0.004298593383282423, | |
| "eval_runtime": 2009.8166, | |
| "eval_samples_per_second": 17.196, | |
| "eval_steps_per_second": 4.299, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 7.685185185185185e-05, | |
| "loss": 0.0598, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_accuracy": 0.9981771111488342, | |
| "eval_loss": 0.007797444239258766, | |
| "eval_runtime": 1996.948, | |
| "eval_samples_per_second": 17.306, | |
| "eval_steps_per_second": 4.327, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 7.638888888888889e-05, | |
| "loss": 0.058, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_accuracy": 0.9981192350387573, | |
| "eval_loss": 0.009161165915429592, | |
| "eval_runtime": 2007.394, | |
| "eval_samples_per_second": 17.216, | |
| "eval_steps_per_second": 4.304, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 7.592592592592593e-05, | |
| "loss": 0.0119, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_accuracy": 0.9994502067565918, | |
| "eval_loss": 0.0023240004666149616, | |
| "eval_runtime": 2021.6614, | |
| "eval_samples_per_second": 17.095, | |
| "eval_steps_per_second": 4.274, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 7.546296296296297e-05, | |
| "loss": 0.08, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_accuracy": 0.9976562261581421, | |
| "eval_loss": 0.009528687223792076, | |
| "eval_runtime": 2024.2247, | |
| "eval_samples_per_second": 17.073, | |
| "eval_steps_per_second": 4.268, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.0336, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_accuracy": 0.9995949268341064, | |
| "eval_loss": 0.0020153559744358063, | |
| "eval_runtime": 2005.7373, | |
| "eval_samples_per_second": 17.231, | |
| "eval_steps_per_second": 4.308, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.453703703703703e-05, | |
| "loss": 0.0508, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_accuracy": 0.9989004731178284, | |
| "eval_loss": 0.00367682590149343, | |
| "eval_runtime": 2024.401, | |
| "eval_samples_per_second": 17.072, | |
| "eval_steps_per_second": 4.268, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 0.0146, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_accuracy": 0.9992766380310059, | |
| "eval_loss": 0.002618621801957488, | |
| "eval_runtime": 2017.6548, | |
| "eval_samples_per_second": 17.129, | |
| "eval_steps_per_second": 4.282, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 7.361111111111111e-05, | |
| "loss": 0.038, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_accuracy": 0.9988425970077515, | |
| "eval_loss": 0.00465565687045455, | |
| "eval_runtime": 2002.1508, | |
| "eval_samples_per_second": 17.261, | |
| "eval_steps_per_second": 4.315, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 7.314814814814815e-05, | |
| "loss": 0.0613, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_accuracy": 0.998379647731781, | |
| "eval_loss": 0.005978360306471586, | |
| "eval_runtime": 2001.292, | |
| "eval_samples_per_second": 17.269, | |
| "eval_steps_per_second": 4.317, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 7.268518518518519e-05, | |
| "loss": 0.0364, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_accuracy": 0.9971064925193787, | |
| "eval_loss": 0.01282673142850399, | |
| "eval_runtime": 2012.1731, | |
| "eval_samples_per_second": 17.175, | |
| "eval_steps_per_second": 4.294, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 7.222222222222222e-05, | |
| "loss": 0.108, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_accuracy": 0.998379647731781, | |
| "eval_loss": 0.005587506573647261, | |
| "eval_runtime": 2228.721, | |
| "eval_samples_per_second": 15.507, | |
| "eval_steps_per_second": 3.877, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 7.175925925925926e-05, | |
| "loss": 0.0134, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_accuracy": 0.9985821843147278, | |
| "eval_loss": 0.0066048940643668175, | |
| "eval_runtime": 2000.8975, | |
| "eval_samples_per_second": 17.272, | |
| "eval_steps_per_second": 4.318, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 7.12962962962963e-05, | |
| "loss": 0.0389, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_accuracy": 0.9972511529922485, | |
| "eval_loss": 0.012162311002612114, | |
| "eval_runtime": 1997.5848, | |
| "eval_samples_per_second": 17.301, | |
| "eval_steps_per_second": 4.325, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 7.083333333333334e-05, | |
| "loss": 0.0208, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_accuracy": 0.9991030097007751, | |
| "eval_loss": 0.0034532626159489155, | |
| "eval_runtime": 2007.9035, | |
| "eval_samples_per_second": 17.212, | |
| "eval_steps_per_second": 4.303, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 7.037037037037038e-05, | |
| "loss": 0.0376, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_accuracy": 0.9991897940635681, | |
| "eval_loss": 0.004356299061328173, | |
| "eval_runtime": 1996.8911, | |
| "eval_samples_per_second": 17.307, | |
| "eval_steps_per_second": 4.327, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 6.99074074074074e-05, | |
| "loss": 0.0346, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 0.9969907402992249, | |
| "eval_loss": 0.017812130972743034, | |
| "eval_runtime": 2004.911, | |
| "eval_samples_per_second": 17.238, | |
| "eval_steps_per_second": 4.309, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 6.944444444444444e-05, | |
| "loss": 0.0189, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_accuracy": 0.9987847208976746, | |
| "eval_loss": 0.0057495711371302605, | |
| "eval_runtime": 2011.102, | |
| "eval_samples_per_second": 17.185, | |
| "eval_steps_per_second": 4.296, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 6.898148148148148e-05, | |
| "loss": 0.0141, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_accuracy": 0.9992766380310059, | |
| "eval_loss": 0.003152304096147418, | |
| "eval_runtime": 1989.9017, | |
| "eval_samples_per_second": 17.368, | |
| "eval_steps_per_second": 4.342, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 6.851851851851852e-05, | |
| "loss": 0.0719, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_accuracy": 0.9987847208976746, | |
| "eval_loss": 0.005420052912086248, | |
| "eval_runtime": 1969.8998, | |
| "eval_samples_per_second": 17.544, | |
| "eval_steps_per_second": 4.386, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 6.805555555555556e-05, | |
| "loss": 0.0225, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.9971932768821716, | |
| "eval_loss": 0.012641699984669685, | |
| "eval_runtime": 1981.1809, | |
| "eval_samples_per_second": 17.444, | |
| "eval_steps_per_second": 4.361, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 6.759259259259259e-05, | |
| "loss": 0.0682, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_accuracy": 0.9989583492279053, | |
| "eval_loss": 0.003953148610889912, | |
| "eval_runtime": 1973.9678, | |
| "eval_samples_per_second": 17.508, | |
| "eval_steps_per_second": 4.377, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 6.712962962962963e-05, | |
| "loss": 0.0521, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_accuracy": 0.998466432094574, | |
| "eval_loss": 0.005261498969048262, | |
| "eval_runtime": 1989.7692, | |
| "eval_samples_per_second": 17.369, | |
| "eval_steps_per_second": 4.342, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.0358, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_accuracy": 0.9993634223937988, | |
| "eval_loss": 0.002406924497336149, | |
| "eval_runtime": 1975.9496, | |
| "eval_samples_per_second": 17.49, | |
| "eval_steps_per_second": 4.373, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 6.620370370370371e-05, | |
| "loss": 0.0255, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_accuracy": 0.9984953999519348, | |
| "eval_loss": 0.007655243389308453, | |
| "eval_runtime": 1972.1073, | |
| "eval_samples_per_second": 17.524, | |
| "eval_steps_per_second": 4.381, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 6.574074074074075e-05, | |
| "loss": 0.0424, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_accuracy": 0.9996238350868225, | |
| "eval_loss": 0.0017167649930343032, | |
| "eval_runtime": 1980.6249, | |
| "eval_samples_per_second": 17.449, | |
| "eval_steps_per_second": 4.362, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 6.527777777777778e-05, | |
| "loss": 0.0214, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.0009764753049239516, | |
| "eval_runtime": 2005.2649, | |
| "eval_samples_per_second": 17.235, | |
| "eval_steps_per_second": 4.309, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 6.481481481481482e-05, | |
| "loss": 0.0429, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_accuracy": 0.996006965637207, | |
| "eval_loss": 0.019011829048395157, | |
| "eval_runtime": 2045.2435, | |
| "eval_samples_per_second": 16.898, | |
| "eval_steps_per_second": 4.224, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 6.435185185185186e-05, | |
| "loss": 0.0783, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.9976562261581421, | |
| "eval_loss": 0.008234655484557152, | |
| "eval_runtime": 2041.1233, | |
| "eval_samples_per_second": 16.932, | |
| "eval_steps_per_second": 4.233, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 6.388888888888888e-05, | |
| "loss": 0.0141, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_accuracy": 0.9996238350868225, | |
| "eval_loss": 0.0018950661178678274, | |
| "eval_runtime": 1994.0408, | |
| "eval_samples_per_second": 17.332, | |
| "eval_steps_per_second": 4.333, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 6.342592592592594e-05, | |
| "loss": 0.0203, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_accuracy": 0.9994502067565918, | |
| "eval_loss": 0.0022274223156273365, | |
| "eval_runtime": 1978.2563, | |
| "eval_samples_per_second": 17.47, | |
| "eval_steps_per_second": 4.367, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.296296296296296e-05, | |
| "loss": 0.0439, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.9979166388511658, | |
| "eval_loss": 0.007150179240852594, | |
| "eval_runtime": 1990.4775, | |
| "eval_samples_per_second": 17.363, | |
| "eval_steps_per_second": 4.341, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 6.25e-05, | |
| "loss": 0.0228, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_accuracy": 0.9973090291023254, | |
| "eval_loss": 0.010999325662851334, | |
| "eval_runtime": 1995.6933, | |
| "eval_samples_per_second": 17.317, | |
| "eval_steps_per_second": 4.329, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 6.203703703703704e-05, | |
| "loss": 0.0386, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.9996817111968994, | |
| "eval_loss": 0.001689778990112245, | |
| "eval_runtime": 1983.468, | |
| "eval_samples_per_second": 17.424, | |
| "eval_steps_per_second": 4.356, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 6.157407407407407e-05, | |
| "loss": 0.023, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.001407949603162706, | |
| "eval_runtime": 1979.2035, | |
| "eval_samples_per_second": 17.462, | |
| "eval_steps_per_second": 4.365, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 6.111111111111112e-05, | |
| "loss": 0.0188, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.001248441985808313, | |
| "eval_runtime": 1986.8349, | |
| "eval_samples_per_second": 17.395, | |
| "eval_steps_per_second": 4.349, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 6.0648148148148154e-05, | |
| "loss": 0.0301, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.9998553395271301, | |
| "eval_loss": 0.0005934939254075289, | |
| "eval_runtime": 2006.6093, | |
| "eval_samples_per_second": 17.223, | |
| "eval_steps_per_second": 4.306, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 6.018518518518519e-05, | |
| "loss": 0.0077, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.00026703893672674894, | |
| "eval_runtime": 2048.5713, | |
| "eval_samples_per_second": 16.87, | |
| "eval_steps_per_second": 4.218, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 5.972222222222223e-05, | |
| "loss": 0.0291, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_accuracy": 0.9989872574806213, | |
| "eval_loss": 0.004098657984286547, | |
| "eval_runtime": 2031.7661, | |
| "eval_samples_per_second": 17.01, | |
| "eval_steps_per_second": 4.252, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 5.925925925925926e-05, | |
| "loss": 0.0274, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_accuracy": 0.9995949268341064, | |
| "eval_loss": 0.001983657479286194, | |
| "eval_runtime": 2040.4005, | |
| "eval_samples_per_second": 16.938, | |
| "eval_steps_per_second": 4.234, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 5.879629629629629e-05, | |
| "loss": 0.0193, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0003717490180861205, | |
| "eval_runtime": 2041.1098, | |
| "eval_samples_per_second": 16.932, | |
| "eval_steps_per_second": 4.233, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 5.833333333333334e-05, | |
| "loss": 0.0296, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_accuracy": 0.9997684955596924, | |
| "eval_loss": 0.0011291600530967116, | |
| "eval_runtime": 2004.7261, | |
| "eval_samples_per_second": 17.239, | |
| "eval_steps_per_second": 4.31, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 5.787037037037037e-05, | |
| "loss": 0.0033, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_accuracy": 0.9998553395271301, | |
| "eval_loss": 0.0005978959961794317, | |
| "eval_runtime": 2001.4449, | |
| "eval_samples_per_second": 17.268, | |
| "eval_steps_per_second": 4.317, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 5.740740740740741e-05, | |
| "loss": 0.0218, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_accuracy": 0.999160885810852, | |
| "eval_loss": 0.0025338120758533478, | |
| "eval_runtime": 1990.5201, | |
| "eval_samples_per_second": 17.362, | |
| "eval_steps_per_second": 4.341, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 5.6944444444444445e-05, | |
| "loss": 0.0238, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_accuracy": 0.999218761920929, | |
| "eval_loss": 0.0033705937676131725, | |
| "eval_runtime": 2019.0619, | |
| "eval_samples_per_second": 17.117, | |
| "eval_steps_per_second": 4.279, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 5.648148148148148e-05, | |
| "loss": 0.0319, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_accuracy": 0.9994791746139526, | |
| "eval_loss": 0.0017771282000467181, | |
| "eval_runtime": 1997.4844, | |
| "eval_samples_per_second": 17.302, | |
| "eval_steps_per_second": 4.325, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 5.6018518518518525e-05, | |
| "loss": 0.0465, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_accuracy": 0.9994502067565918, | |
| "eval_loss": 0.002331700176000595, | |
| "eval_runtime": 2012.3132, | |
| "eval_samples_per_second": 17.174, | |
| "eval_steps_per_second": 4.294, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 5.555555555555556e-05, | |
| "loss": 0.0412, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.001237583113834262, | |
| "eval_runtime": 1999.8191, | |
| "eval_samples_per_second": 17.282, | |
| "eval_steps_per_second": 4.32, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 5.50925925925926e-05, | |
| "loss": 0.02, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0009390079067088664, | |
| "eval_runtime": 2008.6286, | |
| "eval_samples_per_second": 17.206, | |
| "eval_steps_per_second": 4.301, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 5.462962962962963e-05, | |
| "loss": 0.0226, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_accuracy": 0.9995949268341064, | |
| "eval_loss": 0.0017483533592894673, | |
| "eval_runtime": 2005.0151, | |
| "eval_samples_per_second": 17.237, | |
| "eval_steps_per_second": 4.309, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 5.4166666666666664e-05, | |
| "loss": 0.0104, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_accuracy": 0.9997684955596924, | |
| "eval_loss": 0.0008292018319480121, | |
| "eval_runtime": 1992.6545, | |
| "eval_samples_per_second": 17.344, | |
| "eval_steps_per_second": 4.336, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5.370370370370371e-05, | |
| "loss": 0.0021, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.000292919430648908, | |
| "eval_runtime": 1987.2006, | |
| "eval_samples_per_second": 17.391, | |
| "eval_steps_per_second": 4.348, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 5.3240740740740744e-05, | |
| "loss": 0.0135, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_accuracy": 0.9987558126449585, | |
| "eval_loss": 0.005596287082880735, | |
| "eval_runtime": 2007.3126, | |
| "eval_samples_per_second": 17.217, | |
| "eval_steps_per_second": 4.304, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 5.2777777777777784e-05, | |
| "loss": 0.0319, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.9995949268341064, | |
| "eval_loss": 0.001722234534099698, | |
| "eval_runtime": 2007.5056, | |
| "eval_samples_per_second": 17.215, | |
| "eval_steps_per_second": 4.304, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 5.231481481481482e-05, | |
| "loss": 0.0279, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.001128367381170392, | |
| "eval_runtime": 1980.2057, | |
| "eval_samples_per_second": 17.453, | |
| "eval_steps_per_second": 4.363, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.185185185185185e-05, | |
| "loss": 0.0017, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.0013848639791831374, | |
| "eval_runtime": 2007.1812, | |
| "eval_samples_per_second": 17.218, | |
| "eval_steps_per_second": 4.305, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5.138888888888889e-05, | |
| "loss": 0.0296, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_accuracy": 0.9989583492279053, | |
| "eval_loss": 0.005161995068192482, | |
| "eval_runtime": 1986.761, | |
| "eval_samples_per_second": 17.395, | |
| "eval_steps_per_second": 4.349, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 5.092592592592593e-05, | |
| "loss": 0.0168, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.0004770481900777668, | |
| "eval_runtime": 2003.7003, | |
| "eval_samples_per_second": 17.248, | |
| "eval_steps_per_second": 4.312, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 5.046296296296297e-05, | |
| "loss": 0.0194, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_accuracy": 0.9997684955596924, | |
| "eval_loss": 0.000735765672288835, | |
| "eval_runtime": 1994.758, | |
| "eval_samples_per_second": 17.325, | |
| "eval_steps_per_second": 4.331, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0006, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_accuracy": 0.9998263716697693, | |
| "eval_loss": 0.0009093827102333307, | |
| "eval_runtime": 2000.8621, | |
| "eval_samples_per_second": 17.273, | |
| "eval_steps_per_second": 4.318, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 4.9537037037037035e-05, | |
| "loss": 0.0293, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.0005157970590516925, | |
| "eval_runtime": 2011.0169, | |
| "eval_samples_per_second": 17.185, | |
| "eval_steps_per_second": 4.296, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 4.9074074074074075e-05, | |
| "loss": 0.0016, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_accuracy": 0.9994791746139526, | |
| "eval_loss": 0.0025301428977400064, | |
| "eval_runtime": 2000.9133, | |
| "eval_samples_per_second": 17.272, | |
| "eval_steps_per_second": 4.318, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 4.8611111111111115e-05, | |
| "loss": 0.0069, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0004891157150268555, | |
| "eval_runtime": 2006.7369, | |
| "eval_samples_per_second": 17.222, | |
| "eval_steps_per_second": 4.305, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 0.0001, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.00020419809152372181, | |
| "eval_runtime": 1993.3725, | |
| "eval_samples_per_second": 17.337, | |
| "eval_steps_per_second": 4.334, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 4.768518518518519e-05, | |
| "loss": 0.0108, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.0010758559219539165, | |
| "eval_runtime": 2001.2763, | |
| "eval_samples_per_second": 17.269, | |
| "eval_steps_per_second": 4.317, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 0.0165, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0006313551566563547, | |
| "eval_runtime": 1995.5247, | |
| "eval_samples_per_second": 17.319, | |
| "eval_steps_per_second": 4.33, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 4.675925925925926e-05, | |
| "loss": 0.0001, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.0007648964528925717, | |
| "eval_runtime": 2001.09, | |
| "eval_samples_per_second": 17.271, | |
| "eval_steps_per_second": 4.318, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.0244, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_accuracy": 0.9998553395271301, | |
| "eval_loss": 0.000668107473757118, | |
| "eval_runtime": 2000.7577, | |
| "eval_samples_per_second": 17.273, | |
| "eval_steps_per_second": 4.318, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.5833333333333334e-05, | |
| "loss": 0.0312, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 0.9995659589767456, | |
| "eval_loss": 0.001716578146442771, | |
| "eval_runtime": 1997.1256, | |
| "eval_samples_per_second": 17.305, | |
| "eval_steps_per_second": 4.326, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.5370370370370374e-05, | |
| "loss": 0.0191, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.0007975550834089518, | |
| "eval_runtime": 1967.8746, | |
| "eval_samples_per_second": 17.562, | |
| "eval_steps_per_second": 4.391, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.490740740740741e-05, | |
| "loss": 0.0005, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0004628011374734342, | |
| "eval_runtime": 1958.7798, | |
| "eval_samples_per_second": 17.644, | |
| "eval_steps_per_second": 4.411, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.0259, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.9996238350868225, | |
| "eval_loss": 0.001358355744741857, | |
| "eval_runtime": 1971.0225, | |
| "eval_samples_per_second": 17.534, | |
| "eval_steps_per_second": 4.384, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.3981481481481486e-05, | |
| "loss": 0.0226, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00019500043708831072, | |
| "eval_runtime": 1970.5019, | |
| "eval_samples_per_second": 17.539, | |
| "eval_steps_per_second": 4.385, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 4.351851851851852e-05, | |
| "loss": 0.0, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.0002164940524380654, | |
| "eval_runtime": 1961.9305, | |
| "eval_samples_per_second": 17.615, | |
| "eval_steps_per_second": 4.404, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 4.305555555555556e-05, | |
| "loss": 0.0, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.00010657820530468598, | |
| "eval_runtime": 1973.4403, | |
| "eval_samples_per_second": 17.513, | |
| "eval_steps_per_second": 4.378, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 0.0145, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.758801151183434e-05, | |
| "eval_runtime": 1977.328, | |
| "eval_samples_per_second": 17.478, | |
| "eval_steps_per_second": 4.37, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 4.212962962962963e-05, | |
| "loss": 0.0083, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_accuracy": 0.9995659589767456, | |
| "eval_loss": 0.001972577767446637, | |
| "eval_runtime": 1962.5085, | |
| "eval_samples_per_second": 17.61, | |
| "eval_steps_per_second": 4.403, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.02, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 0.9994791746139526, | |
| "eval_loss": 0.00198388216085732, | |
| "eval_runtime": 1956.5161, | |
| "eval_samples_per_second": 17.664, | |
| "eval_steps_per_second": 4.416, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 4.1203703703703705e-05, | |
| "loss": 0.0293, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_accuracy": 0.9994212985038757, | |
| "eval_loss": 0.0031591171864420176, | |
| "eval_runtime": 1997.7409, | |
| "eval_samples_per_second": 17.3, | |
| "eval_steps_per_second": 4.325, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 0.0164, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.0012433998053893447, | |
| "eval_runtime": 2019.1263, | |
| "eval_samples_per_second": 17.116, | |
| "eval_steps_per_second": 4.279, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 4.027777777777778e-05, | |
| "loss": 0.0147, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.9997684955596924, | |
| "eval_loss": 0.001224155188538134, | |
| "eval_runtime": 2015.6572, | |
| "eval_samples_per_second": 17.146, | |
| "eval_steps_per_second": 4.286, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 3.981481481481482e-05, | |
| "loss": 0.0112, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.0008148940978571773, | |
| "eval_runtime": 1991.7022, | |
| "eval_samples_per_second": 17.352, | |
| "eval_steps_per_second": 4.338, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 3.935185185185186e-05, | |
| "loss": 0.002, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.0012871942017227411, | |
| "eval_runtime": 2016.2834, | |
| "eval_samples_per_second": 17.14, | |
| "eval_steps_per_second": 4.285, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.017, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.0010973262833431363, | |
| "eval_runtime": 2017.4262, | |
| "eval_samples_per_second": 17.131, | |
| "eval_steps_per_second": 4.283, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.8425925925925924e-05, | |
| "loss": 0.0142, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "eval_accuracy": 0.9996528029441833, | |
| "eval_loss": 0.0019141812808811665, | |
| "eval_runtime": 2002.6757, | |
| "eval_samples_per_second": 17.257, | |
| "eval_steps_per_second": 4.314, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 3.7962962962962964e-05, | |
| "loss": 0.008, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "eval_accuracy": 0.9997395873069763, | |
| "eval_loss": 0.00135290517937392, | |
| "eval_runtime": 2020.5372, | |
| "eval_samples_per_second": 17.104, | |
| "eval_steps_per_second": 4.276, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0411, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.000736766669433564, | |
| "eval_runtime": 2162.1556, | |
| "eval_samples_per_second": 15.984, | |
| "eval_steps_per_second": 3.996, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.0262, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_accuracy": 0.9998553395271301, | |
| "eval_loss": 0.000846204929985106, | |
| "eval_runtime": 2119.0303, | |
| "eval_samples_per_second": 16.309, | |
| "eval_steps_per_second": 4.077, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 3.6574074074074076e-05, | |
| "loss": 0.0198, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "eval_accuracy": 0.9997106194496155, | |
| "eval_loss": 0.0010991438757628202, | |
| "eval_runtime": 2095.0628, | |
| "eval_samples_per_second": 16.496, | |
| "eval_steps_per_second": 4.124, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.0178, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.00029710811213590205, | |
| "eval_runtime": 2130.6792, | |
| "eval_samples_per_second": 16.22, | |
| "eval_steps_per_second": 4.055, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 3.564814814814815e-05, | |
| "loss": 0.0072, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00018699387146625668, | |
| "eval_runtime": 2082.0917, | |
| "eval_samples_per_second": 16.599, | |
| "eval_steps_per_second": 4.15, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 0.0004, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "eval_accuracy": 0.9998263716697693, | |
| "eval_loss": 0.0013777822023257613, | |
| "eval_runtime": 2053.664, | |
| "eval_samples_per_second": 16.828, | |
| "eval_steps_per_second": 4.207, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 0.0191, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.0004184871504548937, | |
| "eval_runtime": 2048.7946, | |
| "eval_samples_per_second": 16.868, | |
| "eval_steps_per_second": 4.217, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 3.425925925925926e-05, | |
| "loss": 0.007, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.0004062869702465832, | |
| "eval_runtime": 2055.664, | |
| "eval_samples_per_second": 16.812, | |
| "eval_steps_per_second": 4.203, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.3796296296296295e-05, | |
| "loss": 0.0108, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.00011388419807190076, | |
| "eval_runtime": 2043.6545, | |
| "eval_samples_per_second": 16.911, | |
| "eval_steps_per_second": 4.228, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 7.532363088103011e-05, | |
| "eval_runtime": 2040.9204, | |
| "eval_samples_per_second": 16.934, | |
| "eval_steps_per_second": 4.233, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.2870370370370375e-05, | |
| "loss": 0.0006, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.0003408396732993424, | |
| "eval_runtime": 2057.5647, | |
| "eval_samples_per_second": 16.797, | |
| "eval_steps_per_second": 4.199, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.240740740740741e-05, | |
| "loss": 0.0085, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "eval_accuracy": 0.9992766380310059, | |
| "eval_loss": 0.0034337618853896856, | |
| "eval_runtime": 2027.249, | |
| "eval_samples_per_second": 17.048, | |
| "eval_steps_per_second": 4.262, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 3.194444444444444e-05, | |
| "loss": 0.0002, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.0006225552642717957, | |
| "eval_runtime": 2004.1478, | |
| "eval_samples_per_second": 17.244, | |
| "eval_steps_per_second": 4.311, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 0.0181, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.000251033779932186, | |
| "eval_runtime": 2016.4131, | |
| "eval_samples_per_second": 17.139, | |
| "eval_steps_per_second": 4.285, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.101851851851852e-05, | |
| "loss": 0.0021, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.00040141510544344783, | |
| "eval_runtime": 2000.3942, | |
| "eval_samples_per_second": 17.277, | |
| "eval_steps_per_second": 4.319, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.0069, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0006463331519626081, | |
| "eval_runtime": 2015.2783, | |
| "eval_samples_per_second": 17.149, | |
| "eval_steps_per_second": 4.287, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 3.0092592592592593e-05, | |
| "loss": 0.0156, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.0001428252726327628, | |
| "eval_runtime": 1995.7618, | |
| "eval_samples_per_second": 17.317, | |
| "eval_steps_per_second": 4.329, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.0042, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.000510143639985472, | |
| "eval_runtime": 2000.972, | |
| "eval_samples_per_second": 17.272, | |
| "eval_steps_per_second": 4.318, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.916666666666667e-05, | |
| "loss": 0.0233, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00019888828683178872, | |
| "eval_runtime": 2002.1598, | |
| "eval_samples_per_second": 17.261, | |
| "eval_steps_per_second": 4.315, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.8703703703703706e-05, | |
| "loss": 0.003, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "eval_accuracy": 0.9997974634170532, | |
| "eval_loss": 0.0006905001355335116, | |
| "eval_runtime": 2000.419, | |
| "eval_samples_per_second": 17.276, | |
| "eval_steps_per_second": 4.319, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.824074074074074e-05, | |
| "loss": 0.0149, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "eval_accuracy": 0.9998553395271301, | |
| "eval_loss": 0.000585312838666141, | |
| "eval_runtime": 1997.3791, | |
| "eval_samples_per_second": 17.303, | |
| "eval_steps_per_second": 4.326, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.0072, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.000229826764552854, | |
| "eval_runtime": 2001.2597, | |
| "eval_samples_per_second": 17.269, | |
| "eval_steps_per_second": 4.317, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.7314814814814816e-05, | |
| "loss": 0.0004, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 5.024338679504581e-05, | |
| "eval_runtime": 2013.5805, | |
| "eval_samples_per_second": 17.163, | |
| "eval_steps_per_second": 4.291, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 2.6851851851851855e-05, | |
| "loss": 0.0001, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "eval_accuracy": 0.999913215637207, | |
| "eval_loss": 0.00017916383512783796, | |
| "eval_runtime": 1994.345, | |
| "eval_samples_per_second": 17.329, | |
| "eval_steps_per_second": 4.332, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 2.6388888888888892e-05, | |
| "loss": 0.0186, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 9.207503353536595e-06, | |
| "eval_runtime": 2056.2161, | |
| "eval_samples_per_second": 16.808, | |
| "eval_steps_per_second": 4.202, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 0.0115, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00022165325935930014, | |
| "eval_runtime": 2044.6907, | |
| "eval_samples_per_second": 16.902, | |
| "eval_steps_per_second": 4.226, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 2.5462962962962965e-05, | |
| "loss": 0.0011, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00027788631268776953, | |
| "eval_runtime": 2046.6409, | |
| "eval_samples_per_second": 16.886, | |
| "eval_steps_per_second": 4.222, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0048, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 5.909843821427785e-05, | |
| "eval_runtime": 2008.3137, | |
| "eval_samples_per_second": 17.208, | |
| "eval_steps_per_second": 4.302, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 2.4537037037037038e-05, | |
| "loss": 0.0042, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.828932328062365e-06, | |
| "eval_runtime": 2129.8226, | |
| "eval_samples_per_second": 16.227, | |
| "eval_steps_per_second": 4.057, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 0.0024, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 8.2383139670128e-06, | |
| "eval_runtime": 2113.6583, | |
| "eval_samples_per_second": 16.351, | |
| "eval_steps_per_second": 4.088, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 2.361111111111111e-05, | |
| "loss": 0.0, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.800426606583642e-06, | |
| "eval_runtime": 2122.3997, | |
| "eval_samples_per_second": 16.283, | |
| "eval_steps_per_second": 4.071, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 2.314814814814815e-05, | |
| "loss": 0.0003, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00010272156214341521, | |
| "eval_runtime": 2128.9658, | |
| "eval_samples_per_second": 16.233, | |
| "eval_steps_per_second": 4.058, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 2.2685185185185187e-05, | |
| "loss": 0.0, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 7.889495464041829e-05, | |
| "eval_runtime": 2147.2327, | |
| "eval_samples_per_second": 16.095, | |
| "eval_steps_per_second": 4.024, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 7.938377530081198e-05, | |
| "eval_runtime": 2139.4855, | |
| "eval_samples_per_second": 16.153, | |
| "eval_steps_per_second": 4.038, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 2.175925925925926e-05, | |
| "loss": 0.0029, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_accuracy": 0.9998842477798462, | |
| "eval_loss": 0.0005274215945973992, | |
| "eval_runtime": 2142.1862, | |
| "eval_samples_per_second": 16.133, | |
| "eval_steps_per_second": 4.033, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 2.1296296296296296e-05, | |
| "loss": 0.0066, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00019657429947983474, | |
| "eval_runtime": 2149.2032, | |
| "eval_samples_per_second": 16.08, | |
| "eval_steps_per_second": 4.02, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 0.0079, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 5.7856173953041434e-05, | |
| "eval_runtime": 2135.9752, | |
| "eval_samples_per_second": 16.18, | |
| "eval_steps_per_second": 4.045, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 0.0091, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.00015575718134641647, | |
| "eval_runtime": 2158.5953, | |
| "eval_samples_per_second": 16.01, | |
| "eval_steps_per_second": 4.003, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.990740740740741e-05, | |
| "loss": 0.0951, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 6.823511648690328e-05, | |
| "eval_runtime": 2106.2766, | |
| "eval_samples_per_second": 16.408, | |
| "eval_steps_per_second": 4.102, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0578, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.00031872568069957197, | |
| "eval_runtime": 2091.5056, | |
| "eval_samples_per_second": 16.524, | |
| "eval_steps_per_second": 4.131, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 1.8981481481481482e-05, | |
| "loss": 0.0171, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "eval_accuracy": 0.9999421238899231, | |
| "eval_loss": 0.0003504869237076491, | |
| "eval_runtime": 2076.2302, | |
| "eval_samples_per_second": 16.646, | |
| "eval_steps_per_second": 4.161, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.0305, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00012279333896003664, | |
| "eval_runtime": 2072.7643, | |
| "eval_samples_per_second": 16.673, | |
| "eval_steps_per_second": 4.168, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 1.8055555555555555e-05, | |
| "loss": 0.0449, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00021972648391965777, | |
| "eval_runtime": 2090.628, | |
| "eval_samples_per_second": 16.531, | |
| "eval_steps_per_second": 4.133, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 1.7592592592592595e-05, | |
| "loss": 0.0161, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 2.7198611860512756e-05, | |
| "eval_runtime": 2085.7289, | |
| "eval_samples_per_second": 16.57, | |
| "eval_steps_per_second": 4.142, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 1.712962962962963e-05, | |
| "loss": 0.0322, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 2.2180371161084622e-05, | |
| "eval_runtime": 2061.6769, | |
| "eval_samples_per_second": 16.763, | |
| "eval_steps_per_second": 4.191, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0358, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "eval_accuracy": 0.9999710917472839, | |
| "eval_loss": 0.00010751090303529054, | |
| "eval_runtime": 2107.1409, | |
| "eval_samples_per_second": 16.401, | |
| "eval_steps_per_second": 4.1, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 1.6203703703703704e-05, | |
| "loss": 0.0264, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.194192792463582e-06, | |
| "eval_runtime": 2091.7086, | |
| "eval_samples_per_second": 16.522, | |
| "eval_steps_per_second": 4.131, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 1.574074074074074e-05, | |
| "loss": 0.0199, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.114233656262513e-06, | |
| "eval_runtime": 2093.6259, | |
| "eval_samples_per_second": 16.507, | |
| "eval_steps_per_second": 4.127, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 1.527777777777778e-05, | |
| "loss": 0.0266, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.532317456731107e-06, | |
| "eval_runtime": 2103.3039, | |
| "eval_samples_per_second": 16.431, | |
| "eval_steps_per_second": 4.108, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.0162, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.056379334040685e-06, | |
| "eval_runtime": 2141.6719, | |
| "eval_samples_per_second": 16.137, | |
| "eval_steps_per_second": 4.034, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 1.4351851851851853e-05, | |
| "loss": 0.0142, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.732083420502022e-06, | |
| "eval_runtime": 2137.4831, | |
| "eval_samples_per_second": 16.169, | |
| "eval_steps_per_second": 4.042, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0353, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.884473466721829e-06, | |
| "eval_runtime": 2111.534, | |
| "eval_samples_per_second": 16.367, | |
| "eval_steps_per_second": 4.092, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.3425925925925928e-05, | |
| "loss": 0.0435, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.438468062697211e-06, | |
| "eval_runtime": 2127.2273, | |
| "eval_samples_per_second": 16.247, | |
| "eval_steps_per_second": 4.062, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 0.0067, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 9.256172234017868e-06, | |
| "eval_runtime": 2183.0463, | |
| "eval_samples_per_second": 15.831, | |
| "eval_steps_per_second": 3.958, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0299, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.904490419401554e-06, | |
| "eval_runtime": 2110.4592, | |
| "eval_samples_per_second": 16.376, | |
| "eval_steps_per_second": 4.094, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.2037037037037037e-05, | |
| "loss": 0.0063, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.991323061811272e-06, | |
| "eval_runtime": 2074.4391, | |
| "eval_samples_per_second": 16.66, | |
| "eval_steps_per_second": 4.165, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.1574074074074075e-05, | |
| "loss": 0.0117, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.223146672506118e-06, | |
| "eval_runtime": 2093.0232, | |
| "eval_samples_per_second": 16.512, | |
| "eval_steps_per_second": 4.128, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.0107, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.764682777633425e-06, | |
| "eval_runtime": 2092.3489, | |
| "eval_samples_per_second": 16.517, | |
| "eval_steps_per_second": 4.129, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.0648148148148148e-05, | |
| "loss": 0.0162, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.700497240468394e-06, | |
| "eval_runtime": 2095.1985, | |
| "eval_samples_per_second": 16.495, | |
| "eval_steps_per_second": 4.124, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.0185185185185185e-05, | |
| "loss": 0.0138, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.209324172028573e-06, | |
| "eval_runtime": 2073.7497, | |
| "eval_samples_per_second": 16.665, | |
| "eval_steps_per_second": 4.166, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 9.722222222222223e-06, | |
| "loss": 0.0124, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.243016858003102e-06, | |
| "eval_runtime": 2056.3515, | |
| "eval_samples_per_second": 16.806, | |
| "eval_steps_per_second": 4.202, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.0083, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.0634776016522665e-06, | |
| "eval_runtime": 2077.8389, | |
| "eval_samples_per_second": 16.633, | |
| "eval_steps_per_second": 4.158, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 8.796296296296297e-06, | |
| "loss": 0.0066, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.925776011077687e-06, | |
| "eval_runtime": 2073.5316, | |
| "eval_samples_per_second": 16.667, | |
| "eval_steps_per_second": 4.167, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0058, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.750945663545281e-06, | |
| "eval_runtime": 2057.702, | |
| "eval_samples_per_second": 16.795, | |
| "eval_steps_per_second": 4.199, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 7.87037037037037e-06, | |
| "loss": 0.0032, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.96109145792434e-06, | |
| "eval_runtime": 2071.5479, | |
| "eval_samples_per_second": 16.683, | |
| "eval_steps_per_second": 4.171, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.0205, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.608726612786995e-06, | |
| "eval_runtime": 2066.372, | |
| "eval_samples_per_second": 16.725, | |
| "eval_steps_per_second": 4.181, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 0.0094, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.8284973672707565e-06, | |
| "eval_runtime": 2054.9166, | |
| "eval_samples_per_second": 16.818, | |
| "eval_steps_per_second": 4.205, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 6.481481481481481e-06, | |
| "loss": 0.003, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 4.495966550166486e-06, | |
| "eval_runtime": 2072.6571, | |
| "eval_samples_per_second": 16.674, | |
| "eval_steps_per_second": 4.169, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 6.0185185185185185e-06, | |
| "loss": 0.0035, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.835635420226026e-06, | |
| "eval_runtime": 2047.8141, | |
| "eval_samples_per_second": 16.877, | |
| "eval_steps_per_second": 4.219, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.0257, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 5.829508609167533e-06, | |
| "eval_runtime": 2091.8646, | |
| "eval_samples_per_second": 16.521, | |
| "eval_steps_per_second": 4.13, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 5.092592592592592e-06, | |
| "loss": 0.0019, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.3429124566027895e-06, | |
| "eval_runtime": 2040.9379, | |
| "eval_samples_per_second": 16.933, | |
| "eval_steps_per_second": 4.233, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 4.6296296296296296e-06, | |
| "loss": 0.0023, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 8.131992217386141e-06, | |
| "eval_runtime": 2048.3614, | |
| "eval_samples_per_second": 16.872, | |
| "eval_steps_per_second": 4.218, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.0062, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 8.594151950092055e-06, | |
| "eval_runtime": 2094.5382, | |
| "eval_samples_per_second": 16.5, | |
| "eval_steps_per_second": 4.125, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.0039, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.4294948717579246e-06, | |
| "eval_runtime": 2104.354, | |
| "eval_samples_per_second": 16.423, | |
| "eval_steps_per_second": 4.106, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 3.2407407407407406e-06, | |
| "loss": 0.0144, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.862039299448952e-06, | |
| "eval_runtime": 2101.0817, | |
| "eval_samples_per_second": 16.449, | |
| "eval_steps_per_second": 4.112, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0109, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.136932825029362e-06, | |
| "eval_runtime": 2119.5964, | |
| "eval_samples_per_second": 16.305, | |
| "eval_steps_per_second": 4.076, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.3148148148148148e-06, | |
| "loss": 0.0148, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.497817139461404e-06, | |
| "eval_runtime": 2115.6009, | |
| "eval_samples_per_second": 16.336, | |
| "eval_steps_per_second": 4.084, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.0308, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.753816134936642e-06, | |
| "eval_runtime": 2118.9207, | |
| "eval_samples_per_second": 16.31, | |
| "eval_steps_per_second": 4.078, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 1.388888888888889e-06, | |
| "loss": 0.0023, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.5415960054669995e-06, | |
| "eval_runtime": 2120.9953, | |
| "eval_samples_per_second": 16.294, | |
| "eval_steps_per_second": 4.074, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 9.259259259259259e-07, | |
| "loss": 0.0243, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.68591053201817e-06, | |
| "eval_runtime": 2120.6941, | |
| "eval_samples_per_second": 16.297, | |
| "eval_steps_per_second": 4.074, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 4.6296296296296297e-07, | |
| "loss": 0.0031, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.5350230872572865e-06, | |
| "eval_runtime": 2105.7948, | |
| "eval_samples_per_second": 16.412, | |
| "eval_steps_per_second": 4.103, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0272, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.493398243241245e-06, | |
| "eval_runtime": 2100.1734, | |
| "eval_samples_per_second": 16.456, | |
| "eval_steps_per_second": 4.114, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 21600, | |
| "total_flos": 2.295560541703184e+19, | |
| "train_loss": 0.003923701412147946, | |
| "train_runtime": 97975.3145, | |
| "train_samples_per_second": 1.764, | |
| "train_steps_per_second": 0.22 | |
| } | |
| ], | |
| "max_steps": 21600, | |
| "num_train_epochs": 5, | |
| "total_flos": 2.295560541703184e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
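
The structure above is the standard Trainer state layout: interleaved training entries (keyed by "loss" and "learning_rate") and evaluation entries (keyed by "eval_loss", "eval_accuracy", and timing fields), followed by a closing summary record with "total_flos" and "train_runtime". As a minimal sketch of how such a log can be inspected, the snippet below assumes the JSON is saved as a plain `trainer_state.json` file (the path and the printed fields are illustrative assumptions, not part of the log itself) and uses only Python's standard `json` module.

```python
# Minimal sketch: load a Trainer state log and summarize it.
# Assumes the JSON above is stored as "trainer_state.json" (illustrative path).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# "log_history" interleaves training and evaluation records;
# evaluation records are the ones that carry "eval_loss".
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

# Step at which eval_loss is lowest across the logged run.
best = min(eval_logs, key=lambda e: e["eval_loss"])
print(f"best eval_loss {best['eval_loss']:.3e} "
      f"(eval_accuracy {best['eval_accuracy']:.6f}) at step {best['step']}")

# Final summary record: total_flos, train_loss, train_runtime, throughput.
print(state["log_history"][-1])
```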