| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "global_step": 724, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 4.2523, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 3.4255, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8e-05, | |
| "loss": 2.5412, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 2.0169, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8e-05, | |
| "loss": 1.8547, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.999646594434211e-05, | |
| "loss": 1.8217, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.998586440184589e-05, | |
| "loss": 1.7177, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 7.996819724583341e-05, | |
| "loss": 1.6448, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 7.99434675981403e-05, | |
| "loss": 1.6419, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 7.991167982856416e-05, | |
| "loss": 1.595, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 7.987283955409229e-05, | |
| "loss": 1.7162, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 7.982695363790929e-05, | |
| "loss": 1.6641, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 7.977403018818425e-05, | |
| "loss": 1.5268, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.971407855663803e-05, | |
| "loss": 1.5187, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.964710933689073e-05, | |
| "loss": 1.6022, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 7.95731343625899e-05, | |
| "loss": 1.5216, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 7.94921667053193e-05, | |
| "loss": 1.5416, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 7.940422067228933e-05, | |
| "loss": 1.5302, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 7.930931180380879e-05, | |
| "loss": 1.4887, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 7.920745687053881e-05, | |
| "loss": 1.5794, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 7.909867387052959e-05, | |
| "loss": 1.4197, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 7.898298202603996e-05, | |
| "loss": 1.5554, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 7.886040178014079e-05, | |
| "loss": 1.4569, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.873095479310265e-05, | |
| "loss": 1.4795, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.859466393856842e-05, | |
| "loss": 1.4088, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.845155329951134e-05, | |
| "loss": 1.4127, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.830164816397961e-05, | |
| "loss": 1.3549, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.814497502062784e-05, | |
| "loss": 1.4085, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.798156155403649e-05, | |
| "loss": 1.4669, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.781143663981985e-05, | |
| "loss": 1.3935, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.76346303395237e-05, | |
| "loss": 1.4601, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.745117389531335e-05, | |
| "loss": 1.3549, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.726109972445301e-05, | |
| "loss": 1.4819, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.706444141357764e-05, | |
| "loss": 1.4633, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 7.686123371275806e-05, | |
| "loss": 1.4074, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 7.665151252936049e-05, | |
| "loss": 1.3739, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 7.643531492170168e-05, | |
| "loss": 1.4575, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 7.621267909250057e-05, | |
| "loss": 1.4109, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 7.598364438212773e-05, | |
| "loss": 1.261, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 7.574825126165386e-05, | |
| "loss": 1.251, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.550654132569846e-05, | |
| "loss": 1.3583, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.525855728507984e-05, | |
| "loss": 1.3513, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.500434295926807e-05, | |
| "loss": 1.4371, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.474394326864201e-05, | |
| "loss": 1.5398, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 7.447740422655164e-05, | |
| "loss": 1.3364, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 7.420477293118745e-05, | |
| "loss": 1.2326, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 7.392609755725803e-05, | |
| "loss": 1.3098, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 7.36414273474775e-05, | |
| "loss": 1.3438, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 7.33508126038641e-05, | |
| "loss": 1.3329, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 7.305430467885182e-05, | |
| "loss": 1.4064, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 7.275195596621611e-05, | |
| "loss": 1.3769, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 7.244381989181594e-05, | |
| "loss": 1.3437, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 7.212995090415312e-05, | |
| "loss": 1.2524, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 7.181040446475129e-05, | |
| "loss": 1.4045, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.148523703835553e-05, | |
| "loss": 1.207, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.115450608295498e-05, | |
| "loss": 1.2996, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 7.081827003962987e-05, | |
| "loss": 1.2952, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.047658832222475e-05, | |
| "loss": 1.4254, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 7.012952130684995e-05, | |
| "loss": 1.3879, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 6.977713032121295e-05, | |
| "loss": 1.3536, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 6.941947763378157e-05, | |
| "loss": 1.2768, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 6.905662644278099e-05, | |
| "loss": 1.3828, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 6.868864086502643e-05, | |
| "loss": 1.2058, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 6.831558592459356e-05, | |
| "loss": 1.2856, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 6.793752754132852e-05, | |
| "loss": 1.2367, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 6.755453251919973e-05, | |
| "loss": 1.3201, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 6.716666853449342e-05, | |
| "loss": 1.3446, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 6.67740041238551e-05, | |
| "loss": 1.2449, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 6.637660867217884e-05, | |
| "loss": 1.2529, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 6.59745524003469e-05, | |
| "loss": 1.3266, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.556790635282136e-05, | |
| "loss": 1.2609, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.515674238509048e-05, | |
| "loss": 1.231, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.474113315097161e-05, | |
| "loss": 1.3123, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.432115208977297e-05, | |
| "loss": 1.2846, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.389687341331688e-05, | |
| "loss": 1.3302, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.346837209282615e-05, | |
| "loss": 1.386, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.303572384567662e-05, | |
| "loss": 1.332, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.259900512201756e-05, | |
| "loss": 1.3034, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.215829309126279e-05, | |
| "loss": 1.4256, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.17136656284546e-05, | |
| "loss": 1.2721, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.1265201300503e-05, | |
| "loss": 1.3103, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 6.081297935230281e-05, | |
| "loss": 1.2451, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 6.035707969273072e-05, | |
| "loss": 1.1757, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.989758288052531e-05, | |
| "loss": 1.3589, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.9434570110052036e-05, | |
| "loss": 1.275, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 5.8968123196955955e-05, | |
| "loss": 1.3078, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.8498324563704676e-05, | |
| "loss": 1.3537, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5.80252572250241e-05, | |
| "loss": 1.1313, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 5.7549004773229474e-05, | |
| "loss": 1.2557, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 5.706965136345439e-05, | |
| "loss": 1.208, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 5.658728169878033e-05, | |
| "loss": 1.0945, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 5.6101981015269436e-05, | |
| "loss": 1.3753, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 5.561383506690303e-05, | |
| "loss": 1.2593, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 5.512293011042863e-05, | |
| "loss": 1.2717, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 5.462935289011821e-05, | |
| "loss": 1.2859, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.4133190622440153e-05, | |
| "loss": 1.1984, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.363453098064792e-05, | |
| "loss": 1.3106, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.313346207928795e-05, | |
| "loss": 1.2159, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.2630072458629526e-05, | |
| "loss": 1.1897, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.2124451069019495e-05, | |
| "loss": 1.1248, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 5.161668725516451e-05, | |
| "loss": 1.2375, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.110687074034351e-05, | |
| "loss": 1.2367, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.059509161055343e-05, | |
| "loss": 1.2432, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.008144029859074e-05, | |
| "loss": 1.2744, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.956600756807172e-05, | |
| "loss": 1.2705, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.904888449739422e-05, | |
| "loss": 1.2448, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.8530162463643935e-05, | |
| "loss": 1.1841, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.800993312644778e-05, | |
| "loss": 1.3046, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.748828841177738e-05, | |
| "loss": 1.1876, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.6965320495705504e-05, | |
| "loss": 1.2061, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.644112178811828e-05, | |
| "loss": 1.2982, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.591578491638613e-05, | |
| "loss": 1.2507, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.538940270899625e-05, | |
| "loss": 1.2313, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.4862068179149546e-05, | |
| "loss": 1.2507, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.4333874508324964e-05, | |
| "loss": 1.274, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.3804915029814054e-05, | |
| "loss": 1.2024, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.327528321222869e-05, | |
| "loss": 1.2804, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.274507264298496e-05, | |
| "loss": 1.1524, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.2214377011765956e-05, | |
| "loss": 1.236, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.1683290093966603e-05, | |
| "loss": 1.2723, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 14.044312630844383, | |
| "eval_loss": 1.032505989074707, | |
| "eval_rouge1": 61.6206, | |
| "eval_rouge2": 45.1199, | |
| "eval_rougeL": 59.6467, | |
| "eval_rougeLsum": 59.7534, | |
| "eval_runtime": 315.6416, | |
| "eval_samples_per_second": 9.08, | |
| "eval_steps_per_second": 9.08, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.115190573412321e-05, | |
| "loss": 1.1678, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.062031782933099e-05, | |
| "loss": 1.1245, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.008862031265205e-05, | |
| "loss": 1.0944, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.955690713651723e-05, | |
| "loss": 1.0338, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.902527225612447e-05, | |
| "loss": 1.1155, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.849380961283661e-05, | |
| "loss": 1.0579, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.796261311758174e-05, | |
| "loss": 1.0659, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.743177663425883e-05, | |
| "loss": 1.0031, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.690139396315174e-05, | |
| "loss": 1.0458, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.637155882435446e-05, | |
| "loss": 0.9631, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.5842364841210466e-05, | |
| "loss": 1.0156, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.53139055237693e-05, | |
| "loss": 1.0457, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.478627425226299e-05, | |
| "loss": 1.0843, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.4259564260605564e-05, | |
| "loss": 1.1081, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.373386861991832e-05, | |
| "loss": 1.0335, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.320928022208392e-05, | |
| "loss": 1.0572, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.268589176333213e-05, | |
| "loss": 1.0875, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.216379572786015e-05, | |
| "loss": 1.0419, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.1643084371490394e-05, | |
| "loss": 1.0162, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.112384970536862e-05, | |
| "loss": 1.1311, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.060618347970529e-05, | |
| "loss": 1.0091, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0090177167563106e-05, | |
| "loss": 1.0725, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.9575921948693394e-05, | |
| "loss": 0.9811, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.906350869342447e-05, | |
| "loss": 1.1393, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.8553027946604523e-05, | |
| "loss": 1.0846, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.8044569911602134e-05, | |
| "loss": 1.0058, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.7538224434367063e-05, | |
| "loss": 1.0463, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.703408098755424e-05, | |
| "loss": 1.006, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.6532228654713706e-05, | |
| "loss": 1.0281, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.603275611454928e-05, | |
| "loss": 1.1484, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.5535751625248784e-05, | |
| "loss": 1.1257, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.5041303008888593e-05, | |
| "loss": 1.1191, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.454949763591521e-05, | |
| "loss": 1.0099, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.406042240970668e-05, | |
| "loss": 1.0287, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.3574163751216513e-05, | |
| "loss": 0.9936, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.30908075837029e-05, | |
| "loss": 0.959, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.2610439317545723e-05, | |
| "loss": 1.0438, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.213314383515447e-05, | |
| "loss": 0.9786, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.1659005475969125e-05, | |
| "loss": 1.0546, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.1188108021557236e-05, | |
| "loss": 1.058, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.0720534680809452e-05, | |
| "loss": 1.0402, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.0256368075236296e-05, | |
| "loss": 1.0674, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.979569022436869e-05, | |
| "loss": 0.9716, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.9338582531264908e-05, | |
| "loss": 0.9702, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.8885125768126405e-05, | |
| "loss": 1.0379, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.843540006202513e-05, | |
| "loss": 0.9711, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.7989484880744917e-05, | |
| "loss": 1.0055, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.754745901873923e-05, | |
| "loss": 0.943, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.7109400583207977e-05, | |
| "loss": 1.0675, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.667538698029581e-05, | |
| "loss": 1.0679, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.624549490141417e-05, | |
| "loss": 0.9835, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.581980030968974e-05, | |
| "loss": 1.0194, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.5398378426541535e-05, | |
| "loss": 1.1163, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.4981303718389088e-05, | |
| "loss": 1.0588, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.4568649883494001e-05, | |
| "loss": 0.9728, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.416048983893727e-05, | |
| "loss": 1.0166, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.3756895707734637e-05, | |
| "loss": 1.0569, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.3357938806092245e-05, | |
| "loss": 1.0751, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.2963689630804854e-05, | |
| "loss": 0.9486, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.2574217846798921e-05, | |
| "loss": 1.0278, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.2189592274822526e-05, | |
| "loss": 1.0522, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.1809880879284608e-05, | |
| "loss": 1.0255, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.1435150756245439e-05, | |
| "loss": 1.0388, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.1065468121560627e-05, | |
| "loss": 0.9952, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.0700898299180493e-05, | |
| "loss": 1.084, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.034150570960721e-05, | |
| "loss": 0.9855, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.987353858511506e-06, | |
| "loss": 1.0436, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.638505325511041e-06, | |
| "loss": 1.0048, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.295021753112402e-06, | |
| "loss": 1.0402, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 8.956963835818708e-06, | |
| "loss": 1.0207, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.62439130940472e-06, | |
| "loss": 0.9986, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 8.297362940361386e-06, | |
| "loss": 0.9838, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 7.975936515511598e-06, | |
| "loss": 1.0181, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 7.660168831799115e-06, | |
| "loss": 1.0555, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 7.350115686252399e-06, | |
| "loss": 0.9745, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 7.045831866125117e-06, | |
| "loss": 1.0337, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 6.747371139215069e-06, | |
| "loss": 1.0722, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 6.454786244363292e-06, | |
| "loss": 1.1129, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 6.168128882134934e-06, | |
| "loss": 1.0271, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 5.887449705683632e-06, | |
| "loss": 1.074, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 5.61279831180098e-06, | |
| "loss": 0.9695, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 5.344223232152596e-06, | |
| "loss": 1.0043, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 5.081771924702468e-06, | |
| "loss": 1.0169, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.825490765327003e-06, | |
| "loss": 0.9531, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 4.575425039620265e-06, | |
| "loss": 1.0418, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 4.3316189348918855e-06, | |
| "loss": 1.1068, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.094115532359064e-06, | |
| "loss": 0.9387, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.862956799533977e-06, | |
| "loss": 1.0124, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.6381835828079946e-06, | |
| "loss": 0.9843, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.4198356002340405e-06, | |
| "loss": 1.0601, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.2079514345082764e-06, | |
| "loss": 1.0091, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.0025685261524297e-06, | |
| "loss": 1.04, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.803723166897965e-06, | |
| "loss": 0.9952, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.611450493273244e-06, | |
| "loss": 0.9844, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.4257844803947573e-06, | |
| "loss": 1.0642, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.2467579359636726e-06, | |
| "loss": 0.9943, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0744024944685968e-06, | |
| "loss": 0.9946, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9087486115956987e-06, | |
| "loss": 0.9849, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.7498255588470803e-06, | |
| "loss": 1.0182, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.5976614183684214e-06, | |
| "loss": 1.0031, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.452283077986807e-06, | |
| "loss": 1.0455, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.3137162264595493e-06, | |
| "loss": 0.9796, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.181985348934931e-06, | |
| "loss": 1.1242, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.0571137226256067e-06, | |
| "loss": 1.0918, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 9.391234126954463e-07, | |
| "loss": 0.9396, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 8.280352683605764e-07, | |
| "loss": 1.0158, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 7.238689192052439e-07, | |
| "loss": 1.0224, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 6.266427717132218e-07, | |
| "loss": 0.9853, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 5.363740060153522e-07, | |
| "loss": 1.0104, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 4.530785728537401e-07, | |
| "loss": 0.9941, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.76771190763221e-07, | |
| "loss": 1.0154, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.074653434705699e-07, | |
| "loss": 1.0677, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 2.4517327751187423e-07, | |
| "loss": 1.0048, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8990600006854488e-07, | |
| "loss": 1.0934, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.4167327702230283e-07, | |
| "loss": 1.0325, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.0048363122954208e-07, | |
| "loss": 1.1128, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 6.634434101529863e-08, | |
| "loss": 1.0901, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.926143888715484e-08, | |
| "loss": 0.945, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.9239710469296512e-08, | |
| "loss": 0.9584, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 6.282693656842753e-09, | |
| "loss": 0.9839, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.9267799072817415e-10, | |
| "loss": 1.0157, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 14.180739706908584, | |
| "eval_loss": 1.003404140472412, | |
| "eval_rouge1": 62.4433, | |
| "eval_rouge2": 46.0114, | |
| "eval_rougeL": 60.5355, | |
| "eval_rougeLsum": 60.6392, | |
| "eval_runtime": 315.924, | |
| "eval_samples_per_second": 9.072, | |
| "eval_steps_per_second": 9.072, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 724, | |
| "total_flos": 2.8251529060286464e+16, | |
| "train_loss": 1.2205451955782116, | |
| "train_runtime": 2405.2096, | |
| "train_samples_per_second": 19.264, | |
| "train_steps_per_second": 0.301 | |
| } | |
| ], | |
| "max_steps": 724, | |
| "num_train_epochs": 2, | |
| "total_flos": 2.8251529060286464e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |