Billyyy committed on
Commit
3a20329
·
verified ·
1 Parent(s): 79b4213

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a8513162caa5d3737291b9a77529b8bf201b097b574a72a4ec802346d071487
3
  size 2718107304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd14d50acc2f98517bbef06b030c6c35a3358709f1d81f28d0924632e3a3327d
3
  size 2718107304
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7930f19ab407a9573f12c17fd1b3af048e842e990b315cd3ef46705209aed468
3
  size 145486330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732641cda8fbb415a9820a5a3bca30e63a51107e27c0f0f06200240f25dc5c64
3
  size 145486330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6d8c1e4a6e5d82bc88722704d97c55b34ef11ce759c09d1d12579f704419412
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc3defa9ba1ea15f769731d66d430abc7783c906f059b00b857dbeb10740c1d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5572383a3228bd80ae8f460d9587ee0e76e24dd65851719f3dadfa5ceb861f3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181640f7107f64b366f0a1e14f0d45b5ddc4bab25cf83c8ffe834db0b38e9f04
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2730748225013654,
5
  "eval_steps": 1000,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1065,6 +1065,364 @@
1065
  "learning_rate": 9.042494563884404e-05,
1066
  "loss": 2.8108,
1067
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1068
  }
1069
  ],
1070
  "logging_steps": 10,
@@ -1084,7 +1442,7 @@
1084
  "attributes": {}
1085
  }
1086
  },
1087
- "total_flos": 3.50721698955264e+17,
1088
  "train_batch_size": 4,
1089
  "trial_name": null,
1090
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3640997633351538,
5
  "eval_steps": 1000,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1065
  "learning_rate": 9.042494563884404e-05,
1066
  "loss": 2.8108,
1067
  "step": 1500
1068
+ },
1069
+ {
1070
+ "epoch": 0.2748953213180411,
1071
+ "grad_norm": 7.8658061027526855,
1072
+ "learning_rate": 9.023900547419646e-05,
1073
+ "loss": 2.7663,
1074
+ "step": 1510
1075
+ },
1076
+ {
1077
+ "epoch": 0.2767158201347169,
1078
+ "grad_norm": 15.445107460021973,
1079
+ "learning_rate": 9.005147228518174e-05,
1080
+ "loss": 2.7878,
1081
+ "step": 1520
1082
+ },
1083
+ {
1084
+ "epoch": 0.2785363189513927,
1085
+ "grad_norm": 12.650901794433594,
1086
+ "learning_rate": 8.986235349606238e-05,
1087
+ "loss": 2.8219,
1088
+ "step": 1530
1089
+ },
1090
+ {
1091
+ "epoch": 0.2803568177680685,
1092
+ "grad_norm": 10.226774215698242,
1093
+ "learning_rate": 8.967165659387331e-05,
1094
+ "loss": 2.742,
1095
+ "step": 1540
1096
+ },
1097
+ {
1098
+ "epoch": 0.2821773165847442,
1099
+ "grad_norm": 138.37210083007812,
1100
+ "learning_rate": 8.947938912812548e-05,
1101
+ "loss": 2.9524,
1102
+ "step": 1550
1103
+ },
1104
+ {
1105
+ "epoch": 0.28399781540142,
1106
+ "grad_norm": 16.11450958251953,
1107
+ "learning_rate": 8.928555871050693e-05,
1108
+ "loss": 2.7966,
1109
+ "step": 1560
1110
+ },
1111
+ {
1112
+ "epoch": 0.2858183142180958,
1113
+ "grad_norm": 10.201882362365723,
1114
+ "learning_rate": 8.909017301458156e-05,
1115
+ "loss": 2.8389,
1116
+ "step": 1570
1117
+ },
1118
+ {
1119
+ "epoch": 0.2876388130347715,
1120
+ "grad_norm": 10.96867847442627,
1121
+ "learning_rate": 8.889323977548521e-05,
1122
+ "loss": 2.7495,
1123
+ "step": 1580
1124
+ },
1125
+ {
1126
+ "epoch": 0.2894593118514473,
1127
+ "grad_norm": 10.814942359924316,
1128
+ "learning_rate": 8.869476678961954e-05,
1129
+ "loss": 2.7676,
1130
+ "step": 1590
1131
+ },
1132
+ {
1133
+ "epoch": 0.2912798106681231,
1134
+ "grad_norm": 6.535337448120117,
1135
+ "learning_rate": 8.849476191434334e-05,
1136
+ "loss": 2.7589,
1137
+ "step": 1600
1138
+ },
1139
+ {
1140
+ "epoch": 0.2931003094847988,
1141
+ "grad_norm": 7.036696434020996,
1142
+ "learning_rate": 8.829323306766142e-05,
1143
+ "loss": 2.7921,
1144
+ "step": 1610
1145
+ },
1146
+ {
1147
+ "epoch": 0.2949208083014746,
1148
+ "grad_norm": 5.92086124420166,
1149
+ "learning_rate": 8.809018822791121e-05,
1150
+ "loss": 2.7267,
1151
+ "step": 1620
1152
+ },
1153
+ {
1154
+ "epoch": 0.2967413071181504,
1155
+ "grad_norm": 6.186739921569824,
1156
+ "learning_rate": 8.788563543344688e-05,
1157
+ "loss": 2.795,
1158
+ "step": 1630
1159
+ },
1160
+ {
1161
+ "epoch": 0.2985618059348261,
1162
+ "grad_norm": 8.154546737670898,
1163
+ "learning_rate": 8.767958278232112e-05,
1164
+ "loss": 2.7627,
1165
+ "step": 1640
1166
+ },
1167
+ {
1168
+ "epoch": 0.3003823047515019,
1169
+ "grad_norm": 7.674529075622559,
1170
+ "learning_rate": 8.74720384319645e-05,
1171
+ "loss": 2.7996,
1172
+ "step": 1650
1173
+ },
1174
+ {
1175
+ "epoch": 0.3022028035681777,
1176
+ "grad_norm": 6.348474025726318,
1177
+ "learning_rate": 8.726301059886259e-05,
1178
+ "loss": 2.7704,
1179
+ "step": 1660
1180
+ },
1181
+ {
1182
+ "epoch": 0.3040233023848534,
1183
+ "grad_norm": 10.496267318725586,
1184
+ "learning_rate": 8.705250755823064e-05,
1185
+ "loss": 2.7591,
1186
+ "step": 1670
1187
+ },
1188
+ {
1189
+ "epoch": 0.3058438012015292,
1190
+ "grad_norm": 102.05543518066406,
1191
+ "learning_rate": 8.684053764368598e-05,
1192
+ "loss": 2.8027,
1193
+ "step": 1680
1194
+ },
1195
+ {
1196
+ "epoch": 0.307664300018205,
1197
+ "grad_norm": 8.403404235839844,
1198
+ "learning_rate": 8.662710924691805e-05,
1199
+ "loss": 2.8801,
1200
+ "step": 1690
1201
+ },
1202
+ {
1203
+ "epoch": 0.3094847988348808,
1204
+ "grad_norm": 7.355569839477539,
1205
+ "learning_rate": 8.64122308173563e-05,
1206
+ "loss": 2.8346,
1207
+ "step": 1700
1208
+ },
1209
+ {
1210
+ "epoch": 0.3113052976515565,
1211
+ "grad_norm": 12.551121711730957,
1212
+ "learning_rate": 8.61959108618356e-05,
1213
+ "loss": 2.8381,
1214
+ "step": 1710
1215
+ },
1216
+ {
1217
+ "epoch": 0.3131257964682323,
1218
+ "grad_norm": 116.6989517211914,
1219
+ "learning_rate": 8.597815794425943e-05,
1220
+ "loss": 2.814,
1221
+ "step": 1720
1222
+ },
1223
+ {
1224
+ "epoch": 0.3149462952849081,
1225
+ "grad_norm": 21.63788604736328,
1226
+ "learning_rate": 8.575898068526093e-05,
1227
+ "loss": 2.8389,
1228
+ "step": 1730
1229
+ },
1230
+ {
1231
+ "epoch": 0.3167667941015838,
1232
+ "grad_norm": 143.42408752441406,
1233
+ "learning_rate": 8.553838776186158e-05,
1234
+ "loss": 2.8534,
1235
+ "step": 1740
1236
+ },
1237
+ {
1238
+ "epoch": 0.3185872929182596,
1239
+ "grad_norm": 9.04028034210205,
1240
+ "learning_rate": 8.531638790712765e-05,
1241
+ "loss": 2.8186,
1242
+ "step": 1750
1243
+ },
1244
+ {
1245
+ "epoch": 0.3204077917349354,
1246
+ "grad_norm": 11.659414291381836,
1247
+ "learning_rate": 8.509298990982453e-05,
1248
+ "loss": 2.8078,
1249
+ "step": 1760
1250
+ },
1251
+ {
1252
+ "epoch": 0.3222282905516111,
1253
+ "grad_norm": 7.934113502502441,
1254
+ "learning_rate": 8.486820261406873e-05,
1255
+ "loss": 2.792,
1256
+ "step": 1770
1257
+ },
1258
+ {
1259
+ "epoch": 0.3240487893682869,
1260
+ "grad_norm": 12.919567108154297,
1261
+ "learning_rate": 8.464203491897779e-05,
1262
+ "loss": 2.8111,
1263
+ "step": 1780
1264
+ },
1265
+ {
1266
+ "epoch": 0.3258692881849627,
1267
+ "grad_norm": 13.67540454864502,
1268
+ "learning_rate": 8.441449577831801e-05,
1269
+ "loss": 2.8085,
1270
+ "step": 1790
1271
+ },
1272
+ {
1273
+ "epoch": 0.3276897870016384,
1274
+ "grad_norm": 7.7655110359191895,
1275
+ "learning_rate": 8.418559420014984e-05,
1276
+ "loss": 2.7689,
1277
+ "step": 1800
1278
+ },
1279
+ {
1280
+ "epoch": 0.3295102858183142,
1281
+ "grad_norm": 8.168259620666504,
1282
+ "learning_rate": 8.395533924647141e-05,
1283
+ "loss": 2.7534,
1284
+ "step": 1810
1285
+ },
1286
+ {
1287
+ "epoch": 0.33133078463499,
1288
+ "grad_norm": 14.387748718261719,
1289
+ "learning_rate": 8.372374003285968e-05,
1290
+ "loss": 2.8353,
1291
+ "step": 1820
1292
+ },
1293
+ {
1294
+ "epoch": 0.3331512834516658,
1295
+ "grad_norm": 9.209723472595215,
1296
+ "learning_rate": 8.349080572810965e-05,
1297
+ "loss": 2.7837,
1298
+ "step": 1830
1299
+ },
1300
+ {
1301
+ "epoch": 0.3349717822683415,
1302
+ "grad_norm": 9.160303115844727,
1303
+ "learning_rate": 8.325654555387123e-05,
1304
+ "loss": 2.8186,
1305
+ "step": 1840
1306
+ },
1307
+ {
1308
+ "epoch": 0.3367922810850173,
1309
+ "grad_norm": 20.171415328979492,
1310
+ "learning_rate": 8.302096878428438e-05,
1311
+ "loss": 2.8011,
1312
+ "step": 1850
1313
+ },
1314
+ {
1315
+ "epoch": 0.3386127799016931,
1316
+ "grad_norm": 29.545217514038086,
1317
+ "learning_rate": 8.278408474561169e-05,
1318
+ "loss": 2.7971,
1319
+ "step": 1860
1320
+ },
1321
+ {
1322
+ "epoch": 0.3404332787183688,
1323
+ "grad_norm": 19.314136505126953,
1324
+ "learning_rate": 8.254590281586942e-05,
1325
+ "loss": 2.7983,
1326
+ "step": 1870
1327
+ },
1328
+ {
1329
+ "epoch": 0.3422537775350446,
1330
+ "grad_norm": 8.010175704956055,
1331
+ "learning_rate": 8.230643242445605e-05,
1332
+ "loss": 2.7921,
1333
+ "step": 1880
1334
+ },
1335
+ {
1336
+ "epoch": 0.3440742763517204,
1337
+ "grad_norm": 24.649381637573242,
1338
+ "learning_rate": 8.206568305177907e-05,
1339
+ "loss": 2.7962,
1340
+ "step": 1890
1341
+ },
1342
+ {
1343
+ "epoch": 0.3458947751683961,
1344
+ "grad_norm": 8.272650718688965,
1345
+ "learning_rate": 8.182366422887964e-05,
1346
+ "loss": 2.7439,
1347
+ "step": 1900
1348
+ },
1349
+ {
1350
+ "epoch": 0.3477152739850719,
1351
+ "grad_norm": 7.553550720214844,
1352
+ "learning_rate": 8.158038553705524e-05,
1353
+ "loss": 2.7845,
1354
+ "step": 1910
1355
+ },
1356
+ {
1357
+ "epoch": 0.3495357728017477,
1358
+ "grad_norm": 8.573986053466797,
1359
+ "learning_rate": 8.13358566074804e-05,
1360
+ "loss": 2.7003,
1361
+ "step": 1920
1362
+ },
1363
+ {
1364
+ "epoch": 0.3513562716184234,
1365
+ "grad_norm": 10.316489219665527,
1366
+ "learning_rate": 8.109008712082538e-05,
1367
+ "loss": 2.7627,
1368
+ "step": 1930
1369
+ },
1370
+ {
1371
+ "epoch": 0.3531767704350992,
1372
+ "grad_norm": 8.462483406066895,
1373
+ "learning_rate": 8.084308680687287e-05,
1374
+ "loss": 2.7281,
1375
+ "step": 1940
1376
+ },
1377
+ {
1378
+ "epoch": 0.354997269251775,
1379
+ "grad_norm": 20.140274047851562,
1380
+ "learning_rate": 8.059486544413298e-05,
1381
+ "loss": 2.6906,
1382
+ "step": 1950
1383
+ },
1384
+ {
1385
+ "epoch": 0.3568177680684508,
1386
+ "grad_norm": 7.473912239074707,
1387
+ "learning_rate": 8.034543285945584e-05,
1388
+ "loss": 2.8117,
1389
+ "step": 1960
1390
+ },
1391
+ {
1392
+ "epoch": 0.3586382668851265,
1393
+ "grad_norm": 38.26898193359375,
1394
+ "learning_rate": 8.009479892764284e-05,
1395
+ "loss": 2.7456,
1396
+ "step": 1970
1397
+ },
1398
+ {
1399
+ "epoch": 0.3604587657018023,
1400
+ "grad_norm": 48.63120651245117,
1401
+ "learning_rate": 7.984297357105552e-05,
1402
+ "loss": 2.7224,
1403
+ "step": 1980
1404
+ },
1405
+ {
1406
+ "epoch": 0.3622792645184781,
1407
+ "grad_norm": 10.31283187866211,
1408
+ "learning_rate": 7.95899667592228e-05,
1409
+ "loss": 2.7108,
1410
+ "step": 1990
1411
+ },
1412
+ {
1413
+ "epoch": 0.3640997633351538,
1414
+ "grad_norm": 6.465616703033447,
1415
+ "learning_rate": 7.933578850844636e-05,
1416
+ "loss": 2.6901,
1417
+ "step": 2000
1418
+ },
1419
+ {
1420
+ "epoch": 0.3640997633351538,
1421
+ "eval_loss": 2.735260248184204,
1422
+ "eval_runtime": 1011.986,
1423
+ "eval_samples_per_second": 9.65,
1424
+ "eval_steps_per_second": 1.207,
1425
+ "step": 2000
1426
  }
1427
  ],
1428
  "logging_steps": 10,
 
1442
  "attributes": {}
1443
  }
1444
  },
1445
+ "total_flos": 4.67628931940352e+17,
1446
  "train_batch_size": 4,
1447
  "trial_name": null,
1448
  "trial_params": null