Training in progress, step 4500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b19e11f447e7808b4485d558ef1154ac18739d90a3ca661e880731345e57dc3c
 size 2718107304

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fbd5aa58a02999ada4c0f3cd43ca41dde21582eb2d5dc255f13cbc8023650b6
 size 2718107304

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f760c332433332f0c407af5847f2519c9fc015ef3d9c6352bf66a977a2ee25dd
 size 145486330

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d7ccc1eca1a7ceff3e0dde210062328c431e027749f3f43c26d6230e844198d
 size 145486330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cb0ef8513b88a13107327413608c21fb3fd9739eade34e1dd90f5265fc015f0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5b09ae3a06fa6068073a53264cd1c287209ec69eb82f509ea260660e6955ead
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:685957d6186c9595a838f507ad76cb3024cdfde7a3a70e0ca3050d9e9db2c6f6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:48993fe83df0cc4ae4d85d5c3e846862f5ffd116f5d7e38ed8352e362fdf4bbb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7281995266703076,
   "eval_steps": 1000,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2839,6 +2839,356 @@
       "eval_samples_per_second": 9.649,
       "eval_steps_per_second": 1.206,
       "step": 4000
     }
   ],
   "logging_steps": 10,
@@ -2858,7 +3208,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.35257863880704e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8192244675040962,
   "eval_steps": 1000,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.649,
       "eval_steps_per_second": 1.206,
       "step": 4000
+    },
+    {
+      "epoch": 0.7300200254869834,
+      "grad_norm": 5.740320205688477,
+      "learning_rate": 2.0232804874534313e-05,
+      "loss": 2.4582,
+      "step": 4010
+    },
+    {
+      "epoch": 0.7318405243036592,
+      "grad_norm": 5.987521171569824,
+      "learning_rate": 1.998062403243704e-05,
+      "loss": 2.4499,
+      "step": 4020
+    },
+    {
+      "epoch": 0.733661023120335,
+      "grad_norm": 5.337474346160889,
+      "learning_rate": 1.9729631629190042e-05,
+      "loss": 2.4598,
+      "step": 4030
+    },
+    {
+      "epoch": 0.7354815219370108,
+      "grad_norm": 5.655992031097412,
+      "learning_rate": 1.9479837601346457e-05,
+      "loss": 2.4601,
+      "step": 4040
+    },
+    {
+      "epoch": 0.7373020207536866,
+      "grad_norm": 5.7331414222717285,
+      "learning_rate": 1.923125183801678e-05,
+      "loss": 2.4445,
+      "step": 4050
+    },
+    {
+      "epoch": 0.7391225195703622,
+      "grad_norm": 5.471503257751465,
+      "learning_rate": 1.898388418047753e-05,
+      "loss": 2.4683,
+      "step": 4060
+    },
+    {
+      "epoch": 0.740943018387038,
+      "grad_norm": 5.409184455871582,
+      "learning_rate": 1.87377444217815e-05,
+      "loss": 2.4358,
+      "step": 4070
+    },
+    {
+      "epoch": 0.7427635172037138,
+      "grad_norm": 5.579779148101807,
+      "learning_rate": 1.8492842306370182e-05,
+      "loss": 2.4989,
+      "step": 4080
+    },
+    {
+      "epoch": 0.7445840160203896,
+      "grad_norm": 5.366626262664795,
+      "learning_rate": 1.8249187529687895e-05,
+      "loss": 2.4102,
+      "step": 4090
+    },
+    {
+      "epoch": 0.7464045148370654,
+      "grad_norm": 5.263418674468994,
+      "learning_rate": 1.8006789737797984e-05,
+      "loss": 2.4573,
+      "step": 4100
+    },
+    {
+      "epoch": 0.7482250136537412,
+      "grad_norm": 5.129177570343018,
+      "learning_rate": 1.7765658527000966e-05,
+      "loss": 2.4792,
+      "step": 4110
+    },
+    {
+      "epoch": 0.7500455124704168,
+      "grad_norm": 6.237401962280273,
+      "learning_rate": 1.7525803443454615e-05,
+      "loss": 2.479,
+      "step": 4120
+    },
+    {
+      "epoch": 0.7518660112870926,
+      "grad_norm": 6.163425445556641,
+      "learning_rate": 1.728723398279603e-05,
+      "loss": 2.4222,
+      "step": 4130
+    },
+    {
+      "epoch": 0.7536865101037684,
+      "grad_norm": 5.254932403564453,
+      "learning_rate": 1.7049959589765686e-05,
+      "loss": 2.4307,
+      "step": 4140
+    },
+    {
+      "epoch": 0.7555070089204442,
+      "grad_norm": 6.144068717956543,
+      "learning_rate": 1.6813989657833534e-05,
+      "loss": 2.4923,
+      "step": 4150
+    },
+    {
+      "epoch": 0.75732750773712,
+      "grad_norm": 5.038397789001465,
+      "learning_rate": 1.6579333528827205e-05,
+      "loss": 2.4732,
+      "step": 4160
+    },
+    {
+      "epoch": 0.7591480065537958,
+      "grad_norm": 5.2848076820373535,
+      "learning_rate": 1.634600049256204e-05,
+      "loss": 2.4651,
+      "step": 4170
+    },
+    {
+      "epoch": 0.7609685053704716,
+      "grad_norm": 5.274468898773193,
+      "learning_rate": 1.611399978647342e-05,
+      "loss": 2.4407,
+      "step": 4180
+    },
+    {
+      "epoch": 0.7627890041871472,
+      "grad_norm": 5.039272308349609,
+      "learning_rate": 1.588334059525099e-05,
+      "loss": 2.4892,
+      "step": 4190
+    },
+    {
+      "epoch": 0.764609503003823,
+      "grad_norm": 5.7419867515563965,
+      "learning_rate": 1.5654032050475138e-05,
+      "loss": 2.4456,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7664300018204988,
+      "grad_norm": 5.30146598815918,
+      "learning_rate": 1.5426083230255405e-05,
+      "loss": 2.4515,
+      "step": 4210
+    },
+    {
+      "epoch": 0.7682505006371746,
+      "grad_norm": 4.977199077606201,
+      "learning_rate": 1.5199503158871115e-05,
+      "loss": 2.4317,
+      "step": 4220
+    },
+    {
+      "epoch": 0.7700709994538504,
+      "grad_norm": 5.318095684051514,
+      "learning_rate": 1.4974300806414082e-05,
+      "loss": 2.403,
+      "step": 4230
+    },
+    {
+      "epoch": 0.7718914982705262,
+      "grad_norm": 5.638497352600098,
+      "learning_rate": 1.4750485088433592e-05,
+      "loss": 2.4327,
+      "step": 4240
+    },
+    {
+      "epoch": 0.7737119970872018,
+      "grad_norm": 5.739340305328369,
+      "learning_rate": 1.4528064865583301e-05,
+      "loss": 2.4266,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7755324959038776,
+      "grad_norm": 4.749205112457275,
+      "learning_rate": 1.4307048943270606e-05,
+      "loss": 2.4136,
+      "step": 4260
+    },
+    {
+      "epoch": 0.7773529947205534,
+      "grad_norm": 5.616302490234375,
+      "learning_rate": 1.4087446071307903e-05,
+      "loss": 2.4197,
+      "step": 4270
+    },
+    {
+      "epoch": 0.7791734935372292,
+      "grad_norm": 5.402510643005371,
+      "learning_rate": 1.3869264943566263e-05,
+      "loss": 2.4194,
+      "step": 4280
+    },
+    {
+      "epoch": 0.780993992353905,
+      "grad_norm": 5.278769493103027,
+      "learning_rate": 1.3652514197631277e-05,
+      "loss": 2.4351,
+      "step": 4290
+    },
+    {
+      "epoch": 0.7828144911705808,
+      "grad_norm": 6.828596115112305,
+      "learning_rate": 1.343720241446103e-05,
+      "loss": 2.3813,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7846349899872566,
+      "grad_norm": 5.306332588195801,
+      "learning_rate": 1.322333811804643e-05,
+      "loss": 2.4133,
+      "step": 4310
+    },
+    {
+      "epoch": 0.7864554888039322,
+      "grad_norm": 5.437227249145508,
+      "learning_rate": 1.3010929775073765e-05,
+      "loss": 2.4166,
+      "step": 4320
+    },
+    {
+      "epoch": 0.788275987620608,
+      "grad_norm": 5.493254661560059,
+      "learning_rate": 1.2799985794589497e-05,
+      "loss": 2.3842,
+      "step": 4330
+    },
+    {
+      "epoch": 0.7900964864372838,
+      "grad_norm": 5.259057521820068,
+      "learning_rate": 1.2590514527667336e-05,
+      "loss": 2.3783,
+      "step": 4340
+    },
+    {
+      "epoch": 0.7919169852539596,
+      "grad_norm": 5.750987529754639,
+      "learning_rate": 1.2382524267077645e-05,
+      "loss": 2.4202,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7937374840706354,
+      "grad_norm": 4.952456951141357,
+      "learning_rate": 1.2176023246959133e-05,
+      "loss": 2.4393,
+      "step": 4360
+    },
+    {
+      "epoch": 0.7955579828873112,
+      "grad_norm": 5.3008713722229,
+      "learning_rate": 1.1971019642492942e-05,
+      "loss": 2.375,
+      "step": 4370
+    },
+    {
+      "epoch": 0.7973784817039868,
+      "grad_norm": 4.872366428375244,
+      "learning_rate": 1.176752156957886e-05,
+      "loss": 2.4257,
+      "step": 4380
+    },
+    {
+      "epoch": 0.7991989805206626,
+      "grad_norm": 5.488797664642334,
+      "learning_rate": 1.1565537084514123e-05,
+      "loss": 2.4424,
+      "step": 4390
+    },
+    {
+      "epoch": 0.8010194793373384,
+      "grad_norm": 5.145867824554443,
+      "learning_rate": 1.1365074183674468e-05,
+      "loss": 2.4806,
+      "step": 4400
+    },
+    {
+      "epoch": 0.8028399781540142,
+      "grad_norm": 5.343238353729248,
+      "learning_rate": 1.116614080319754e-05,
+      "loss": 2.4321,
+      "step": 4410
+    },
+    {
+      "epoch": 0.80466047697069,
+      "grad_norm": 5.240965366363525,
+      "learning_rate": 1.0968744818668691e-05,
+      "loss": 2.4358,
+      "step": 4420
+    },
+    {
+      "epoch": 0.8064809757873658,
+      "grad_norm": 5.5220513343811035,
+      "learning_rate": 1.0772894044809229e-05,
+      "loss": 2.442,
+      "step": 4430
+    },
+    {
+      "epoch": 0.8083014746040416,
+      "grad_norm": 4.8629045486450195,
+      "learning_rate": 1.0578596235166998e-05,
+      "loss": 2.4567,
+      "step": 4440
+    },
+    {
+      "epoch": 0.8101219734207172,
+      "grad_norm": 5.297680854797363,
+      "learning_rate": 1.0385859081809508e-05,
+      "loss": 2.4544,
+      "step": 4450
+    },
+    {
+      "epoch": 0.811942472237393,
+      "grad_norm": 5.134615898132324,
+      "learning_rate": 1.0194690215019292e-05,
+      "loss": 2.4656,
+      "step": 4460
+    },
+    {
+      "epoch": 0.8137629710540688,
+      "grad_norm": 5.012113571166992,
+      "learning_rate": 1.0005097202991948e-05,
+      "loss": 2.382,
+      "step": 4470
+    },
+    {
+      "epoch": 0.8155834698707446,
+      "grad_norm": 5.369142532348633,
+      "learning_rate": 9.817087551536414e-06,
+      "loss": 2.4584,
+      "step": 4480
+    },
+    {
+      "epoch": 0.8174039686874204,
+      "grad_norm": 5.545107841491699,
+      "learning_rate": 9.630668703777922e-06,
+      "loss": 2.4013,
+      "step": 4490
+    },
+    {
+      "epoch": 0.8192244675040962,
+      "grad_norm": 4.933434963226318,
+      "learning_rate": 9.445848039863252e-06,
+      "loss": 2.4516,
+      "step": 4500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.052165096865792e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null