End of training

Files changed (6) hide show

README.md CHANGED Viewed

@@ -32,8 +32,8 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 128
-- eval_batch_size: 32
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.5,0.9) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine_with_min_lr

 The following hyperparameters were used during training:
 - learning_rate: 0.0002
+- train_batch_size: 512
+- eval_batch_size: 128
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.5,0.9) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine_with_min_lr

all_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "epoch": 0.125,
-    "train_loss": 1.3442257046699524,
-    "train_runtime": 21.7074,
-    "train_samples_per_second": 9.435,
-    "train_steps_per_second": 0.092
 }

 {
+    "epoch": 0.1875,
+    "train_loss": 0.4492587248484294,
+    "train_runtime": 35.9777,
+    "train_samples_per_second": 5.692,
+    "train_steps_per_second": 0.056
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b58258e12f0e3503a9ac6d19f156a0ebb77b4e282f1d09e43335f3a5d7939966
 size 31338740

 version https://git-lfs.github.com/spec/v1
+oid sha256:87294c600e79526b4c1b877575f7c306e09b0afa797db54ee935476a21b2d6ae
 size 31338740

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "epoch": 0.125,
-    "train_loss": 1.3442257046699524,
-    "train_runtime": 21.7074,
-    "train_samples_per_second": 9.435,
-    "train_steps_per_second": 0.092
 }

 {
+    "epoch": 0.1875,
+    "train_loss": 0.4492587248484294,
+    "train_runtime": 35.9777,
+    "train_samples_per_second": 5.692,
+    "train_steps_per_second": 0.056
 }

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.125,
   "eval_steps": 1,
-  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24,13 +24,20 @@
       "step": 2
     },
     {
-      "epoch": 0.125,
-      "step": 2,
-      "total_flos": 147645328785408.0,
-      "train_loss": 1.3442257046699524,
-      "train_runtime": 21.7074,
-      "train_samples_per_second": 9.435,
-      "train_steps_per_second": 0.092
     }
   ],
   "logging_steps": 1,
@@ -50,7 +57,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 147645328785408.0,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1875,
   "eval_steps": 1,
+  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 2
     },
     {
+      "epoch": 0.1875,
+      "grad_norm": 6.143799781799316,
+      "learning_rate": 4.0000000000000003e-07,
+      "loss": 1.3478,
+      "step": 3
+    },
+    {
+      "epoch": 0.1875,
+      "step": 3,
+      "total_flos": 221467993178112.0,
+      "train_loss": 0.4492587248484294,
+      "train_runtime": 35.9777,
+      "train_samples_per_second": 5.692,
+      "train_steps_per_second": 0.056
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 221467993178112.0,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5625939faf506c5952d731e03342453532cfdf8fa83e8ecc6feb4dba0dc45a5d
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8faac91036e8ec73700214fe8baff21d52da005b8df0a5bb0395bd50f3f1c8b
 size 5368