Model save

Browse files

Files changed (4) hide show

adapter_config.json +5 -5
adapter_model.safetensors +1 -1
trainer_state.json +19 -19
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "o_proj",
-    "up_proj",
     "down_proj",
-    "gate_proj",
     "v_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj",
+    "k_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c562fea497beb9107f5413d5934bf3864db9d61d7d39787737cff1c10e37401
 size 1781853744

 version https://git-lfs.github.com/spec/v1
+oid sha256:12029ef30407a3d83ed399e75ec5d72a1fdb571a7ce6c5cf5d1dcdb9c3eac9f8
 size 1781853744

trainer_state.json CHANGED Viewed

@@ -10,67 +10,67 @@
   "log_history": [
     {
       "epoch": 0.4854368932038835,
-      "grad_norm": 0.42759230732917786,
       "learning_rate": 8.786407766990292e-05,
-      "loss": 0.7922,
       "step": 250
     },
     {
       "epoch": 0.970873786407767,
-      "grad_norm": 0.34185582399368286,
       "learning_rate": 7.572815533980583e-05,
-      "loss": 0.3971,
       "step": 500
     },
     {
       "epoch": 1.4563106796116505,
-      "grad_norm": 0.3420446217060089,
       "learning_rate": 6.359223300970875e-05,
-      "loss": 0.3661,
       "step": 750
     },
     {
       "epoch": 1.941747572815534,
-      "grad_norm": 0.3248656094074249,
       "learning_rate": 5.145631067961165e-05,
-      "loss": 0.3525,
       "step": 1000
     },
     {
       "epoch": 2.4271844660194173,
-      "grad_norm": 0.34264662861824036,
       "learning_rate": 3.9320388349514564e-05,
-      "loss": 0.3395,
       "step": 1250
     },
     {
       "epoch": 2.912621359223301,
-      "grad_norm": 0.3852052390575409,
       "learning_rate": 2.7184466019417475e-05,
-      "loss": 0.3277,
       "step": 1500
     },
     {
       "epoch": 3.3980582524271843,
-      "grad_norm": 0.402174174785614,
       "learning_rate": 1.5048543689320387e-05,
-      "loss": 0.3094,
       "step": 1750
     },
     {
       "epoch": 3.883495145631068,
-      "grad_norm": 0.3894464671611786,
       "learning_rate": 2.912621359223301e-06,
-      "loss": 0.2984,
       "step": 2000
     },
     {
       "epoch": 4.0,
       "step": 2060,
       "total_flos": 1.4631911156603474e+18,
-      "train_loss": 0.3948872992135946,
-      "train_runtime": 18740.9732,
-      "train_samples_per_second": 28.139,
       "train_steps_per_second": 0.11
     }
   ],

   "log_history": [
     {
       "epoch": 0.4854368932038835,
+      "grad_norm": 0.429694265127182,
       "learning_rate": 8.786407766990292e-05,
+      "loss": 0.7853,
       "step": 250
     },
     {
       "epoch": 0.970873786407767,
+      "grad_norm": 0.3274327218532562,
       "learning_rate": 7.572815533980583e-05,
+      "loss": 0.395,
       "step": 500
     },
     {
       "epoch": 1.4563106796116505,
+      "grad_norm": 0.3228892683982849,
       "learning_rate": 6.359223300970875e-05,
+      "loss": 0.3651,
       "step": 750
     },
     {
       "epoch": 1.941747572815534,
+      "grad_norm": 0.3434309661388397,
       "learning_rate": 5.145631067961165e-05,
+      "loss": 0.3521,
       "step": 1000
     },
     {
       "epoch": 2.4271844660194173,
+      "grad_norm": 0.34895673394203186,
       "learning_rate": 3.9320388349514564e-05,
+      "loss": 0.3388,
       "step": 1250
     },
     {
       "epoch": 2.912621359223301,
+      "grad_norm": 0.39222878217697144,
       "learning_rate": 2.7184466019417475e-05,
+      "loss": 0.3261,
       "step": 1500
     },
     {
       "epoch": 3.3980582524271843,
+      "grad_norm": 0.4256580173969269,
       "learning_rate": 1.5048543689320387e-05,
+      "loss": 0.306,
       "step": 1750
     },
     {
       "epoch": 3.883495145631068,
+      "grad_norm": 0.41629472374916077,
       "learning_rate": 2.912621359223301e-06,
+      "loss": 0.2938,
       "step": 2000
     },
     {
       "epoch": 4.0,
       "step": 2060,
       "total_flos": 1.4631911156603474e+18,
+      "train_loss": 0.39223564203503064,
+      "train_runtime": 18749.6172,
+      "train_samples_per_second": 28.126,
       "train_steps_per_second": 0.11
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d5f479cf1eadef4b27f3ea98dc36656b6a76cff3987cd2b0431c9bf903ec49a
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c95733d4a4ee5cb7ce3d773e7d419b3140ff452f3f548a33474577b01303c51
 size 5240