WinstonShum/lora_model_causal_llama_3.1
Files changed:
- README.md +3 -3
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- runs/Aug14_20-28-21_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667301.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-29-33_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667374.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-30-58_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667458.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-33-48_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667628.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-37-12_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667833.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-38-44_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667925.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- training_args.bin +1 -1
    	
README.md CHANGED

@@ -35,14 +35,14 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 5e-05
 - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps:
-- num_epochs:
+- lr_scheduler_warmup_steps: 50
+- num_epochs: 3
 
 ### Training results
 
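For orientation, the new hyperparameter block maps one-to-one onto `transformers.TrainingArguments`. A minimal sketch, assuming the run was driven by the Hugging Face `Trainer` (the `output_dir` name is hypothetical; the Adam betas and epsilon listed above are the `Trainer` defaults):

```python
from transformers import TrainingArguments

# A sketch only: values mirror the new side of the README diff.
training_args = TrainingArguments(
    output_dir="outputs",             # hypothetical name, not in the diff
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    seed=3407,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    num_train_epochs=3,
    adam_beta1=0.9,                   # Adam betas/epsilon as listed above
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    logging_dir="runs",               # matches the runs/ event files added below
)
```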
    	
adapter_config.json CHANGED

@@ -14,7 +14,7 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 8,
-  "lora_dropout": 0.
+  "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "v_proj",
+    "up_proj",
     "gate_proj",
+    "v_proj",
     "k_proj",
     "q_proj",
     "down_proj",
-    "
+    "o_proj"
   ],
   "task_type": null,
   "use_dora": false,
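The updated adapter settings correspond directly to `peft.LoraConfig`. A minimal sketch, taking the values from the new side of the diff; the LoRA rank `r` sits outside the visible hunks, so the value below is a placeholder:

```python
from peft import LoraConfig

# A sketch only: alpha, dropout, and target modules come from the new
# adapter_config.json; r is a placeholder, not visible in this diff.
lora_config = LoraConfig(
    r=16,  # placeholder: rank is not shown in the hunks above
    lora_alpha=8,
    lora_dropout=0.1,
    target_modules=[
        "up_proj", "gate_proj", "v_proj",
        "k_proj", "q_proj", "down_proj", "o_proj",
    ],
)
```

Targeting all seven attention and MLP projections is the usual full-coverage LoRA setup for Llama-style models; with standard LoRA, the effective update scale is `lora_alpha / r`.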
    	
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2e82bedcb596a1a946480177dd7b024bc7767cefe5bdd14830d2e2815e7de20b
 size 167832240
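This file is a Git LFS pointer: only the spec version, the SHA-256 object id, and the byte size (167832240, roughly 168 MB) live in the repo, while the actual adapter weights are stored in LFS. Once pulled, the adapter attaches to its base model; a sketch, assuming a Llama 3.1 base (the exact base model id is inferred from the repo name, not recorded in this diff):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Assumption: the base model id below is a guess from the repo name.
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
model = PeftModel.from_pretrained(base, "WinstonShum/lora_model_causal_llama_3.1")
```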
    	
runs/Aug14_20-28-21_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667301.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8fa9c121214c40b2a4f10fd670859c7df23980142a72cb649e49cc1c2934847
+size 5661
    	
runs/Aug14_20-29-33_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667374.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10133d72e742f3585869600fc8524fe88d64889d1d4c549986dc2500b505eba5
+size 5661
    	
runs/Aug14_20-30-58_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667458.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6d782ed8214a698959b38049196c0da281c48944feb29949215cbb8fd1b4a87
+size 5661
    	
runs/Aug14_20-33-48_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667628.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70c677b24d64d3808c16eeaf8ffae199ddaa927bc46b07ee84ee2c717c8dabd8
+size 5661
    	
runs/Aug14_20-37-12_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667833.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63354ab73b1d4dfe7bb84b6b7dab4b0c69851cb5bbd1a40cd750b463a2ae46ab
+size 7938
    	
runs/Aug14_20-38-44_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667925.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1914e529337dd6890569b4abc710759dc5309b9bbbb6e971bb01a0346225f7ac
+size 665726
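The six tfevents files above are TensorBoard event logs, one per training start within roughly ten minutes on Aug 14 (Unix timestamps 1723667301 through 1723667925). The first five are only a few KB (5,661 to 7,938 bytes), consistent with short-lived restarts, while the last (665,726 bytes) is presumably the run that completed. After pulling the LFS objects they can be inspected with `tensorboard --logdir runs`.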
    	
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:37184ae993c0c2a43a4cffa3c56a066dd11732254dc6188b79fb5f7079320dfb
 size 5176
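training_args.bin is the pickled `TrainingArguments` object that `Trainer` saves alongside checkpoints. A minimal sketch for inspecting it locally; newer PyTorch versions need `weights_only=False` because this is a full pickle rather than a tensor file, and a compatible transformers version must be installed for the unpickling to succeed:

```python
import torch

# Load the pickled TrainingArguments saved by Trainer and print a few
# of the fields that should match the README hyperparameters above.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.lr_scheduler_type, args.num_train_epochs)
```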