Training in progress, epoch 1
Files changed:
- README.md +3 -3
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- normal_subset_20251027_225304.jsonl +0 -0
- train_combined_20251027_225304.jsonl +0 -0
- training_args.bin +1 -1
README.md
CHANGED

```diff
@@ -4,8 +4,8 @@ library_name: transformers
 model_name: gemma-2-9b-it-taboo-wave
 tags:
 - generated_from_trainer
-- sft
 - trl
+- sft
 licence: license
 ---
 
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/barto/gemma-2-9b-it-taboo-final/runs/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/barto/gemma-2-9b-it-taboo-final/runs/fmdyvw5y)
 
 
 This model was trained with SFT.
@@ -37,7 +37,7 @@ This model was trained with SFT.
 - TRL: 0.19.0
 - Transformers: 4.51.3
 - Pytorch: 2.7.0
-- Datasets:
+- Datasets: 4.0.0
 - Tokenizers: 0.21.2
 
 ## Citations
```
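The README reports that this adapter was trained with SFT via TRL 0.19.0 (Transformers 4.51.3, PyTorch 2.7.0, Datasets 4.0.0, Tokenizers 0.21.2) and logged to the linked W&B run. Below is a minimal sketch of such a run, not the repository's actual training script; the base model id, hyperparameters, and the assumption that `train_combined_20251027_225304.jsonl` holds TRL-compatible records are inferred, not stated in this commit.

```python
# Minimal sketch of the SFT run described in the README, not the repo's actual
# training script. Assumptions: google/gemma-2-9b-it as the base model,
# train_combined_20251027_225304.jsonl as the training file with TRL-compatible
# "messages" or "text" records, and default LoRA/SFT hyperparameters.
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

train_ds = load_dataset(
    "json",
    data_files="train_combined_20251027_225304.jsonl",
    split="train",
)

lora = LoraConfig(
    task_type="CAUSAL_LM",
    # Same module set as adapter_config.json in this commit.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

args = SFTConfig(
    output_dir="gemma-2-9b-it-taboo-wave",
    num_train_epochs=1,      # commit message: "Training in progress, epoch 1"
    report_to="wandb",       # produces the W&B run badge linked in the README
)

trainer = SFTTrainer(
    model="google/gemma-2-9b-it",  # assumed base checkpoint
    args=args,
    train_dataset=train_ds,
    peft_config=lora,
)
trainer.train()
```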
adapter_config.json
CHANGED

```diff
@@ -24,12 +24,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "gate_proj",
     "q_proj",
+    "gate_proj",
+    "down_proj",
     "o_proj",
-    "up_proj",
     "v_proj",
+    "up_proj",
     "k_proj"
   ],
   "task_type": "CAUSAL_LM",
```
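The adapter_config.json change only reorders the `target_modules` entries; PEFT matches module names against this list regardless of order, so the adapter still wraps the same seven attention and MLP projections. A quick sanity check:

```python
# Order in target_modules does not change which modules receive LoRA adapters.
old = ["down_proj", "gate_proj", "q_proj", "o_proj", "up_proj", "v_proj", "k_proj"]
new = ["q_proj", "gate_proj", "down_proj", "o_proj", "v_proj", "up_proj", "k_proj"]
assert set(old) == set(new)  # identical module sets before and after this commit
```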
adapter_model.safetensors
CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9248db6ca2180bc9272c775b94d1dabc35f3e75bd32fde86a30734aad873f9ab
 size 216151256
```
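adapter_model.safetensors is a Git LFS pointer to the updated LoRA weights (about 216 MB). A minimal loading sketch follows, assuming google/gemma-2-9b-it as the base model and a local clone of this repository; both are assumptions, not stated in the commit.

```python
# Hedged example: the base model id and adapter path are placeholders/assumptions.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_ID = "google/gemma-2-9b-it"             # assumed base model for this adapter
ADAPTER_PATH = "./gemma-2-9b-it-taboo-wave"  # placeholder: local clone of this repo

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, ADAPTER_PATH)  # reads adapter_model.safetensors
```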
normal_subset_20251027_225304.jsonl
ADDED

The diff for this file is too large to render.

train_combined_20251027_225304.jsonl
ADDED

The diff for this file is too large to render.

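Both JSONL files are new in this commit but too large for the diff viewer, so their schema is not visible here. One way to inspect them locally, with no field names assumed:

```python
# Inspect the newly added JSONL files; prints row counts and column names.
from datasets import load_dataset

ds = load_dataset("json", data_files={
    "normal_subset": "normal_subset_20251027_225304.jsonl",
    "train_combined": "train_combined_20251027_225304.jsonl",
})
for name, split in ds.items():
    print(name, split.num_rows, split.column_names)
```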
training_args.bin
CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:19d9c6b9d1cbb2b9381fee04e8f1291fb0c2d788e922e0591f9d475007d6e4b4
 size 6353
```
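The binary files in this commit are stored as Git LFS pointers: a version line, the SHA-256 of the real content, and its size in bytes. A small sketch for checking a downloaded copy against its pointer, using the training_args.bin values above:

```python
import hashlib
import os

def matches_lfs_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Check a local file against the oid/size recorded in its Git LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Values taken from the training_args.bin pointer in this commit.
print(matches_lfs_pointer(
    "training_args.bin",
    "19d9c6b9d1cbb2b9381fee04e8f1291fb0c2d788e922e0591f9d475007d6e4b4",
    6353,
))
```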