willcb committed · Commit 4787d0c · verified · 1 Parent(s): 54e89a6

Training in progress, epoch 1

README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ base_model: willcb/Qwen3-1.7B
+ library_name: transformers
+ model_name: Qwen2.5-0.5B-Wordle-SFT
+ tags:
+ - generated_from_trainer
+ - trl
+ - sft
+ licence: license
+ ---
+
+ # Model Card for Qwen2.5-0.5B-Wordle-SFT
+
+ This model is a fine-tuned version of [willcb/Qwen3-1.7B](https://huggingface.co/willcb/Qwen3-1.7B).
+ It has been trained using [TRL](https://github.com/huggingface/trl).
+
+ ## Quick start
+
+ ```python
+ from transformers import pipeline
+
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+ generator = pipeline("text-generation", model="willcb/Qwen2.5-0.5B-Wordle-SFT", device="cuda")
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+ print(output["generated_text"])
+ ```
+
+ ## Training procedure
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/primeintellect/huggingface/runs/zu9obrz6)
+
+ This model was trained with SFT.
+
+ ### Framework versions
+
+ - TRL: 0.19.1
+ - Transformers: 4.53.1
+ - Pytorch: 2.7.0
+ - Datasets: 3.6.0
+ - Tokenizers: 0.21.2
+
+ ## Citations
+
+ Cite TRL as:
+
+ ```bibtex
+ @misc{vonwerra2022trl,
+     title = {{TRL: Transformer Reinforcement Learning}},
+     author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+     year = 2020,
+     journal = {GitHub repository},
+     publisher = {GitHub},
+     howpublished = {\url{https://github.com/huggingface/trl}}
+ }
+ ```
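The card confirms only that the model was trained with SFT via TRL 0.19.1; below is a minimal sketch of how such a run could be launched with TRL's `SFTTrainer`. The dataset id and all hyperparameters are placeholder assumptions, since the card does not name the Wordle training data.

```python
# Minimal SFT sketch matching the card's framework versions (TRL 0.19.1).
# NOTE: "username/wordle-sft-data" is a hypothetical dataset id and the
# hyperparameters are guesses -- neither is stated in the model card.
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("username/wordle-sft-data", split="train")  # hypothetical

trainer = SFTTrainer(
    model="willcb/Qwen3-1.7B",  # base model named in the card
    args=SFTConfig(output_dir="Qwen2.5-0.5B-Wordle-SFT", num_train_epochs=1),
    train_dataset=dataset,
)
trainer.train()
```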
added_tokens.json CHANGED
@@ -1,6 +1,10 @@
  {
+   "</think>": 151668,
    "</tool_call>": 151658,
+   "</tool_response>": 151666,
+   "<think>": 151667,
    "<tool_call>": 151657,
+   "<tool_response>": 151665,
    "<|box_end|>": 151649,
    "<|box_start|>": 151648,
    "<|endoftext|>": 151643,
config.json CHANGED
@@ -1,27 +1,59 @@
  {
    "architectures": [
-     "Qwen2ForCausalLM"
+     "Qwen3ForCausalLM"
    ],
+   "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "eos_token_id": 151645,
+   "head_dim": 128,
    "hidden_act": "silu",
-   "hidden_size": 896,
+   "hidden_size": 2048,
    "initializer_range": 0.02,
-   "intermediate_size": 4864,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 14,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 2,
+   "intermediate_size": 6144,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 40960,
+   "max_window_layers": 28,
+   "model_type": "qwen3",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 8,
    "rms_norm_eps": 1e-06,
    "rope_scaling": null,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
+   "rope_theta": 1000000,
+   "sliding_window": null,
    "tie_word_embeddings": true,
    "torch_dtype": "bfloat16",
-   "transformers_version": "4.52.2",
+   "transformers_version": "4.53.1",
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151936
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3705d01a8acc493d47f1d49639fc87f84e69ddbff024c8bfd20be23165b1a4d9
- size 988097824
+ oid sha256:aa7fe9fc8b20c983d61cb9758b81d2515e277a099134f5537fbfc582e98c7596
+ size 3441185608
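The checkpoint size is consistent with the architecture swap: at 2 bytes per bfloat16 weight, the old and new LFS sizes imply roughly 0.49B and 1.72B parameters, matching Qwen2.5-0.5B and Qwen3-1.7B (back-of-the-envelope only, ignoring non-weight overhead):

```python
# Rough parameter counts from the LFS file sizes in this diff.
old_size, new_size = 988_097_824, 3_441_185_608  # bytes
bytes_per_param = 2                              # bfloat16
print(old_size / bytes_per_param / 1e9)  # ~0.49B -> Qwen2.5-0.5B
print(new_size / bytes_per_param / 1e9)  # ~1.72B -> Qwen3-1.7B
```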
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
- size 11421896
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+ size 11422654
tokenizer_config.json CHANGED
@@ -177,6 +177,38 @@
      "rstrip": false,
      "single_word": false,
      "special": false
+   },
+   "151665": {
+     "content": "<tool_response>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151666": {
+     "content": "</tool_response>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151667": {
+     "content": "<think>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151668": {
+     "content": "</think>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
    }
  },
  "additional_special_tokens": [
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bbb098a9ee475ff8857ed3b64b4fe25fff1f97d0a7f32c80a5c521e6be007f01
- size 6904
+ oid sha256:9632e2467d63a60790fc647995a5a652999c5497f808615748f326f199364d7b
+ size 6225
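The new `training_args.bin` is the re-pickled training configuration for this run, not weights. It can be inspected directly; a sketch, noting that pickles execute code on load, so only do this for a repo you trust:

```python
import torch
from huggingface_hub import hf_hub_download

# training_args.bin stores a pickled TrainingArguments/SFTConfig object,
# so weights_only=False is required on recent PyTorch versions.
path = hf_hub_download("willcb/Qwen2.5-0.5B-Wordle-SFT", "training_args.bin")
args = torch.load(path, weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```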