willcb committed · Commit 4787d0c · verified · 1 Parent(s): 54e89a6

Training in progress, epoch 1

README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ base_model: willcb/Qwen3-1.7B
+ library_name: transformers
+ model_name: Qwen2.5-0.5B-Wordle-SFT
+ tags:
+ - generated_from_trainer
+ - trl
+ - sft
+ licence: license
+ ---
+
+ # Model Card for Qwen2.5-0.5B-Wordle-SFT
+
+ This model is a fine-tuned version of [willcb/Qwen3-1.7B](https://huggingface.co/willcb/Qwen3-1.7B).
+ It has been trained using [TRL](https://github.com/huggingface/trl).
+
+ ## Quick start
+
+ ```python
+ from transformers import pipeline
+
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+ generator = pipeline("text-generation", model="willcb/Qwen2.5-0.5B-Wordle-SFT", device="cuda")
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+ print(output["generated_text"])
+ ```
+
+ ## Training procedure
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/primeintellect/huggingface/runs/zu9obrz6)
+
+ This model was trained with SFT.
+
+ ### Framework versions
+
+ - TRL: 0.19.1
+ - Transformers: 4.53.1
+ - Pytorch: 2.7.0
+ - Datasets: 3.6.0
+ - Tokenizers: 0.21.2
+
+ ## Citations
+
+ Cite TRL as:
+
+ ```bibtex
+ @misc{vonwerra2022trl,
+     title = {{TRL: Transformer Reinforcement Learning}},
+     author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+     year = 2020,
+     journal = {GitHub repository},
+     publisher = {GitHub},
+     howpublished = {\url{https://github.com/huggingface/trl}}
+ }
+ ```
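The card confirms only that the model was trained with SFT via TRL 0.19.1; below is a minimal sketch of how such a run could be launched with TRL's `SFTTrainer`. The dataset id and all hyperparameters are placeholder assumptions, since the card does not name the Wordle training data.

```python
# Minimal SFT sketch matching the card's framework versions (TRL 0.19.1).
# NOTE: "username/wordle-sft-data" is a hypothetical dataset id and the
# hyperparameters are guesses -- neither is stated in the model card.
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("username/wordle-sft-data", split="train")  # hypothetical

trainer = SFTTrainer(
    model="willcb/Qwen3-1.7B",  # base model named in the card
    args=SFTConfig(output_dir="Qwen2.5-0.5B-Wordle-SFT", num_train_epochs=1),
    train_dataset=dataset,
)
trainer.train()
```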
added_tokens.json CHANGED
@@ -1,6 +1,10 @@
  {
+   "</think>": 151668,
    "</tool_call>": 151658,
+   "</tool_response>": 151666,
+   "<think>": 151667,
    "<tool_call>": 151657,
+   "<tool_response>": 151665,
    "<|box_end|>": 151649,
    "<|box_start|>": 151648,
    "<|endoftext|>": 151643,
config.json CHANGED
@@ -1,27 +1,59 @@
  {
    "architectures": [
-     "Qwen2ForCausalLM"
+     "Qwen3ForCausalLM"
    ],
+   "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "eos_token_id": 151645,
+   "head_dim": 128,
    "hidden_act": "silu",
-   "hidden_size": 896,
+   "hidden_size": 2048,
    "initializer_range": 0.02,
-   "intermediate_size": 4864,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 14,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 2,
+   "intermediate_size": 6144,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 40960,
+   "max_window_layers": 28,
+   "model_type": "qwen3",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 8,
    "rms_norm_eps": 1e-06,
    "rope_scaling": null,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
+   "rope_theta": 1000000,
+   "sliding_window": null,
    "tie_word_embeddings": true,
    "torch_dtype": "bfloat16",
-   "transformers_version": "4.52.2",
+   "transformers_version": "4.53.1",
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151936
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3705d01a8acc493d47f1d49639fc87f84e69ddbff024c8bfd20be23165b1a4d9
- size 988097824
+ oid sha256:aa7fe9fc8b20c983d61cb9758b81d2515e277a099134f5537fbfc582e98c7596
+ size 3441185608
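The checkpoint size is consistent with the architecture swap: at 2 bytes per bfloat16 weight, the old and new LFS sizes imply roughly 0.49B and 1.72B parameters, matching Qwen2.5-0.5B and Qwen3-1.7B (back-of-the-envelope only, ignoring non-weight overhead):

```python
# Rough parameter counts from the LFS file sizes in this diff.
old_size, new_size = 988_097_824, 3_441_185_608  # bytes
bytes_per_param = 2                              # bfloat16
print(old_size / bytes_per_param / 1e9)  # ~0.49B -> Qwen2.5-0.5B
print(new_size / bytes_per_param / 1e9)  # ~1.72B -> Qwen3-1.7B
```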
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
- size 11421896
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+ size 11422654
tokenizer_config.json CHANGED
@@ -177,6 +177,38 @@
      "rstrip": false,
      "single_word": false,
      "special": false
+   },
+   "151665": {
+     "content": "<tool_response>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151666": {
+     "content": "</tool_response>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151667": {
+     "content": "<think>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
+   },
+   "151668": {
+     "content": "</think>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": false
    }
  },
  "additional_special_tokens": [
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bbb098a9ee475ff8857ed3b64b4fe25fff1f97d0a7f32c80a5c521e6be007f01
- size 6904
+ oid sha256:9632e2467d63a60790fc647995a5a652999c5497f808615748f326f199364d7b
+ size 6225
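The new `training_args.bin` is the re-pickled training configuration for this run, not weights. It can be inspected directly; a sketch, noting that pickles execute code on load, so only do this for a repo you trust:

```python
import torch
from huggingface_hub import hf_hub_download

# training_args.bin stores a pickled TrainingArguments/SFTConfig object,
# so weights_only=False is required on recent PyTorch versions.
path = hf_hub_download("willcb/Qwen2.5-0.5B-Wordle-SFT", "training_args.bin")
args = torch.load(path, weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```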