Ba2han committed on
Commit
3ba0ef0
·
verified ·
1 Parent(s): 60718f5

Training in progress, step 4325

Browse files
README.md CHANGED
@@ -1,18 +1,18 @@
1
  ---
2
- base_model: Ba2han/kumru-model-merged
3
  library_name: transformers
4
  model_name: k-outputs-2
5
  tags:
6
  - generated_from_trainer
 
7
  - sft
8
  - unsloth
9
- - trl
10
  licence: license
11
  ---
12
 
13
  # Model Card for k-outputs-2
14
 
15
- This model is a fine-tuned version of [Ba2han/kumru-model-merged](https://huggingface.co/Ba2han/kumru-model-merged).
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
@@ -28,18 +28,18 @@ print(output["generated_text"])
28
 
29
  ## Training procedure
30
 
31
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/e7zcc37u)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
- - TRL: 0.21.0
39
- - Transformers: 4.56.1
40
- - Pytorch: 2.8.0
41
  - Datasets: 4.2.0
42
- - Tokenizers: 0.22.0
43
 
44
  ## Citations
45
 
 
1
  ---
2
+ base_model: vngrs-ai/Kumru-2B-Base
3
  library_name: transformers
4
  model_name: k-outputs-2
5
  tags:
6
  - generated_from_trainer
7
+ - trl
8
  - sft
9
  - unsloth
 
10
  licence: license
11
  ---
12
 
13
  # Model Card for k-outputs-2
14
 
15
+ This model is a fine-tuned version of [vngrs-ai/Kumru-2B-Base](https://huggingface.co/vngrs-ai/Kumru-2B-Base).
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
 
28
 
29
  ## Training procedure
30
 
31
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/f9s9q25z)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
+ - TRL: 0.22.2
39
+ - Transformers: 4.55.4
40
+ - Pytorch: 2.8.0+cu128
41
  - Datasets: 4.2.0
42
+ - Tokenizers: 0.21.4
43
 
44
  ## Citations
45
 
config.json CHANGED
@@ -4,8 +4,7 @@
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 2,
7
- "dtype": "bfloat16",
8
- "eos_token_id": 3,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
  "hidden_size": 3072,
@@ -22,8 +21,9 @@
22
  "rope_theta": 500000,
23
  "sliding_window": null,
24
  "tie_word_embeddings": false,
25
- "transformers_version": "4.56.1",
26
- "unsloth_version": "2025.10.6",
 
27
  "use_cache": true,
28
- "vocab_size": 50176
29
  }
 
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 2,
7
+ "eos_token_id": 50177,
 
8
  "head_dim": 128,
9
  "hidden_act": "silu",
10
  "hidden_size": 3072,
 
21
  "rope_theta": 500000,
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.55.4",
26
+ "unsloth_version": "2025.10.8",
27
  "use_cache": true,
28
+ "vocab_size": 50178
29
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec5f5a1d971331ec93c21e1a96fa4ee14b0caab3609c9140e89d4e67fd72cdcf
3
- size 4750295696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd293cb5a4e52ca2610ea73e8c25d357b8364ac2c79187f4048b5675c4899d07
3
+ size 4750320272
special_tokens_map.json CHANGED
@@ -1,4 +1,13 @@
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<BOS>",
4
  "lstrip": false,
@@ -7,7 +16,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<EOS>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
  "bos_token": {
12
  "content": "<BOS>",
13
  "lstrip": false,
 
16
  "single_word": false
17
  },
18
  "eos_token": {
19
+ "content": "<|im_end|>",
20
  "lstrip": false,
21
  "normalized": false,
22
  "rstrip": false,
tokenizer.json CHANGED
@@ -2360,6 +2360,24 @@
2360
  "rstrip": false,
2361
  "normalized": false,
2362
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2363
  }
2364
  ],
2365
  "normalizer": null,
 
2360
  "rstrip": false,
2361
  "normalized": false,
2362
  "special": true
2363
+ },
2364
+ {
2365
+ "id": 50176,
2366
+ "content": "<|im_start|>",
2367
+ "single_word": false,
2368
+ "lstrip": false,
2369
+ "rstrip": false,
2370
+ "normalized": false,
2371
+ "special": true
2372
+ },
2373
+ {
2374
+ "id": 50177,
2375
+ "content": "<|im_end|>",
2376
+ "single_word": false,
2377
+ "lstrip": false,
2378
+ "rstrip": false,
2379
+ "normalized": false,
2380
+ "special": true
2381
  }
2382
  ],
2383
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -2095,13 +2095,32 @@
2095
  "rstrip": false,
2096
  "single_word": false,
2097
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2098
  }
2099
  },
 
 
 
2100
  "bos_token": "<BOS>",
2101
  "clean_up_tokenization_spaces": true,
2102
- "eos_token": "<EOS>",
2103
  "extra_special_tokens": {},
2104
- "model_max_length": 8192,
2105
  "pad_token": "<PAD>",
2106
  "padding_side": "right",
2107
  "tokenizer_class": "PreTrainedTokenizerFast",
 
2095
  "rstrip": false,
2096
  "single_word": false,
2097
  "special": true
2098
+ },
2099
+ "50176": {
2100
+ "content": "<|im_start|>",
2101
+ "lstrip": false,
2102
+ "normalized": false,
2103
+ "rstrip": false,
2104
+ "single_word": false,
2105
+ "special": true
2106
+ },
2107
+ "50177": {
2108
+ "content": "<|im_end|>",
2109
+ "lstrip": false,
2110
+ "normalized": false,
2111
+ "rstrip": false,
2112
+ "single_word": false,
2113
+ "special": true
2114
  }
2115
  },
2116
+ "additional_special_tokens": [
2117
+ "<|im_start|>"
2118
+ ],
2119
  "bos_token": "<BOS>",
2120
  "clean_up_tokenization_spaces": true,
2121
+ "eos_token": "<|im_end|>",
2122
  "extra_special_tokens": {},
2123
+ "model_max_length": 1000000000000000019884624838656,
2124
  "pad_token": "<PAD>",
2125
  "padding_side": "right",
2126
  "tokenizer_class": "PreTrainedTokenizerFast",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a892b6dad659c28027919dc2489199621b8670210f52e1ce216af7b24775181
3
  size 6225
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96b29caca31af828c363f434339163116665b126cea9906ed02c22c78eb31622
3
  size 6225