Rohitkhatri75436 committed on
Commit
76f9970
·
verified ·
1 Parent(s): 43ce3ee

rohitkhatri75436/fine-tuned-1.7-smollm-flutter-structured

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
- base_model: HuggingFaceTB/SmolLM-135M
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,9 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # smollm_flutter_packages_model_structured_text
16
 
17
- This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M](https://huggingface.co/HuggingFaceTB/SmolLM-135M) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.9298
20
 
21
  ## Model description
22
 
@@ -42,14 +42,15 @@ The following hyperparameters were used during training:
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
  - num_epochs: 3
 
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | 1.29 | 1.0 | 197 | 0.9765 |
51
- | 0.9644 | 2.0 | 394 | 0.9388 |
52
- | 0.9282 | 3.0 | 591 | 0.9298 |
53
 
54
 
55
  ### Framework versions
 
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
+ base_model: HuggingFaceTB/SmolLM2-360M-Instruct
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # smollm_flutter_packages_model_structured_text
16
 
17
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.8510
20
 
21
  ## Model description
22
 
 
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
  - num_epochs: 3
45
+ - mixed_precision_training: Native AMP
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 0.5574 | 1.0 | 197 | 0.8908 |
52
+ | 0.4378 | 2.0 | 394 | 0.8597 |
53
+ | 0.4158 | 3.0 | 591 | 0.8510 |
54
 
55
 
56
  ### Framework versions
config.json CHANGED
@@ -4,26 +4,34 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 0,
8
- "eos_token_id": 0,
9
  "head_dim": 64,
10
  "hidden_act": "silu",
11
- "hidden_size": 576,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 1536,
14
- "max_position_embeddings": 2048,
 
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
- "num_attention_heads": 9,
18
- "num_hidden_layers": 30,
19
- "num_key_value_heads": 3,
20
- "pad_token_id": 0,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
 
23
  "rope_scaling": null,
24
- "rope_theta": 10000.0,
25
  "tie_word_embeddings": true,
26
  "torch_dtype": "float32",
 
 
 
 
 
 
27
  "transformers_version": "4.51.3",
28
  "use_cache": true,
29
  "vocab_size": 49152
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
  "head_dim": 64,
10
  "hidden_act": "silu",
11
+ "hidden_size": 960,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 2560,
14
+ "is_llama_config": true,
15
+ "max_position_embeddings": 8192,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
+ "num_attention_heads": 15,
19
+ "num_hidden_layers": 32,
20
+ "num_key_value_heads": 5,
21
+ "pad_token_id": 2,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
+ "rope_interleaved": false,
25
  "rope_scaling": null,
26
+ "rope_theta": 100000,
27
  "tie_word_embeddings": true,
28
  "torch_dtype": "float32",
29
+ "transformers.js_config": {
30
+ "kv_cache_dtype": {
31
+ "fp16": "float16",
32
+ "q4f16": "float16"
33
+ }
34
+ },
35
  "transformers_version": "4.51.3",
36
  "use_cache": true,
37
  "vocab_size": 49152
generation_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 0,
 
5
  "transformers_version": "4.51.3"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
  "transformers_version": "4.51.3"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da232ff03f981c68c161322c53128d4dc7e0ce7b083f12eab71ec274302c2043
3
- size 538090408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddadf0060646fcf8f7f70f9c2fc8616cfd1dacaca2548eeefd57327aa81968c6
3
+ size 1447317080
special_tokens_map.json CHANGED
@@ -1,38 +1,29 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|endoftext|>",
4
  "<|im_start|>",
5
- "<|im_end|>",
6
- "<repo_name>",
7
- "<reponame>",
8
- "<file_sep>",
9
- "<filename>",
10
- "<gh_stars>",
11
- "<issue_start>",
12
- "<issue_comment>",
13
- "<issue_closed>",
14
- "<jupyter_start>",
15
- "<jupyter_text>",
16
- "<jupyter_code>",
17
- "<jupyter_output>",
18
- "<jupyter_script>",
19
- "<empty_output>"
20
  ],
21
  "bos_token": {
22
- "content": "<|endoftext|>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false
27
  },
28
  "eos_token": {
29
- "content": "<|endoftext|>",
 
 
 
 
 
 
 
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false
34
  },
35
- "pad_token": "<|endoftext|>",
36
  "unk_token": {
37
  "content": "<|endoftext|>",
38
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
 
3
  "<|im_start|>",
4
+ "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
+ "content": "<|im_start|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
  "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|im_end|>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false
26
  },
 
27
  "unk_token": {
28
  "content": "<|endoftext|>",
29
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -139,30 +139,16 @@
139
  }
140
  },
141
  "additional_special_tokens": [
142
- "<|endoftext|>",
143
  "<|im_start|>",
144
- "<|im_end|>",
145
- "<repo_name>",
146
- "<reponame>",
147
- "<file_sep>",
148
- "<filename>",
149
- "<gh_stars>",
150
- "<issue_start>",
151
- "<issue_comment>",
152
- "<issue_closed>",
153
- "<jupyter_start>",
154
- "<jupyter_text>",
155
- "<jupyter_code>",
156
- "<jupyter_output>",
157
- "<jupyter_script>",
158
- "<empty_output>"
159
  ],
160
- "bos_token": "<|endoftext|>",
 
161
  "clean_up_tokenization_spaces": false,
162
- "eos_token": "<|endoftext|>",
163
  "extra_special_tokens": {},
164
- "model_max_length": 1000000000000000019884624838656,
165
- "pad_token": "<|endoftext|>",
166
  "tokenizer_class": "GPT2Tokenizer",
167
  "unk_token": "<|endoftext|>",
168
  "vocab_size": 49152
 
139
  }
140
  },
141
  "additional_special_tokens": [
 
142
  "<|im_start|>",
143
+ "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
150
+ "model_max_length": 8192,
151
+ "pad_token": "<|im_end|>",
152
  "tokenizer_class": "GPT2Tokenizer",
153
  "unk_token": "<|endoftext|>",
154
  "vocab_size": 49152
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c643cfa68475b5fd83971253bac8bf1300ee446cfe3a30cbe50662a58c71c90
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:198d4843d34474839bd69fa920dc3c2e3ebdcc74489ba9051a08a7bf8b29261e
3
  size 5304