Update tokenizer: remove the added `<s>`/`</s>` tokens (ids 100352/100353) and set `bos_token`/`eos_token` to null

Files changed (4)

README.md CHANGED Viewed

@@ -59,7 +59,7 @@ tags:
 pip install transformers==4.45.0 accelerate==0.34.1 sentencepiece==0.2.0 torchvision requests torch Pillow
 pip install flash-attn --no-build-isolation
-# For better performance, you can install grouped-gemm, which may take 3-5 minutes to install
 pip install grouped_gemm==0.1.6
 ```

 pip install transformers==4.45.0 accelerate==0.34.1 sentencepiece==0.2.0 torchvision requests torch Pillow
 pip install flash-attn --no-build-isolation
+# For better inference performance, you can install grouped-gemm, which may take 3-5 minutes to install
 pip install grouped_gemm==0.1.6
 ```

added_tokens.json DELETED Viewed

@@ -1,4 +0,0 @@
-{
-  "</s>": 100353,
-  "<s>": 100352
-}

special_tokens_map.json CHANGED Viewed

@@ -1,18 +1,4 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,

 {
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -10,28 +10,12 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "100352": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100353": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
-  "bos_token": "<s>",
   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": null,

       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
+  "bos_token": null,
   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
   "clean_up_tokenization_spaces": false,
+  "eos_token": null,
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": null,