token_alignment (#13)
Browse files
- tokenizer alignment (48d1631122017c80068ff7cf45d1138820a6142b)
- special_tokens_map.json +0 -7
- tokenizer.json +0 -9
- tokenizer_config.json +1 -10
special_tokens_map.json
CHANGED
|
@@ -13,13 +13,6 @@
|
|
| 13 |
"normalized": false,
|
| 14 |
"rstrip": false,
|
| 15 |
"single_word": false
|
| 16 |
-
},
|
| 17 |
-
{
|
| 18 |
-
"content": "<end_of_utterance>",
|
| 19 |
-
"lstrip": false,
|
| 20 |
-
"normalized": false,
|
| 21 |
-
"rstrip": false,
|
| 22 |
-
"single_word": false
|
| 23 |
}
|
| 24 |
],
|
| 25 |
"bos_token": {
|
|
|
|
| 13 |
"normalized": false,
|
| 14 |
"rstrip": false,
|
| 15 |
"single_word": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
}
|
| 17 |
],
|
| 18 |
"bos_token": {
|
tokenizer.json
CHANGED
|
@@ -866,15 +866,6 @@
|
|
| 866 |
"rstrip": false,
|
| 867 |
"normalized": false,
|
| 868 |
"special": true
|
| 869 |
-
},
|
| 870 |
-
{
|
| 871 |
-
"id": 100352,
|
| 872 |
-
"content": "<end_of_utterance>",
|
| 873 |
-
"single_word": false,
|
| 874 |
-
"lstrip": false,
|
| 875 |
-
"rstrip": false,
|
| 876 |
-
"normalized": false,
|
| 877 |
-
"special": true
|
| 878 |
}
|
| 879 |
],
|
| 880 |
"normalizer": null,
|
|
|
|
| 866 |
"rstrip": false,
|
| 867 |
"normalized": false,
|
| 868 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 869 |
}
|
| 870 |
],
|
| 871 |
"normalizer": null,
|
tokenizer_config.json
CHANGED
|
@@ -769,20 +769,11 @@
|
|
| 769 |
"rstrip": false,
|
| 770 |
"single_word": false,
|
| 771 |
"special": true
|
| 772 |
-
},
|
| 773 |
-
"100352": {
|
| 774 |
-
"content": "<end_of_utterance>",
|
| 775 |
-
"lstrip": false,
|
| 776 |
-
"normalized": false,
|
| 777 |
-
"rstrip": false,
|
| 778 |
-
"single_word": false,
|
| 779 |
-
"special": true
|
| 780 |
}
|
| 781 |
},
|
| 782 |
"additional_special_tokens": [
|
| 783 |
"<fake_token_around_image>",
|
| 784 |
-
"<image>",
|
| 785 |
-
"<end_of_utterance>"
|
| 786 |
],
|
| 787 |
"bos_token": "<|start_of_role|>",
|
| 788 |
"clean_up_tokenization_spaces": false,
|
|
|
|
| 769 |
"rstrip": false,
|
| 770 |
"single_word": false,
|
| 771 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
}
|
| 773 |
},
|
| 774 |
"additional_special_tokens": [
|
| 775 |
"<fake_token_around_image>",
|
| 776 |
+
"<image>"
|
|
|
|
| 777 |
],
|
| 778 |
"bos_token": "<|start_of_role|>",
|
| 779 |
"clean_up_tokenization_spaces": false,
|