Update tokenization_chatglm.py
Browse files
- tokenization_chatglm.py +2 -1
tokenization_chatglm.py
CHANGED
|
@@ -193,6 +193,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 193 |
num_image_tokens=20000,
|
| 194 |
**kwargs
|
| 195 |
) -> None:
|
|
|
|
| 196 |
super().__init__(
|
| 197 |
do_lower_case=do_lower_case,
|
| 198 |
remove_space=remove_space,
|
|
@@ -218,7 +219,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 218 |
self.mask_token = mask_token
|
| 219 |
self.gmask_token = gmask_token
|
| 220 |
|
| 221 |
-
|
| 222 |
|
| 223 |
""" Initialisation """
|
| 224 |
|
|
|
|
| 193 |
num_image_tokens=20000,
|
| 194 |
**kwargs
|
| 195 |
) -> None:
|
| 196 |
+
self.sp_tokenizer = SPTokenizer(vocab_file, num_image_tokens=num_image_tokens)
|
| 197 |
super().__init__(
|
| 198 |
do_lower_case=do_lower_case,
|
| 199 |
remove_space=remove_space,
|
|
|
|
| 219 |
self.mask_token = mask_token
|
| 220 |
self.gmask_token = gmask_token
|
| 221 |
|
| 222 |
+
|
| 223 |
|
| 224 |
""" Initialisation """
|
| 225 |
|