Update gptx_tokenizer.py
gptx_tokenizer.py  CHANGED  +4 -4
@@ -245,10 +245,9 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
         """
         output = self.tok.decode(input=token_ids, num_threads=num_threads)
         if skip_special_tokens:
-
-
-
-
+            for substring in self.additional_special_tokens:
+                output = output.replace(substring, "")
+
         if clean_up_tokenization_spaces:
             warnings.warn(
                 "when cleaning up tokenization spaces, this will not behave "
@@ -259,6 +258,7 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
 
         return output
 
+
     def _convert_id_to_token(self, index: int) -> str:
         """
         Convert a token ID to its corresponding token string.