Update tokenizer to fix bug
tokenization_qwen.py  CHANGED  (+2 -1)
@@ -353,7 +353,8 @@ class QWenTokenizer(PreTrainedTokenizer):
             else:
                 _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
                 return [{'text': b''.join(map(_tobytes, map(self.decoder.get, tokens))).decode('utf-8')}]
-            val = b''.join(map(self.decoder.get, tokens[1:-1])).decode('utf-8')
+            _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
+            val = b''.join(map(_tobytes, map(self.decoder.get, tokens[1:-1]))).decode('utf-8')
             return [{key: val}]

         return _replace_closed_tag(
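Why the extra conversion helps: in tokenization_qwen.py, `self.decoder` maps regular BPE ids to `bytes` but special-token ids to `str` (exactly the case the `isinstance` check in `_tobytes` handles), so a plain `b''.join` over decoder values raises `TypeError` whenever the span between the opening and closing tag contains a str-valued entry — presumably the bug this commit addresses. Below is a minimal, self-contained sketch of the failure and the fix; the `decoder` table and token ids are made up for illustration and are not the real vocabulary.

    # Stand-in for self.decoder: regular BPE ids map to bytes,
    # special-token ids map to str (illustrative values only).
    decoder = {
        0: '<ref>',       # opening special token (str)
        1: b'caf',        # regular token (bytes)
        2: b'\xc3\xa9',   # regular token: the UTF-8 bytes of 'é'
        3: '<box>',       # special token inside the span (str)
        4: '</ref>',      # closing special token (str)
    }
    tokens = [0, 1, 2, 3, 4]

    # Old code: b''.join cannot mix bytes and str, so the str entry
    # inside tokens[1:-1] raises TypeError.
    try:
        b''.join(map(decoder.get, tokens[1:-1])).decode('utf-8')
    except TypeError as err:
        print(err)  # sequence item 2: expected a bytes-like object, str found

    # New code: coerce str entries to bytes first, then join and decode once.
    _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
    val = b''.join(map(_tobytes, map(decoder.get, tokens[1:-1]))).decode('utf-8')
    print(val)  # café<box>

Note that `tokens[1:-1]` drops the opening and closing tag tokens, and joining all the bytes before a single `.decode('utf-8')` keeps multi-byte characters intact even when their bytes are spread across adjacent tokens.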