Update utils.py
Browse files
utils.py
CHANGED
|
@@ -18,7 +18,7 @@ def get_tokenizer(id):
|
|
| 18 |
def get_tokenization(tokenizer, text):
|
| 19 |
logging.debug(f"Tokenizing text: {text}")
|
| 20 |
ids = tokenizer.encode(text).ids
|
| 21 |
-
string_tokens =
|
| 22 |
logging.debug(f"Tokens: {string_tokens}")
|
| 23 |
return string_tokens
|
| 24 |
|
|
|
|
| 18 |
def get_tokenization(tokenizer, text):
    """Return the string form of every token that ``tokenizer`` yields for ``text``.

    The text is encoded with ``tokenizer.encode``; each id in the resulting
    ``.ids`` sequence is then passed to ``tokenizer.id_to_token``. Both the
    input text and the resulting token list are logged at DEBUG level.

    Args:
        tokenizer: Object exposing ``encode(text)`` (whose result has an
            ``ids`` attribute) and ``id_to_token(token_id)``.
            NOTE(review): presumably a Hugging Face ``tokenizers.Tokenizer``
            -- confirm against ``get_tokenizer``.
        text: The text to tokenize.

    Returns:
        A list with one ``id_to_token`` result per encoded id, in order.
    """
    logging.debug(f"Tokenizing text: {text}")
    encoding = tokenizer.encode(text)
    token_ids = encoding.ids
    # Map each id back to its vocabulary entry, preserving encoding order.
    string_tokens = list(map(tokenizer.id_to_token, token_ids))
    logging.debug(f"Tokens: {string_tokens}")
    return string_tokens
|
| 24 |
|