Update tokenizer.py
tokenizer.py  +1 -1
@@ -773,7 +773,7 @@ class XTTSTokenizerFast(PreTrainedTokenizerFast):
         char_limit = self.char_limits.get(base_lang, 250)

         # Clean and preprocess
-        text = self.preprocess_text(text, text_lang)
+        #text = self.preprocess_text(text, text_lang) we do this in the hidden function

         # Split text into sentences/chunks based on language
         chunk_list = split_sentence(text, base_lang, text_split_length=char_limit)
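The change only comments out the explicit preprocess_text call; per the inline note, the same cleanup already runs inside an internal ("hidden") function later in the pipeline, so keeping it here would preprocess the text twice. Below is a minimal sketch of how the split-then-encode flow might look after this change; apart from char_limits, preprocess_text and split_sentence (which appear in the diff), every name is hypothetical and not taken from the actual tokenizer.py:

    # Sketch only: _ChunkingSketch, _split_text, _encode_chunk and encode are
    # assumed names for illustration; split_sentence is the module-level helper
    # referenced in the diff above.
    class _ChunkingSketch:
        def _split_text(self, text, text_lang):
            base_lang = text_lang.split("-")[0]                # e.g. "en-US" -> "en" (assumed)
            char_limit = self.char_limits.get(base_lang, 250)  # per-language limit, 250 fallback

            # The explicit preprocess_text call is dropped here; the internal
            # ("hidden") encode step below now applies it exactly once per chunk.
            return split_sentence(text, base_lang, text_split_length=char_limit)

        def _encode_chunk(self, chunk, text_lang):
            chunk = self.preprocess_text(chunk, text_lang)     # preprocessing now happens here only
            return self.encode(chunk)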