Update modeling_chatglm.py
Browse files
- modeling_chatglm.py +1 -1
modeling_chatglm.py
CHANGED
|
@@ -1071,7 +1071,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
| 1071 |
def text_split_by_punctuation(original_text, return_dict=False):
|
| 1072 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
| 1073 |
text = original_text
|
| 1074 |
-
custom_sent_tokenizer = PunktSentenceTokenizer(
|
| 1075 |
punctuations = r"([。;!?])" # For Chinese support
|
| 1076 |
|
| 1077 |
separated = custom_sent_tokenizer.tokenize(text)
|
|
|
|
| 1071 |
def text_split_by_punctuation(original_text, return_dict=False):
|
| 1072 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
| 1073 |
text = original_text
|
| 1074 |
+
custom_sent_tokenizer = PunktSentenceTokenizer()
|
| 1075 |
punctuations = r"([。;!?])" # For Chinese support
|
| 1076 |
|
| 1077 |
separated = custom_sent_tokenizer.tokenize(text)
|