zai-org
/

LongCite-glm4-9b

Text Generation

Model card Files Files and versions

NeoZ123 committed on Dec 16, 2024

Commit

6161f9f

·

verified ·

1 Parent(s): 79d982a

Update modeling_chatglm.py

Files changed (1) hide show

modeling_chatglm.py +1 -1

modeling_chatglm.py CHANGED Viewed

@@ -1071,7 +1071,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         def text_split_by_punctuation(original_text, return_dict=False):
             # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text)  # separate period without space
             text = original_text
-            custom_sent_tokenizer = PunktSentenceTokenizer(text)
             punctuations = r"([。；！？])"  # For Chinese support
             separated = custom_sent_tokenizer.tokenize(text)

         def text_split_by_punctuation(original_text, return_dict=False):
             # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text)  # separate period without space
             text = original_text
+            custom_sent_tokenizer = PunktSentenceTokenizer()
             punctuations = r"([。；！？])"  # For Chinese support
             separated = custom_sent_tokenizer.tokenize(text)