Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -103,7 +103,7 @@ def normalize(text):
|
|
| 103 |
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
| 104 |
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
| 105 |
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
| 106 |
-
text = re.sub(r'\b(
|
| 107 |
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
| 108 |
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
| 109 |
text = re.sub(r'[^\S \n]', ' ', text)
|
|
@@ -111,7 +111,7 @@ def normalize(text):
|
|
| 111 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
| 112 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
| 113 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
| 114 |
-
text = re.sub(r'[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
| 115 |
text = re.sub(r'\d*\.\d+', point_num, text)
|
| 116 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
| 117 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
|
|
|
| 103 |
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
| 104 |
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
| 105 |
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
| 106 |
+
text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
|
| 107 |
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
| 108 |
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
| 109 |
text = re.sub(r'[^\S \n]', ' ', text)
|
|
|
|
| 111 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
| 112 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
| 113 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
| 114 |
+
text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
| 115 |
text = re.sub(r'\d*\.\d+', point_num, text)
|
| 116 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
| 117 |
text = re.sub(r'(?<=\d)S', ' S', text)
|