Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,19 +3,46 @@ from transformers import pipeline
|
|
| 3 |
import spacy
|
| 4 |
from textblob import TextBlob
|
| 5 |
from gradio_client import Client
|
|
|
|
| 6 |
|
| 7 |
# Initialize models
|
| 8 |
nlp = spacy.load("en_core_web_sm")
|
| 9 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def preprocess_text(text: str):
|
| 12 |
-
"""Process text and return corrections with position information"""
|
| 13 |
result = {
|
| 14 |
"spell_suggestions": [],
|
| 15 |
"entities": [],
|
| 16 |
"tags": []
|
| 17 |
}
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Find and record positions of corrections
|
| 20 |
doc = nlp(text)
|
| 21 |
|
|
@@ -43,7 +70,7 @@ def preprocess_text(text: str):
|
|
| 43 |
return text, result
|
| 44 |
|
| 45 |
def preprocess_and_forward(text: str):
|
| 46 |
-
"""Process text and forward to translation service"""
|
| 47 |
original_text, preprocessing_result = preprocess_text(text)
|
| 48 |
|
| 49 |
# Forward original text to translation service
|
|
|
|
| 3 |
import spacy
|
| 4 |
from textblob import TextBlob
|
| 5 |
from gradio_client import Client
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
# Initialize models
|
| 9 |
nlp = spacy.load("en_core_web_sm")
|
| 10 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 11 |
|
| 12 |
+
def preprocess_capitalization(text: str) -> str:
    """Normalize the capitalization of each word in *text*.

    Every run of non-whitespace characters is treated as one word:

    * A word made only of ASCII uppercase letters is taken to be an
      acronym and left unchanged (``"NASA"``).
    * A word containing both ASCII uppercase and lowercase letters is
      rewritten so its first alphanumeric character is uppercased and
      everything after it is lowercased (``"HEllo"`` -> ``"Hello"``).
    * Any other word is left unchanged.

    All whitespace (spaces, tabs, newlines, repeated spaces) is preserved
    exactly as it appears in the input.

    Args:
        text: The raw input text.

    Returns:
        The text with mixed-case words normalized.
    """

    def _fix_word(match):
        word = match.group(0)

        # Acronyms (all ASCII uppercase letters) stay as they are.
        if re.fullmatch(r"[A-Z]+", word):
            return word

        # Only words mixing upper- and lowercase letters are rewritten.
        if not (re.search(r"[A-Z]", word) and re.search(r"[a-z]", word)):
            return word

        # Capitalize at the first alphanumeric character rather than at
        # index 0, so leading punctuation such as quotes or brackets does
        # not cause the whole word to be lowercased. A leading digit still
        # counts as the "first" character, so "1St" becomes "1st".
        start = next(i for i, ch in enumerate(word) if ch.isalnum())
        return word[:start] + word[start].upper() + word[start + 1:].lower()

    # Substituting over \S+ (instead of splitting on " ") keeps tabs and
    # newlines as word separators and leaves all whitespace untouched.
    return re.sub(r"\S+", _fix_word, text)
|
| 28 |
+
|
| 29 |
def preprocess_text(text: str):
|
| 30 |
+
"""Process text and return corrections with position information."""
|
| 31 |
result = {
|
| 32 |
"spell_suggestions": [],
|
| 33 |
"entities": [],
|
| 34 |
"tags": []
|
| 35 |
}
|
| 36 |
|
| 37 |
+
# Apply capitalization preprocessing
|
| 38 |
+
capitalized_text = preprocess_capitalization(text)
|
| 39 |
+
if capitalized_text != text:
|
| 40 |
+
result["spell_suggestions"].append({
|
| 41 |
+
"original": text,
|
| 42 |
+
"corrected": capitalized_text
|
| 43 |
+
})
|
| 44 |
+
text = capitalized_text # Update text for further processing
|
| 45 |
+
|
| 46 |
# Find and record positions of corrections
|
| 47 |
doc = nlp(text)
|
| 48 |
|
|
|
|
| 70 |
return text, result
|
| 71 |
|
| 72 |
def preprocess_and_forward(text: str):
|
| 73 |
+
"""Process text and forward to translation service."""
|
| 74 |
original_text, preprocessing_result = preprocess_text(text)
|
| 75 |
|
| 76 |
# Forward original text to translation service
|