Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,30 @@ from nltk import sent_tokenize
|
|
| 8 |
import uvicorn
|
| 9 |
from fastapi import FastAPI
|
| 10 |
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class AspectSentimentPipeline(Pipeline):
|
| 13 |
def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
|
|
@@ -28,6 +52,7 @@ class AspectSentimentPipeline(Pipeline):
|
|
| 28 |
main_results = []
|
| 29 |
main_aspects = []
|
| 30 |
for sentence in sentences:
|
|
|
|
| 31 |
aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
|
| 32 |
for aspect in aspects:
|
| 33 |
main_aspects.append(aspect)
|
|
|
|
| 8 |
import uvicorn
|
| 9 |
from fastapi import FastAPI
|
| 10 |
from pydantic import BaseModel, Field
|
| 11 |
+
import re
|
| 12 |
+
import emoji
|
| 13 |
+
|
| 14 |
+
# Stop-word list loaded once at import time, one entry per line of the file.
# The context manager closes the file handle as soon as it has been read.
with open('stop-words.tr.txt', 'r', encoding="UTF8") as stop_words_file:
    stop_words = [x.strip() for x in stop_words_file.read().split('\n')]
|
| 15 |
+
|
| 16 |
+
def preprocess_text(text):
    """Clean a raw text string before it is passed to aspect extraction.

    The steps run in a fixed order: remove URLs, hashtag tokens, e-mail
    addresses, HTML tags and digits; replace dots with spaces; strip
    punctuation and emoji; drop stop words; split a trailing ``de``/``da``
    off each word; and finally discard single-character tokens.

    Args:
        text: The raw input string.

    Returns:
        The cleaned text, with tokens separated by single spaces.
    """
    # A single pass suffices: ``http\S+`` consumes any http/https URL up to
    # the next whitespace, so further URL patterns would never match.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'www\S+', '', text)

    # Drop hashtag tokens entirely (this also collapses all whitespace).
    text = ' '.join(word for word in text.split() if not word.startswith('#'))

    # E-mail addresses. The local part must be a bracketed character class;
    # without the opening "[" the pattern never matches a real address.
    text = re.sub(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', '', text)

    text = re.sub(r'<[^>]+>', '', text)  # HTML tags
    text = re.sub(r'[0-9]+', '', text)   # digits
    text = text.replace('\n', ' ')

    # NOTE(review): replaces every "." together with any "x" characters that
    # directly follow it. The "x*" part looks unintentional; kept as-is so
    # the output stays unchanged -- confirm the intended pattern.
    text = re.sub(r'\.x*', ' ', text)

    # Keep only word characters, whitespace, "+", "-" and "_".
    text = re.sub(r'[^\w\s\+\-_]', '', text)
    text = emoji.replace_emoji(text)

    # Build the set once per call so each token lookup is O(1).
    stop_word_set = set(stop_words)
    text = ' '.join(
        word for word in text.split() if word.lower() not in stop_word_set
    )

    # NOTE(review): "$" anchors to the end of the whole text, so the question
    # suffix is only split off the final word. A per-word split (like the
    # de/da step below) may have been intended -- confirm before changing.
    text = re.sub(r'(mısın|misin|musun|müsün)$', r' \1', text)

    # Split a trailing "de"/"da" off every word.
    text = ' '.join(re.sub(r'(de|da)$', r' \1', word) for word in text.split())

    # Discard single-character tokens.
    text = ' '.join(token for token in text.split() if len(token) > 1)
    return text
|
| 35 |
|
| 36 |
class AspectSentimentPipeline(Pipeline):
|
| 37 |
def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
|
|
|
|
| 52 |
main_results = []
|
| 53 |
main_aspects = []
|
| 54 |
for sentence in sentences:
|
| 55 |
+
sentence = preprocess_text(sentence)
|
| 56 |
aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
|
| 57 |
for aspect in aspects:
|
| 58 |
main_aspects.append(aspect)
|