|
|
from flask import Flask, request, render_template, make_response |
|
|
from flask_sqlalchemy import SQLAlchemy |
|
|
from sentiment_model import preprocess_text, analyze_sentiment, read_file |
|
|
from wordcloud import WordCloud |
|
|
import os |
|
|
import nltk |
|
|
|
|
|
|
|
|
NLTK_DIR = os.environ.get('NLTK_DATA', os.path.join(os.getcwd(), 'nltk_data')) |
|
|
os.makedirs(NLTK_DIR, exist_ok=True) |
|
|
if NLTK_DIR not in nltk.data.path: |
|
|
nltk.data.path.insert(0, NLTK_DIR) |
|
|
|
|
|
|
|
|
for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger']: |
|
|
try: |
|
|
nltk.download(pkg, download_dir=NLTK_DIR, quiet=True) |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
app = Flask(__name__, static_folder='static') |
|
|
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///sentiment_data.db' |
|
|
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False |
|
|
db = SQLAlchemy(app) |
|
|
|
|
|
|
|
|
class SentimentRecord(db.Model): |
|
|
id = db.Column(db.Integer, primary_key=True) |
|
|
original_text = db.Column(db.Text, nullable=False) |
|
|
cleaned_text = db.Column(db.Text, nullable=False) |
|
|
removed_text = db.Column(db.Text, nullable=False) |
|
|
normalized_text = db.Column(db.Text, nullable=False) |
|
|
tokenized_text = db.Column(db.Text, nullable=False) |
|
|
stemmed_text = db.Column(db.Text, nullable=False) |
|
|
lemmatized_text = db.Column(db.Text, nullable=False) |
|
|
sentiment = db.Column(db.String(20), nullable=False) |
|
|
ner = db.Column(db.Text, nullable=False) |
|
|
pos = db.Column(db.Text, nullable=False) |
|
|
|
|
|
with app.app_context(): |
|
|
db.create_all() |
|
|
|
|
|
|
|
|
analysis_result = {} |
|
|
|
|
|
@app.route('/') |
|
|
def home(): |
|
|
return render_template('index.html', |
|
|
sentiment=None, |
|
|
text=None, |
|
|
file_uploaded=None, |
|
|
model_type='default') |
|
|
|
|
|
@app.route('/analyze', methods=['POST']) |
|
|
def analyze(): |
|
|
global analysis_result |
|
|
text = request.form.get('text', '').strip() |
|
|
file = request.files.get('file') |
|
|
model_type = request.form.get('model_type', 'default') |
|
|
|
|
|
file_uploaded = False |
|
|
if file and file.filename != '': |
|
|
text = read_file(file) |
|
|
file_uploaded = True |
|
|
|
|
|
if not text or len(text.split()) < 4: |
|
|
return render_template('index.html', |
|
|
error='Please provide at least 4 words for analysis.', |
|
|
text=text, |
|
|
model_type=model_type, |
|
|
file_uploaded=file_uploaded) |
|
|
|
|
|
word_count = len(text.split()) |
|
|
if word_count > 300: |
|
|
return render_template('index.html', |
|
|
error='Input text exceeds the 300-word limit.', |
|
|
text=text, |
|
|
model_type=model_type, |
|
|
file_uploaded=file_uploaded) |
|
|
|
|
|
try: |
|
|
|
|
|
cleaned_text, removed_text, normalized_text, tokenized_text, stemmed_text, lemmatized_text, ner, pos = preprocess_text(text) |
|
|
|
|
|
|
|
|
lemmatized_text_joined = " ".join(lemmatized_text) |
|
|
sentiment, probabilities = analyze_sentiment(lemmatized_text_joined, model_type=model_type) |
|
|
|
|
|
|
|
|
neutral_words, positive_words, negative_words = [], [], [] |
|
|
|
|
|
if model_type != 'emotion': |
|
|
for word in lemmatized_text: |
|
|
word_sentiment, _ = analyze_sentiment(word, model_type=model_type) |
|
|
if word_sentiment == 'POSITIVE': |
|
|
positive_words.append(word) |
|
|
elif word_sentiment == 'NEGATIVE': |
|
|
negative_words.append(word) |
|
|
elif word_sentiment == 'NEUTRAL': |
|
|
neutral_words.append(word) |
|
|
|
|
|
word_sentiment_distribution = { |
|
|
'positive': len(positive_words), |
|
|
'neutral': len(neutral_words), |
|
|
'negative': len(negative_words) |
|
|
} |
|
|
else: |
|
|
|
|
|
emotion_counters = { |
|
|
'ANGER': 0, 'DISGUST': 0, 'FEAR': 0, 'JOY': 0, 'NEUTRAL': 0, 'SADNESS': 0, 'SURPRISE': 0 |
|
|
} |
|
|
emotion_words = { |
|
|
'ANGER': [], 'DISGUST': [], 'FEAR': [], 'JOY': [], 'NEUTRAL': [], 'SADNESS': [], 'SURPRISE': [] |
|
|
} |
|
|
for word in lemmatized_text: |
|
|
word_sentiment, _ = analyze_sentiment(word, model_type=model_type) |
|
|
if word_sentiment in emotion_counters: |
|
|
emotion_counters[word_sentiment] += 1 |
|
|
emotion_words[word_sentiment].append(word) |
|
|
|
|
|
word_sentiment_distribution = { |
|
|
'anger': emotion_counters['ANGER'], |
|
|
'disgust': emotion_counters['DISGUST'], |
|
|
'fear': emotion_counters['FEAR'], |
|
|
'joy': emotion_counters['JOY'], |
|
|
'neutral': emotion_counters['NEUTRAL'], |
|
|
'sadness': emotion_counters['SADNESS'], |
|
|
'surprise': emotion_counters['SURPRISE'] |
|
|
} |
|
|
|
|
|
|
|
|
analysis_result = { |
|
|
'sentiment': sentiment, |
|
|
'model_type': model_type, |
|
|
'cleaned_text': cleaned_text, |
|
|
'removed_text': removed_text, |
|
|
'normalized_text': normalized_text, |
|
|
'tokenized_text': tokenized_text, |
|
|
'stemmed_text': stemmed_text, |
|
|
'lemmatized_text': lemmatized_text, |
|
|
'ner': ner, |
|
|
'pos': pos, |
|
|
'original_text': text, |
|
|
'word_sentiment_distribution': word_sentiment_distribution, |
|
|
'positive_words': positive_words, |
|
|
'negative_words': negative_words, |
|
|
'neutral_words': neutral_words if model_type != 'emotion' else [], |
|
|
'emotion_words': emotion_words if model_type == 'emotion' else None |
|
|
} |
|
|
|
|
|
|
|
|
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(lemmatized_text_joined) |
|
|
wordcloud_path = os.path.join('static', 'wordcloud.png') |
|
|
wordcloud.to_file(wordcloud_path) |
|
|
|
|
|
return render_template('index.html', |
|
|
sentiment=sentiment, |
|
|
cleaned_text=cleaned_text, |
|
|
removed_text=removed_text, |
|
|
normalized_text=normalized_text, |
|
|
tokenized_text=tokenized_text, |
|
|
stemmed_text=" ".join(stemmed_text), |
|
|
lemmatized_text=" ".join(lemmatized_text), |
|
|
ner=ner, |
|
|
pos=pos, |
|
|
probabilities=probabilities, |
|
|
wordcloud_url=wordcloud_path, |
|
|
word_sentiment_distribution=word_sentiment_distribution, |
|
|
positive_words=positive_words, |
|
|
negative_words=negative_words, |
|
|
neutral_words=neutral_words if model_type != 'emotion' else [], |
|
|
emotion_words=emotion_words if model_type == 'emotion' else None, |
|
|
text=text, |
|
|
model_type=model_type, |
|
|
total_words=len(tokenized_text), |
|
|
file_uploaded=file_uploaded) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error: {e}") |
|
|
return render_template('index.html', |
|
|
error='An error occurred during analysis.', |
|
|
text=text, |
|
|
model_type=model_type, |
|
|
file_uploaded=file_uploaded) |
|
|
|
|
|
@app.route('/download') |
|
|
def download_result(): |
|
|
global analysis_result |
|
|
try: |
|
|
if not analysis_result: |
|
|
return "No analysis available for download", 400 |
|
|
|
|
|
|
|
|
content = f""" |
|
|
Sentiment |
|
|
Overall Sentiment: {analysis_result['sentiment']} |
|
|
|
|
|
Model Used |
|
|
Selected Model: {analysis_result['model_type']} |
|
|
|
|
|
Original Text: |
|
|
{analysis_result['original_text']} |
|
|
|
|
|
Text Preprocessing Results |
|
|
Cleaned Text: |
|
|
{analysis_result['cleaned_text']} |
|
|
|
|
|
Removed Text: |
|
|
{analysis_result['removed_text']} |
|
|
|
|
|
Normalized Text: |
|
|
{analysis_result['normalized_text']} |
|
|
|
|
|
Tokenized Text: |
|
|
{', '.join(analysis_result['tokenized_text'])} |
|
|
|
|
|
Stemmed Text: |
|
|
{" ".join(analysis_result['stemmed_text'])} |
|
|
|
|
|
Lemmatized Text: |
|
|
{" ".join(analysis_result['lemmatized_text'])} |
|
|
|
|
|
Named Entities (NER): |
|
|
{', '.join([f"{entity[0]} ({entity[1]})" for entity in analysis_result['ner']])} |
|
|
|
|
|
POS Tags: |
|
|
{', '.join([f"{word} ({tag})" for word, tag in analysis_result['pos']])} |
|
|
|
|
|
Total Words: {len(analysis_result['tokenized_text'])} |
|
|
|
|
|
""" |
|
|
|
|
|
if analysis_result['model_type'] == 'emotion': |
|
|
content += "\nEmotion-Specific Words:\n" |
|
|
for emotion, words in analysis_result['emotion_words'].items(): |
|
|
content += f"{emotion.capitalize()} Words: {len(words)}\n" |
|
|
content += f"{', '.join(words)}\n" |
|
|
|
|
|
|
|
|
else: |
|
|
content += f""" |
|
|
Positive Words: {len(analysis_result['positive_words'])} |
|
|
{', '.join(analysis_result['positive_words'])} |
|
|
|
|
|
Neutral Words: {len(analysis_result['neutral_words'])} |
|
|
{', '.join(analysis_result['neutral_words'])} |
|
|
|
|
|
Negative Words: {len(analysis_result['negative_words'])} |
|
|
{', '.join(analysis_result['negative_words'])} |
|
|
""" |
|
|
|
|
|
|
|
|
response = make_response(content) |
|
|
response.headers["Content-Disposition"] = "attachment; filename=sentiment_analysis_result.txt" |
|
|
response.headers["Content-Type"] = "text/plain" |
|
|
return response |
|
|
except Exception as e: |
|
|
print(f"Error during file download: {e}") |
|
|
return "Error in generating file", 500 |
|
|
|
|
|
if __name__ == '__main__': |
|
|
port = int(os.environ.get('PORT', 7860)) |
|
|
app.run(host='0.0.0.0', port=port) |
|
|
|