Spaces:

apasalic
/

sentiment-analyzer

Sleeping

App Files Files Community

sentiment-analyzer / app.py

apasalic

Update app.py

4fd5693 verified 9 months ago

raw

history blame

6.64 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import re

	# Model identifier handed to the transformers pipeline for each language
	# offered in the language selector. Insertion order is the display order.
	MODELS = dict(
	    English="MarieAngeA13/Sentiment-Analysis-BERT",
	    Danish="larskjeldgaard/senda",
	)

	# Browser-tab title, tab icon and wide layout for the whole page.
	_page_settings = {
	    "page_title": "Multi-language Sentiment Analyzer",
	    "page_icon": "🎭",
	    "layout": "wide",
	}
	st.set_page_config(**_page_settings)

	# Load custom CSS and inject it into the page as an inline <style> element.
	# The encoding is pinned so the stylesheet is decoded the same way on every
	# host instead of depending on the locale default.
	# NOTE(review): assumes style.css is saved as UTF-8 — confirm.
	with open('style.css', encoding='utf-8') as css_file:
	    custom_css = css_file.read()
	# The file is already closed here; only the rendered markup is sent to Streamlit.
	st.markdown(f'<style>{custom_css}</style>', unsafe_allow_html=True)

	def process_sentiment(text, pipeline):
	    """Classify a single text entry and return its label and score.

	    Args:
	        text: Value to classify. It is converted with ``str()`` before being
	            passed to the classifier, so non-string values are accepted.
	        pipeline: Callable invoked with the string form of ``text``. Its
	            result is read as ``result[0]['label']`` and
	            ``result[0]['score']``. Inside this function the parameter
	            shadows the ``pipeline`` factory imported from ``transformers``.

	    Returns:
	        A ``(label, score)`` tuple with the label lower-cased, or
	        ``("unknown", 0.0)`` when classification raises an exception.
	    """
	    text_str = str(text)
	    try:
	        top_prediction = pipeline(text_str)[0]
	        # Convert sentiment to lowercase
	        return top_prediction['label'].lower(), top_prediction['score']
	    except Exception as e:
	        # Best-effort: report the failure in the UI and return a placeholder.
	        # The preview is sliced from the string form so that a non-string
	        # input (for example a float NaN) cannot raise inside this handler.
	        st.warning(f"Error processing text: {text_str[:50]}... Error: {str(e)}")
	        return "unknown", 0.0

	# App layout
	#
	# NOTE(review): the nesting below was reconstructed. The title, language
	# selector, privacy notice and uploader are placed in the centre column and
	# everything else is rendered at full page width, which avoids nesting
	# st.columns inside a column. Confirm this matches the intended layout.

	# Label spellings matched when counting and colouring results: the English
	# forms plus the Danish "positiv"/"negativ" forms.
	_POSITIVE_LABELS = ("positive", "positiv")
	_NEUTRAL_LABELS = ("neutral",)
	_NEGATIVE_LABELS = ("negative", "negativ")

	_SPEAKER_TIMESTAMP_PATTERN = r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+'
	_TIMESTAMP_PATTERN = r'Start Time: \d+\.\d+ - End Time: \d+\.\d+'


	def clean_transcript_text(text):
	    """Remove transcript speaker/timestamp markers and collapse whitespace.

	    The combined "Speaker: ..., Start Time: ... - End Time: ..." marker is
	    removed first. Only if that changes nothing is the bare
	    "Start Time: ... - End Time: ..." marker removed instead.

	    Args:
	        text: Cell value from the uploaded CSV. Missing values become an
	            empty string and other non-string values are converted with
	            ``str()`` so that ``re.sub`` never receives a non-string.

	    Returns:
	        The cleaned text with whitespace runs collapsed to single spaces and
	        leading/trailing whitespace stripped.
	    """
	    if not isinstance(text, str):
	        text = "" if pd.isna(text) else str(text)
	    cleaned_text = re.sub(_SPEAKER_TIMESTAMP_PATTERN, '', text)
	    if cleaned_text == text:
	        cleaned_text = re.sub(_TIMESTAMP_PATTERN, '', text)
	    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
	    return cleaned_text.strip()


	def highlight_sentiment(val):
	    """Return the CSS background declaration for a sentiment label.

	    Positive labels are tinted green, negative labels red and neutral grey;
	    any other value gets an empty string (no styling).
	    """
	    if val in _POSITIVE_LABELS:
	        return 'background-color: rgba(0, 255, 0, 0.2)'
	    if val in _NEGATIVE_LABELS:
	        return 'background-color: rgba(255, 0, 0, 0.2)'
	    if val in _NEUTRAL_LABELS:
	        return 'background-color: rgba(128, 128, 128, 0.2)'
	    return ''


	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	    st.title("🎭 Multi-language Sentiment Analysis")

	    selected_language = st.selectbox(
	        "Select Language",
	        options=list(MODELS.keys()),
	        index=0,
	    )

	    st.markdown("""
	    <div class="privacy-notice">
	    ⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
	    </div>
	    """, unsafe_allow_html=True)

	    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])

	if uploaded_file:
	    try:
	        df = pd.read_csv(uploaded_file)
	        if "text" not in df.columns:
	            st.error("CSV must contain a 'text' column")
	        elif df.empty:
	            # A header-only CSV would otherwise divide by zero in the
	            # percentage metrics below.
	            st.warning("The uploaded CSV contains no rows to analyze.")
	        else:
	            with st.spinner(f"📊 Analyzing sentiments in {selected_language}..."):
	                df['cleaned_text'] = df['text'].apply(clean_transcript_text)

	                # NOTE(review): the model is constructed on every pass
	                # through this branch; if the installed Streamlit version
	                # provides st.cache_resource, caching it per language would
	                # avoid repeated loads — confirm and consider.
	                sentiment_pipeline = pipeline(
	                    "text-classification",
	                    model=MODELS[selected_language],
	                    truncation=True,
	                    max_length=512,
	                )

	                # Rows with nothing left after cleaning are not sent to the
	                # model; they get the same placeholder that
	                # process_sentiment returns on failure.
	                results = [
	                    process_sentiment(text, sentiment_pipeline) if text else ("unknown", 0.0)
	                    for text in df["cleaned_text"]
	                ]
	                df["sentiment"] = [label for label, _ in results]
	                df["confidence"] = [score for _, score in results]

	            st.markdown("### 📈 Analysis Results")

	            # Count each sentiment class, accepting both the English and the
	            # Danish label spellings.
	            total_rows = len(df)
	            sentiment_summary = [
	                ("Positive Sentiments", int(df["sentiment"].isin(_POSITIVE_LABELS).sum())),
	                ("Neutral Sentiments", int(df["sentiment"].isin(_NEUTRAL_LABELS).sum())),
	                ("Negative Sentiments", int(df["sentiment"].isin(_NEGATIVE_LABELS).sum())),
	            ]

	            metric_columns = st.columns(4)
	            for metric_column, (metric_label, count) in zip(metric_columns, sentiment_summary):
	                with metric_column:
	                    st.metric(
	                        metric_label,
	                        f"{count} ({count / total_rows * 100:.1f}%)"
	                    )
	            with metric_columns[3]:
	                st.metric(
	                    "Average Confidence",
	                    f"{df['confidence'].mean():.1%}"
	                )

	            st.markdown("#### Preview")

	            # Copy the slice so formatting the preview never writes back into
	            # (or warns about) the full results frame.
	            preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
	            preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")

	            # Styler.applymap is deprecated in favour of Styler.map since
	            # pandas 2.1; use whichever the installed version offers.
	            styler = preview_df.style
	            style_cells = styler.map if hasattr(styler, "map") else styler.applymap
	            st.dataframe(
	                style_cells(highlight_sentiment, subset=['sentiment']),
	                use_container_width=True
	            )

	            st.markdown("### 💾 Download Results")
	            csv_data = df.to_csv(index=False)
	            st.download_button(
	                label="Download Complete Analysis",
	                data=csv_data,
	                file_name=f"sentiment_results_{selected_language.lower()}.csv",
	                mime="text/csv"
	            )

	    except Exception as e:
	        # Top-level boundary for the upload flow: show the failure in the UI
	        # instead of letting the script crash.
	        st.error(f"Error processing file: {str(e)}")
	        st.error("Full error details:")
	        st.code(str(e))
	else:
	    st.markdown("""
	    <div class="instructions">
	    <h4>📝 How to use:</h4>
	    <ol>
	    <li>Select your desired language</li>
	    <li>Prepare a CSV file with a column named "text"</li>
	    <li>Upload your file using the button above</li>
	    <li>Wait for the analysis to complete</li>
	    <li>Download the results with sentiment labels</li>
	    </ol>
	    </div>
	    """, unsafe_allow_html=True)