Spaces:
Sleeping
Sleeping
import re
import traceback

import pandas as pd
import streamlit as st
from transformers import pipeline
# Model configurations: UI language name -> Hugging Face model identifier
# that is passed to transformers.pipeline() for that language.
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}

# Page config
# NOTE(review): the page_icon glyph looks like a truncated emoji in this copy
# of the file; it is kept as-is because the intended icon cannot be recovered.
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="π",
    layout="wide",
)

# Load custom CSS and inject it into the page as an inline <style> element.
# The encoding is given explicitly so the stylesheet is decoded the same way
# on every platform instead of depending on the locale's default encoding.
with open('style.css', encoding='utf-8') as css_file:
    st.markdown(f'<style>{css_file.read()}</style>', unsafe_allow_html=True)
def process_sentiment(text, pipeline):
    """Classify the sentiment of a single text entry.

    Args:
        text: The value to classify. It is converted with ``str()`` before
            being handed to ``pipeline``, so non-string cells are accepted.
        pipeline: A callable that takes a string and returns a sequence whose
            first element is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        A ``(label, score)`` tuple where ``label`` is lower-cased. If anything
        goes wrong while classifying, a warning is shown in the app and
        ``("unknown", 0.0)`` is returned so one bad row does not abort the run.
    """
    try:
        result = pipeline(str(text))
        top_prediction = result[0]
        # Convert sentiment to lowercase
        return top_prediction['label'].lower(), top_prediction['score']
    except Exception as e:  # deliberate best-effort: report the row, keep going
        # Slice the string form, not the raw value: a non-string cell (e.g. a
        # NaN float) is not subscriptable and would raise inside this handler.
        preview = str(text)[:50]
        st.warning(f"Error processing text: {preview}... Error: {str(e)}")
        return "unknown", 0.0
# App layout: three columns in a 1:2:1 ratio; the header widgets are rendered
# in the wider middle column.
# NOTE(review): this copy of the file lost its indentation, so which widgets
# sit inside `with col2:` is a reconstruction -- confirm against the deployed
# layout. The glyph at the start of the title looks like a truncated emoji and
# is kept unchanged because the intended character cannot be recovered.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.title("π Multi-language Sentiment Analysis")

    # The chosen language is the key used to look up the model in MODELS.
    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS),
        index=0,
    )

    # The warning sign was stored as mis-decoded UTF-8 bytes; restored here.
    st.markdown("""
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
""", unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])
# Transcript metadata patterns, compiled once instead of on every row.
_SPEAKER_TIMESTAMP_RE = re.compile(
    r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
_TIMESTAMP_RE = re.compile(r'Start Time: \d+\.\d+ - End Time: \d+\.\d+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_transcript_text(text):
    """Remove speaker/timestamp markers from one cell and collapse whitespace.

    The combined "Speaker ..., Start Time ... - End Time ..." marker is removed
    first; only if that changes nothing is the bare timestamp marker removed.
    Runs of whitespace are then collapsed to single spaces and the result is
    stripped.

    Args:
        text: Cell value from the ``text`` column. Missing values become an
            empty string and other non-string values are converted with
            ``str()``, because ``re.sub`` raises ``TypeError`` on non-strings.

    Returns:
        The cleaned string.
    """
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)
    cleaned_text = _SPEAKER_TIMESTAMP_RE.sub('', text)
    if cleaned_text == text:
        cleaned_text = _TIMESTAMP_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', cleaned_text).strip()


def highlight_sentiment(val):
    """Return the CSS background rule for a sentiment label ('' if unknown).

    Both the English and the Danish spellings of the labels are recognised.
    """
    if val in ["positive", "positiv"]:
        return 'background-color: rgba(0, 255, 0, 0.2)'
    if val in ["negative", "negativ"]:
        return 'background-color: rgba(255, 0, 0, 0.2)'
    if val == "neutral":
        return 'background-color: rgba(128, 128, 128, 0.2)'
    return ''


# NOTE(review): this copy of the file lost its indentation. The nesting below
# (this block at top level, only the analysis itself inside the spinner) is a
# reconstruction -- confirm against the deployed layout.
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        if "text" not in df.columns:
            st.error("CSV must contain a 'text' column")
        elif df.empty:
            # A header-only CSV would otherwise fail with a division by zero
            # when the percentages are computed below.
            st.warning("The uploaded CSV contains no rows to analyze.")
        else:
            with st.spinner(f"π Analyzing sentiments in {selected_language}..."):
                df['cleaned_text'] = df['text'].apply(clean_transcript_text)

                # NOTE(review): the model is rebuilt each time this branch
                # executes; consider caching it if reloads prove slow.
                sentiment_pipeline = pipeline(
                    "text-classification",
                    model=MODELS[selected_language],
                    truncation=True,
                    max_length=512,
                )
                results = [
                    process_sentiment(text, sentiment_pipeline)
                    for text in df["cleaned_text"]
                ]
                df["sentiment"] = [label for label, _ in results]
                df["confidence"] = [score for _, score in results]

            st.markdown("### π Analysis Results")

            # The English branch counts "positive"/"negative"; the other
            # (Danish) branch counts "positiv"/"negativ". "neutral" is shared.
            if selected_language == 'English':
                pos_label, neg_label = "positive", "negative"
            else:  # Danish
                pos_label, neg_label = "positiv", "negativ"
            label_counts = df["sentiment"].value_counts()
            total_rows = len(df)
            sentiment_metrics = [
                ("Positive Sentiments", int(label_counts.get(pos_label, 0))),
                ("Neutral Sentiments", int(label_counts.get("neutral", 0))),
                ("Negative Sentiments", int(label_counts.get(neg_label, 0))),
            ]

            metric_columns = st.columns(4)
            for column, (title, count) in zip(metric_columns, sentiment_metrics):
                with column:
                    st.metric(title, f"{count} ({count/total_rows*100:.1f}%)")
            with metric_columns[3]:
                st.metric(
                    "Average Confidence",
                    f"{df['confidence'].mean():.1%}"
                )

            st.markdown("#### Preview")
            # Copy the slice so formatting the preview never writes through to
            # (or warns about) the full results frame that is downloaded below.
            preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
            preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")

            # Newer pandas renames Styler.applymap to Styler.map and deprecates
            # the old name; use whichever this pandas version provides.
            styler = preview_df.style
            style_cells = getattr(styler, "map", None) or styler.applymap
            st.dataframe(
                style_cells(highlight_sentiment, subset=['sentiment']),
                use_container_width=True
            )

            # The floppy-disk emoji was stored as mis-decoded UTF-8; restored.
            st.markdown("### 💾 Download Results")
            csv_data = df.to_csv(index=False)
            st.download_button(
                label="Download Complete Analysis",
                data=csv_data,
                file_name=f"sentiment_results_{selected_language.lower()}.csv",
                mime="text/csv"
            )
    except Exception as e:  # top-level boundary: report instead of crashing
        st.error(f"Error processing file: {str(e)}")
        st.error("Full error details:")
        # Show the traceback; repeating str(e) here added no new information.
        st.code(traceback.format_exc())
else:
    # Nothing uploaded yet: show the usage instructions.
    st.markdown("""
<div class="instructions">
<h4>π How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
""", unsafe_allow_html=True)