import os
import streamlit as st
import tempfile
import subprocess
import requests
from urllib.parse import urlparse
import json
import torch
import torchaudio

# Select the "soundfile" backend for torchaudio audio I/O.
# The call is guarded because torchaudio 2.x deprecated set_audio_backend()
# in favour of its backend dispatcher, so the attribute may be missing on
# newer installs; an unguarded call would then crash the app at import time.
if hasattr(torchaudio, "set_audio_backend"):
    torchaudio.set_audio_backend("soundfile")

# Root cache directory for Hugging Face downloads; created up front and
# exported through the environment variables read by the Hugging Face tooling.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)
os.environ.update(HF_HOME=cache_dir, HUGGINGFACE_HUB_CACHE=cache_dir)

# --- Accent model cache layout ---
# <cache_dir>/speechbrain_models_accent_id/   -> base folder for SpeechBrain models
#     accent-id-commonaccent_xlsr-en-english/ -> folder passed as 'savedir' to the loader
speechbrain_model_cache_base = os.path.join(cache_dir, "speechbrain_models_accent_id")
model_save_path = os.path.join(
    speechbrain_model_cache_base, "accent-id-commonaccent_xlsr-en-english"
)

# Create both levels explicitly so they exist before the model is loaded.
os.makedirs(speechbrain_model_cache_base, exist_ok=True)
os.makedirs(model_save_path, exist_ok=True)
# --- End accent model cache layout ---

# Try importing the accent detection model.
# SpeechBrain is treated as optional at import time: when it is missing,
# MODEL_AVAILABLE is False and load_accent_model() is not defined.
try:
    from speechbrain.pretrained.interfaces import foreign_class
    MODEL_AVAILABLE = True

    @st.cache_resource
    def _load_accent_model_cached():
        """Build the accent classifier; the result is cached by Streamlit.

        Exceptions are deliberately allowed to propagate: only a successful
        return value is cached, so a failed attempt can be retried on the
        next run instead of caching ``None``.
        """
        return foreign_class(
            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
            pymodule_file="custom_interface.py",
            # Class defined in the model's custom_interface.py. The previous
            # value had a doubled "2" ("...Wav2vec22Classifier").
            classname="CustomEncoderWav2vec2Classifier",
            savedir=model_save_path,  # Use the pre-prepared full path
        )

    def load_accent_model():
        """Load the XLSR Wav2Vec 2.0 accent classification model.

        Returns:
            The classifier object, or ``None`` when loading fails. Failures
            are reported to the user through ``st.error`` instead of raised.
        """
        try:
            st.info(f"Attempting to load model from: {model_save_path}")
            return _load_accent_model_cached()
        except Exception as e:
            # Report the real error instead of assuming a permission problem.
            st.error(f"Failed to load accent model: {e}")
            if isinstance(e, PermissionError):
                st.error(f"Please ensure '{model_save_path}' is writable.")
            return None
except ImportError:
    MODEL_AVAILABLE = False
    st.error("SpeechBrain not available. Install with: pip install speechbrain")

# Accent labels, paired by position with the model's probability output
# (entry i names the class whose probability is at index i).
# NOTE(review): the order is assumed to match the model's label encoder — confirm.
ACCENT_CATEGORIES = [
    "US",
    "England",
    "Australia",
    "Indian",
    "Canada",
    "Scotland",
    "Ireland",
    "Wales",
    "African",
    "NewZealand",
    "Bermuda",
    "Malaysia",
    "Philippines",
    "Singapore",
    "HongKong",
    "SouthAtlantic",
]

def download_video_audio(url, output_path):
    """Download a video from ``url`` and write its audio track to ``output_path``.

    Direct links to video files (.mp4/.avi/.mov/.mkv) are fetched with
    ``requests`` and converted by ``ffmpeg`` to 16 kHz mono audio. Every
    other URL is handed to ``yt-dlp``, which extracts the audio as WAV.

    Args:
        url: Address of the video (direct file link or a page yt-dlp supports).
        output_path: Destination of the audio file; expected to end in ``.wav``.

    Returns:
        True when the audio file was produced, False otherwise. Failures are
        reported through ``st.error`` rather than raised.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
    # splitext only touches the final suffix; str.replace('.wav', ...) would
    # also rewrite a '.wav' that happens to appear in a directory name.
    output_base = os.path.splitext(output_path)[0]

    try:
        # Inspect the URL *path* only, case-insensitively, so links such as
        # ".../clip.MP4?token=abc" are still recognised as direct video files.
        extension = os.path.splitext(urlparse(url).path.lower())[1]

        if extension in video_extensions:
            temp_video = output_base + extension
            try:
                # Context manager releases the HTTP connection when done.
                with requests.get(url, stream=True, timeout=30) as response:
                    response.raise_for_status()
                    with open(temp_video, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                # Extract audio using ffmpeg (16 kHz, mono, overwrite output)
                cmd = [
                    'ffmpeg', '-i', temp_video, '-ar', '16000',
                    '-ac', '1', '-y', output_path
                ]
                subprocess.run(cmd, check=True, capture_output=True)
            finally:
                # Remove the intermediate video even when download/ffmpeg fails.
                if os.path.exists(temp_video):
                    os.remove(temp_video)
            return True

        # Use yt-dlp for other video platforms (Loom, YouTube, etc.)
        cmd = [
            'yt-dlp', '--extract-audio', '--audio-format', 'wav',
            '--audio-quality', '0', '--no-playlist',
            '--output', output_base + '.%(ext)s',
            # '--' ends option parsing so a user-supplied URL starting with
            # '-' cannot be interpreted as a yt-dlp option.
            '--', url
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            st.error(f"yt-dlp error: {result.stderr}")
            return False

        # The caller may have pre-created an empty file at output_path, so an
        # existing-but-empty file still means no audio was written.
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            st.error("yt-dlp error: no audio file was produced")
            return False
        return True

    except subprocess.CalledProcessError as e:
        # ffmpeg output was captured as bytes; show it to aid diagnosis.
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        st.error(f"Download failed: {details}")
        return False
    except Exception as e:
        st.error(f"Download failed: {e}")
        return False
def analyze_accent(audio_file_path, model):
    """Classify the accent of an audio file with the loaded model.

    Args:
        audio_file_path: Path of the audio file passed to ``model.classify_file``.
        model: Classifier exposing ``classify_file(path)`` that returns
            ``(out_prob, score, index, text_lab)``.

    Returns:
        A tuple ``(predicted_accent, confidence, accent_scores)`` where
        ``confidence`` is the model score multiplied by 100 and
        ``accent_scores`` maps each label in ``ACCENT_CATEGORIES`` to its
        output value multiplied by 100. On any failure the error is shown
        with ``st.error`` and ``(None, None, None)`` is returned.
    """
    try:
        out_prob, score, _index, text_lab = model.classify_file(audio_file_path)

        # detach() + cpu() make the conversion work for tensors that live on
        # a GPU or still track gradients; .numpy() alone fails on those.
        probs = out_prob.detach().cpu().flatten().tolist()
        if len(probs) < len(ACCENT_CATEGORIES):
            raise ValueError(
                f"model returned {len(probs)} class scores, "
                f"expected {len(ACCENT_CATEGORIES)}"
            )

        # NOTE(review): assumes ACCENT_CATEGORIES is ordered like the model's
        # output classes — confirm against the model's label encoder.
        accent_scores = {
            label: float(prob) * 100
            for label, prob in zip(ACCENT_CATEGORIES, probs)
        }

        # The label may come back wrapped in a list (one entry per item in
        # the batch); unwrap it so callers receive a plain value.
        if isinstance(text_lab, (list, tuple)):
            predicted_accent = str(text_lab[0]) if text_lab else None
        else:
            predicted_accent = text_lab
        confidence = float(score) * 100

        return predicted_accent, confidence, accent_scores

    except Exception as e:
        st.error(f"Accent analysis failed: {e}")
        return None, None, None
def generate_summary(accent, confidence, top_scores):
    """Build the Markdown assessment summary for an accent prediction.

    Args:
        accent: Label of the predicted accent.
        confidence: Confidence of the prediction as a percentage value.
        top_scores: Mapping of accent label to score; the (up to) three
            highest-scoring entries are listed in the summary.

    Returns:
        A Markdown string containing the primary accent, a confidence level,
        the top-scoring accents and a hiring recommendation.
    """
    if confidence > 80:
        confidence_level = "Very High"
    elif confidence > 60:
        confidence_level = "High"
    elif confidence > 40:
        confidence_level = "Moderate"
    else:
        confidence_level = "Low"

    # Up to three best-scoring accents. Building the lines dynamically avoids
    # an IndexError when fewer than three scores are supplied.
    ranked = sorted(top_scores.items(), key=lambda item: item[1], reverse=True)[:3]
    ranking_lines = "\n".join(
        f"    {position}. {name}: {score:.1f}%"
        for position, (name, score) in enumerate(ranked, start=1)
    )

    # Every line keeps the same four-space indent as the original template.
    summary = (
        "\n"
        f"    **Primary Accent:** {accent} ({confidence:.1f}% confidence)\n"
        f"    **Confidence Level:** {confidence_level}\n"
        "\n"
        "    **Top 3 Detected Accents:**\n"
        f"{ranking_lines}\n"
        "\n"
        "    **Hiring Recommendation:**\n"
        "    "
    )

    if confidence > 70:
        summary += "✅ Strong English accent detected - Suitable for English-speaking roles"
    elif confidence > 50:
        summary += "⚠️ Moderate English accent detected - May require accent assessment"
    else:
        summary += "❌ Weak English accent signal - Further evaluation recommended"

    return summary
def _render_results(accent, confidence, accent_scores):
    """Render the metrics, probability bars, summary and JSON for one analysis."""
    st.markdown("---")
    st.subheader("📊 Analysis Results")

    # Main result
    col1, col2 = st.columns(2)

    with col1:
        st.metric(
            label="🎯 Detected Accent",
            value=accent,
            help="Primary English accent classification"
        )

    with col2:
        st.metric(
            label="🎪 Confidence Score",
            value=f"{confidence:.1f}%",
            help="Model confidence in the prediction"
        )

    # Detailed breakdown
    st.subheader("📈 Accent Probability Breakdown")

    # Rank once; the top 8 feed the bars and the top 3 feed the JSON output.
    ranked = sorted(accent_scores.items(), key=lambda item: item[1], reverse=True)

    for accent_name, score in ranked[:8]:
        # st.progress only accepts values in [0.0, 1.0]; clamp so an
        # out-of-range score cannot abort the rendering.
        fraction = min(max(score / 100, 0.0), 1.0)
        st.progress(fraction, text=f"{accent_name}: {score:.1f}%")

    # Summary
    st.subheader("📝 Assessment Summary")
    st.markdown(generate_summary(accent, confidence, accent_scores))

    # Same thresholds as the recommendation text in the summary.
    if confidence > 70:
        recommendation = "suitable"
    elif confidence > 50:
        recommendation = "assessment_needed"
    else:
        recommendation = "further_evaluation"

    # JSON output for API integration
    with st.expander("🔧 JSON Output (for API integration)"):
        result_json = {
            "primary_accent": accent,
            "confidence_score": round(confidence, 1),
            "accent_probabilities": {k: round(v, 1) for k, v in accent_scores.items()},
            "top_3_accents": [
                {"accent": k, "probability": round(v, 1)}
                for k, v in ranked[:3]
            ],
            "recommendation": recommendation
        }
        st.json(result_json)


def main():
    """Streamlit entry point: load the model, take a video URL and show the analysis.

    The flow is: configure the page, load the accent model (stopping the
    script when it is unavailable), read a URL from the user and, when the
    button is pressed, download the audio into a temporary WAV file, classify
    it and render the results. The temporary file is always removed.
    """
    st.set_page_config(
        page_title="English Accent Detector",
        page_icon="🗣️",
        layout="wide"
    )

    st.title("🗣️ English Accent Detection Tool")
    st.subheader("For Hiring & Language Assessment")

    st.markdown("""
    **Purpose:** Analyze spoken English accents from video URLs to assist in hiring decisions.

    **Supported:** Loom videos, direct MP4 links, YouTube, and other video platforms.
    """)

    # Load model
    if not MODEL_AVAILABLE:
        st.stop()

    with st.spinner("Loading XLSR Wav2Vec 2.0 model..."):
        model = load_accent_model()
        # Explicit None check: the loader signals failure with None, and the
        # truthiness of a model object is not a reliable "loaded" indicator.
        if model is None:
            st.error("❌ Could not load accent detection model")
            st.stop()
        st.success("✅ Accent detection model loaded successfully!")

    # Input section
    st.markdown("---")
    st.subheader("📥 Video Input")

    video_url = st.text_input(
        "Enter Video URL",
        placeholder="https://www.loom.com/share/... or direct MP4 link",
        help="Supports Loom, YouTube, direct video files, and most video platforms"
    )

    if video_url:
        st.info(f"🔗 **URL:** {video_url}")

        if st.button("🎯 Analyze Accent", type="primary"):
            # Reserve a temporary .wav path; the file is closed immediately so
            # the download step can write to it.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
                audio_path = tmp_file.name

            try:
                # Step 1: Download and extract audio
                with st.spinner("📥 Downloading video and extracting audio..."):
                    if not download_video_audio(video_url, audio_path):
                        st.error("❌ Failed to extract audio")
                        st.stop()

                    st.success("✅ Audio extracted successfully")

                    # Play the extracted audio
                    with open(audio_path, 'rb') as audio_file:
                        st.audio(audio_file.read(), format="audio/wav")

                # Step 2: Analyze accent
                with st.spinner("🧠 Analyzing accent with XLSR Wav2Vec 2.0..."):
                    accent, confidence, accent_scores = analyze_accent(audio_path, model)

                if accent:
                    _render_results(accent, confidence, accent_scores)
                else:
                    st.error("❌ Accent analysis failed")

            finally:
                # Cleanup runs on success, failure and st.stop() alike.
                if os.path.exists(audio_path):
                    os.remove(audio_path)

    # Footer
    st.markdown("---")
    st.markdown("""
    **Technical Details:**
    - Model: XLSR Wav2Vec 2.0 (95% accuracy on English accents)
    - Supports: 16 English accent varieties
    - Processing: Automatic audio extraction and resampling to 16kHz

    **Built for hiring teams to assess English language proficiency**
    """)
# Script entry point: build the UI only when this file is executed as the
# main script, not when it is imported as a module.
if __name__ == "__main__":
    main()