Spaces:
Build error
Build error
| import os | |
| import streamlit as st | |
| import tempfile | |
| import subprocess | |
| import requests | |
| from urllib.parse import urlparse | |
| import json | |
| import torch | |
| import torchaudio | |
# Select the "soundfile" audio backend when the installed torchaudio still
# exposes the selector. The global backend switch was deprecated during the
# torchaudio 2.x series and is not present in every release, so an unguarded
# call would abort the whole app at import time with AttributeError.
_set_audio_backend = getattr(torchaudio, "set_audio_backend", None)
if callable(_set_audio_backend):
    _set_audio_backend("soundfile")

# Point the HuggingFace caches at a directory under /tmp, which is a common
# writable location on Linux/Docker hosts.
cache_dir = "/tmp/hf_cache"
os.environ["HF_HOME"] = cache_dir
os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir

# --- Accent Model Cache Directory ---
# Dedicated directory for SpeechBrain models inside the cache above.
speechbrain_model_cache_base = os.path.join(cache_dir, "speechbrain_models_accent_id")

# Full path handed to SpeechBrain as ``savedir`` for the accent model.
model_save_path = os.path.join(
    speechbrain_model_cache_base,
    "accent-id-commonaccent_xlsr-en-english",
)

# Create every level up front so a permission problem surfaces here rather
# than in the middle of a model download.
for _cache_path in (cache_dir, speechbrain_model_cache_base, model_save_path):
    os.makedirs(_cache_path, exist_ok=True)
# --- End Accent Model Cache Directory ---
# Import the SpeechBrain loader defensively so a missing dependency is
# reported in the UI instead of crashing the script with a traceback.
try:
    try:
        # Location used by SpeechBrain 1.0 and later.
        from speechbrain.inference.interfaces import foreign_class
    except ImportError:
        # Location used by older SpeechBrain releases.
        from speechbrain.pretrained.interfaces import foreign_class
    MODEL_AVAILABLE = True
except ImportError:
    MODEL_AVAILABLE = False
    # NOTE(review): this runs at import time, i.e. before main() calls
    # st.set_page_config(); some Streamlit versions require set_page_config
    # to be the first Streamlit command - confirm against the pinned version.
    st.error("SpeechBrain not available. Install with: pip install speechbrain")


def _build_accent_model():
    """Instantiate the accent classifier; raises on any failure."""
    return foreign_class(
        source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
        pymodule_file="custom_interface.py",
        # Fixed typo: the interface class is "...Wav2vec2Classifier", not
        # "...Wav2vec22Classifier".
        classname="CustomEncoderWav2vec2Classifier",
        savedir=model_save_path,
    )


# Streamlit re-executes the script on every interaction. Caching the built
# model as a resource avoids reloading it each time. Only successful results
# are cached because failures propagate as exceptions from the builder.
if hasattr(st, "cache_resource"):
    _build_accent_model = st.cache_resource(show_spinner=False)(_build_accent_model)


def load_accent_model():
    """Load the XLSR Wav2Vec 2.0 accent classification model.

    Returns:
        The loaded classifier, or ``None`` when SpeechBrain is not installed
        or loading fails. Failures are reported through ``st.error``.
    """
    if not MODEL_AVAILABLE:
        return None

    st.info(f"Attempting to load model from: {model_save_path}")
    try:
        return _build_accent_model()
    except PermissionError as e:
        # Only a real permission problem gets the "make it writable" hint.
        st.error(
            f"Failed to load accent model: {e} - "
            f"Please ensure '{model_save_path}' is writable."
        )
        return None
    except Exception as e:
        # UI boundary: report the actual cause instead of a hard-coded errno.
        st.error(f"Failed to load accent model: {e}")
        return None
# English accent labels. analyze_accent pairs entry ``i`` of this list with
# entry ``i`` of the model's output vector, so the order is significant.
ACCENT_CATEGORIES = [
    "US",
    "England",
    "Australia",
    "Indian",
    "Canada",
    "Scotland",
    "Ireland",
    "Wales",
    "African",
    "NewZealand",
    "Bermuda",
    "Malaysia",
    "Philippines",
    "Singapore",
    "HongKong",
    "SouthAtlantic",
]
# File extensions treated as directly downloadable video files.
_DIRECT_VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.mkv')


def download_video_audio(url, output_path):
    """Download the media at ``url`` and write its audio to ``output_path``.

    Direct video links are fetched over HTTP and converted with ffmpeg to
    16 kHz mono. Every other URL is handed to yt-dlp with WAV extraction.

    Args:
        url: Video URL supplied by the user.
        output_path: Destination path of the WAV file (expected to end in
            ``.wav``); sibling paths are derived from its stem.

    Returns:
        ``True`` on success, ``False`` on failure. Failures are reported via
        ``st.error`` rather than raised.
    """
    # Swap only the trailing extension; str.replace would also rewrite a
    # ".wav" occurring earlier in the path.
    stem, _ext = os.path.splitext(output_path)

    try:
        # Test the URL *path* so a query string, fragment or upper-case
        # extension ("clip.MP4?token=...") is still recognised as direct.
        url_path = urlparse(url).path.lower()

        if url_path.endswith(_DIRECT_VIDEO_EXTENSIONS):
            temp_video = stem + '.mp4'
            try:
                # The context manager returns the connection to the pool
                # even if the download is interrupted.
                with requests.get(url, stream=True, timeout=30) as response:
                    response.raise_for_status()
                    with open(temp_video, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                # Extract audio using ffmpeg (16 kHz, mono, overwrite).
                cmd = [
                    'ffmpeg', '-i', temp_video, '-ar', '16000',
                    '-ac', '1', '-y', output_path,
                ]
                subprocess.run(cmd, check=True, capture_output=True)
            except subprocess.CalledProcessError as e:
                # Surface ffmpeg's own diagnostics instead of only the exit code.
                detail = (e.stderr or b"").decode(errors="replace")
                st.error(f"ffmpeg error: {detail}")
                return False
            finally:
                # Remove the intermediate video on success and on failure.
                if os.path.exists(temp_video):
                    os.remove(temp_video)
            return True

        # Use yt-dlp for other video platforms (Loom, YouTube, etc.).
        cmd = [
            'yt-dlp', '--extract-audio', '--audio-format', 'wav',
            '--audio-quality', '0', '--output', stem + '.%(ext)s',
            # "--" ends option parsing so a user-supplied URL beginning with
            # "-" cannot be interpreted as a yt-dlp option.
            '--', url,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            st.error(f"yt-dlp error: {result.stderr}")
            return False
        return True
    except Exception as e:
        # UI boundary: any other failure (network, missing binary, disk) is
        # shown to the user and reported as an unsuccessful download.
        st.error(f"Download failed: {e}")
        return False
def analyze_accent(audio_file_path, model):
    """Classify the accent of an audio file with the loaded model.

    Args:
        audio_file_path: Path of the audio file passed to
            ``model.classify_file``.
        model: Object exposing ``classify_file(path)`` that returns
            ``(out_prob, score, index, text_lab)``.

    Returns:
        ``(predicted_accent, confidence, accent_scores)`` where
        ``confidence`` is ``score * 100`` and ``accent_scores`` maps each
        name in ``ACCENT_CATEGORIES`` to its output value times 100.
        On any failure the error is shown via ``st.error`` and
        ``(None, None, None)`` is returned.
    """
    try:
        out_prob, score, index, text_lab = model.classify_file(audio_file_path)

        # detach()/cpu() make the conversion work for tensors that live on a
        # GPU or still carry gradient history; reshape(-1) flattens any
        # leading batch dimension.
        probs = out_prob.detach().cpu().reshape(-1).tolist()

        # NOTE(review): this assumes ACCENT_CATEGORIES is in the same order
        # as the model's label encoder and that out_prob holds probabilities
        # (not log-probabilities) - confirm against the model's interface.
        # zip() keeps this safe when the model emits fewer classes than the
        # list has names.
        accent_scores = {
            name: float(prob) * 100
            for name, prob in zip(ACCENT_CATEGORIES, probs)
        }

        # The decoded label may arrive wrapped in a one-element sequence;
        # unwrap it so callers receive a plain label rather than a list.
        if isinstance(text_lab, (list, tuple)) and text_lab:
            predicted_accent = text_lab[0]
        else:
            predicted_accent = text_lab

        confidence = float(score) * 100
        return predicted_accent, confidence, accent_scores
    except Exception as e:
        st.error(f"Accent analysis failed: {e}")
        return None, None, None
def generate_summary(accent, confidence, top_scores):
    """Build the Markdown summary of an accent analysis.

    Args:
        accent: Label of the primary detected accent.
        confidence: Confidence of the primary prediction, in percent.
        top_scores: Mapping of accent name to score in percent. May hold
            fewer than three entries (or be empty/None); only the entries
            that exist are listed.

    Returns:
        A Markdown string with the primary accent, a confidence level, up
        to three top-ranked accents and a hiring recommendation.
    """
    if confidence > 80:
        confidence_level = "Very High"
    elif confidence > 60:
        confidence_level = "High"
    elif confidence > 40:
        confidence_level = "Moderate"
    else:
        confidence_level = "Low"

    # Rank by score and keep at most three. Slicing never raises, so short
    # inputs no longer trigger an IndexError as fixed indexing did.
    ranked = sorted(
        (top_scores or {}).items(),
        key=lambda item: item[1],
        reverse=True,
    )[:3]

    if confidence > 70:
        recommendation = "✅ Strong English accent detected - Suitable for English-speaking roles"
    elif confidence > 50:
        recommendation = "⚠️ Moderate English accent detected - May require accent assessment"
    else:
        recommendation = "❌ Weak English accent signal - Further evaluation recommended"

    lines = [
        "",  # leading newline, as produced by the original template
        f"**Primary Accent:** {accent} ({confidence:.1f}% confidence)",
        f"**Confidence Level:** {confidence_level}",
        "**Top 3 Detected Accents:**",
    ]
    lines.extend(
        f"{rank}. {name}: {value:.1f}%"
        for rank, (name, value) in enumerate(ranked, start=1)
    )
    lines.append("**Hiring Recommendation:**")
    lines.append(recommendation)
    return "\n".join(lines)
def _progress_fraction(percent):
    """Convert a percentage into the 0.0-1.0 range accepted by st.progress."""
    fraction = percent / 100
    if fraction != fraction:  # NaN compares unequal to itself
        return 0.0
    return min(max(fraction, 0.0), 1.0)


def _recommendation_code(confidence):
    """Map a confidence percentage to the machine-readable recommendation."""
    if confidence > 70:
        return "suitable"
    if confidence > 50:
        return "assessment_needed"
    return "further_evaluation"


def _render_results(accent, confidence, accent_scores):
    """Render the metrics, score breakdown, summary and JSON for one analysis."""
    ranked = sorted(accent_scores.items(), key=lambda item: item[1], reverse=True)

    st.markdown("---")
    st.subheader("📊 Analysis Results")

    # Main result
    col1, col2 = st.columns(2)
    with col1:
        st.metric(
            label="🎯 Detected Accent",
            value=accent,
            help="Primary English accent classification",
        )
    with col2:
        st.metric(
            label="💪 Confidence Score",
            value=f"{confidence:.1f}%",
            help="Model confidence in the prediction",
        )

    # Detailed breakdown: top 8 accents. The bar value is clamped because
    # st.progress rejects floats outside [0.0, 1.0].
    st.subheader("📈 Accent Probability Breakdown")
    for accent_name, score in ranked[:8]:
        st.progress(_progress_fraction(score), text=f"{accent_name}: {score:.1f}%")

    # Summary
    st.subheader("📝 Assessment Summary")
    st.markdown(generate_summary(accent, confidence, accent_scores))

    # JSON output for API integration
    with st.expander("🔧 JSON Output (for API integration)"):
        st.json({
            "primary_accent": accent,
            "confidence_score": round(confidence, 1),
            "accent_probabilities": {k: round(v, 1) for k, v in accent_scores.items()},
            "top_3_accents": [
                {"accent": k, "probability": round(v, 1)}
                for k, v in ranked[:3]
            ],
            "recommendation": _recommendation_code(confidence),
        })


def main():
    """Run the Streamlit page: load the model, take a URL, show the analysis."""
    st.set_page_config(
        page_title="English Accent Detector",
        page_icon="🗣️",
        layout="wide",
    )
    st.title("🗣️ English Accent Detection Tool")
    st.subheader("For Hiring & Language Assessment")
    st.markdown(
        "\n"
        "**Purpose:** Analyze spoken English accents from video URLs to assist in hiring decisions.\n"
        "**Supported:** Loom videos, direct MP4 links, YouTube, and other video platforms.\n"
    )

    # Load model
    if not MODEL_AVAILABLE:
        st.stop()
    with st.spinner("Loading XLSR Wav2Vec 2.0 model..."):
        model = load_accent_model()
    # Compare against None explicitly: truthiness of a model object is not
    # a reliable "did it load" signal.
    if model is None:
        st.error("❌ Could not load accent detection model")
        st.stop()
    st.success("✅ Accent detection model loaded successfully!")

    # Input section
    st.markdown("---")
    st.subheader("🎥 Video Input")
    video_url = st.text_input(
        "Enter Video URL",
        placeholder="https://www.loom.com/share/... or direct MP4 link",
        help="Supports Loom, YouTube, direct video files, and most video platforms",
    )

    if video_url:
        st.info(f"🔗 **URL:** {video_url}")
        if st.button("🎯 Analyze Accent", type="primary"):
            # Reserve a temporary .wav path; the file must outlive the
            # context manager so the downloader can write to it.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
                audio_path = tmp_file.name
            try:
                # Step 1: Download and extract audio
                with st.spinner("📥 Downloading video and extracting audio..."):
                    if download_video_audio(video_url, audio_path):
                        st.success("✅ Audio extracted successfully")
                        # Play the extracted audio
                        with open(audio_path, 'rb') as audio_file:
                            st.audio(audio_file.read(), format="audio/wav")
                    else:
                        st.error("❌ Failed to extract audio")
                        st.stop()

                # Step 2: Analyze accent
                with st.spinner("🧠 Analyzing accent with XLSR Wav2Vec 2.0..."):
                    accent, confidence, accent_scores = analyze_accent(audio_path, model)

                if accent:
                    _render_results(accent, confidence, accent_scores)
                else:
                    st.error("❌ Accent analysis failed")
            finally:
                # Cleanup runs on success, on failure and when st.stop()
                # interrupts the script.
                if os.path.exists(audio_path):
                    os.remove(audio_path)

    # Footer
    st.markdown("---")
    st.markdown(
        "\n"
        "**Technical Details:**\n"
        "- Model: XLSR Wav2Vec 2.0 (95% accuracy on English accents)\n"
        "- Supports: 16 English accent varieties\n"
        "- Processing: Automatic audio extraction and resampling to 16kHz\n"
        "**Built for hiring teams to assess English language proficiency**\n"
    )


if __name__ == "__main__":
    main()