Spaces:

Mohamedenzeyad
/

accent-classification

Build error

App Files Files Community

accent-classification / src /streamlit_app.py

Mohamedenzeyad

Update src/streamlit_app.py

50843ef verified 6 months ago

raw

history blame contribute delete

11.3 kB

	import os
	import streamlit as st
	import tempfile
	import subprocess
	import requests
	from urllib.parse import urlparse
	import json
	import torch
	import torchaudio

	# Select the "soundfile" audio backend when torchaudio still exposes the
	# selector. `set_audio_backend` is deprecated in torchaudio 2.x and may be
	# missing from newer releases, so it is only called when present instead of
	# crashing the app at import time.
	if hasattr(torchaudio, "set_audio_backend"):
	    torchaudio.set_audio_backend("soundfile")

	# Point the Hugging Face caches at a directory under /tmp, a location that is
	# commonly writable on Linux/Docker hosts.
	cache_dir = "/tmp/hf_cache"
	os.environ["HF_HOME"] = cache_dir
	os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir

	# --- Accent Model Cache Directory ---
	# Dedicated directory for SpeechBrain models inside the cache.
	speechbrain_model_cache_base = os.path.join(cache_dir, "speechbrain_models_accent_id")

	# Full path handed to the model loader as `savedir`.
	model_save_path = os.path.join(speechbrain_model_cache_base, "accent-id-commonaccent_xlsr-en-english")

	# Creating the deepest directory also creates `cache_dir` and
	# `speechbrain_model_cache_base`, so a single call prepares all three.
	os.makedirs(model_save_path, exist_ok=True)
	# --- End Accent Model Cache Directory ---

	# Try importing the accent detection model. MODEL_AVAILABLE records whether
	# SpeechBrain could be imported; load_accent_model is only defined when it was.
	try:
	    from speechbrain.pretrained.interfaces import foreign_class
	    MODEL_AVAILABLE = True

	    @st.cache_resource
	    def _load_accent_model_cached():
	        """Build the accent classifier; raises on failure.

	        Only successful loads should be cached, so errors are deliberately
	        left to propagate to the uncached wrapper instead of being turned
	        into a cached ``None``.
	        """
	        return foreign_class(
	            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
	            pymodule_file="custom_interface.py",
	            # The class is spelled with a single "2" after "vec"; the previous
	            # "Wav2vec22" spelling was a typo.
	            classname="CustomEncoderWav2vec2Classifier",
	            savedir=model_save_path,  # Use the pre-prepared full path
	        )

	    def load_accent_model():
	        """Load the XLSR Wav2Vec 2.0 accent classification model.

	        Returns:
	            The loaded classifier, or ``None`` when loading failed (the error
	            is reported in the Streamlit UI).
	        """
	        try:
	            st.info(f"Attempting to load model from: {model_save_path}")
	            return _load_accent_model_cached()
	        except Exception as e:
	            # Report the real failure; a permissions problem on the cache
	            # directory is only one possible cause.
	            st.error(f"Failed to load accent model: {e}")
	            st.error(f"If this is a permissions problem, please ensure '{model_save_path}' is writable.")
	            return None
	except ImportError:
	    MODEL_AVAILABLE = False
	    st.error("SpeechBrain not available. Install with: pip install speechbrain")

	# Accent labels reported by the app. The position of each label is used as
	# the index into the model's probability vector, so the order matters.
	# NOTE(review): the order must match the model's own label encoder - confirm.
	ACCENT_CATEGORIES = [
	    "US",
	    "England",
	    "Australia",
	    "Indian",
	    "Canada",
	    "Scotland",
	    "Ireland",
	    "Wales",
	    "African",
	    "NewZealand",
	    "Bermuda",
	    "Malaysia",
	    "Philippines",
	    "Singapore",
	    "HongKong",
	    "SouthAtlantic",
	]

	def download_video_audio(url, output_path):
	    """Download a video and extract its audio track to ``output_path``.

	    URLs whose path ends in a known video extension are fetched directly and
	    converted with ffmpeg (16 kHz, mono); every other URL is handed to yt-dlp
	    with WAV audio extraction.

	    Args:
	        url: Video URL supplied by the user.
	        output_path: Destination audio file; expected to end in ``.wav``.

	    Returns:
	        True when audio was written, False otherwise. Failures are reported
	        through ``st.error`` instead of being raised.
	    """
	    base_path = os.path.splitext(output_path)[0]
	    try:
	        # Inspect only the URL path so query strings ("clip.mp4?token=...")
	        # and upper-case extensions are still recognised as direct files.
	        extension = os.path.splitext(urlparse(url).path)[1].lower()

	        if extension in ('.mp4', '.avi', '.mov', '.mkv'):
	            temp_video = base_path + extension
	            try:
	                # Download direct video file
	                with requests.get(url, stream=True, timeout=30) as response:
	                    response.raise_for_status()
	                    with open(temp_video, 'wb') as f:
	                        for chunk in response.iter_content(chunk_size=8192):
	                            f.write(chunk)

	                # Extract audio using ffmpeg
	                cmd = [
	                    'ffmpeg', '-i', temp_video, '-ar', '16000',
	                    '-ac', '1', '-y', output_path
	                ]
	                subprocess.run(cmd, check=True, capture_output=True)
	            finally:
	                # Remove the intermediate video even when the download or
	                # the conversion failed.
	                if os.path.exists(temp_video):
	                    os.remove(temp_video)
	            return True

	        # Use yt-dlp for other video platforms (Loom, YouTube, etc.)
	        cmd = [
	            'yt-dlp', '--extract-audio', '--audio-format', 'wav',
	            '--audio-quality', '0', '--output', base_path + '.%(ext)s',
	            # "--" ends option parsing so a user-supplied URL starting with
	            # "-" cannot be interpreted as a yt-dlp option.
	            '--', url
	        ]
	        result = subprocess.run(cmd, capture_output=True, text=True)

	        if result.returncode != 0:
	            st.error(f"yt-dlp error: {result.stderr}")
	            return False

	        # A zero exit status is not enough: the caller reads output_path, so
	        # make sure audio actually landed there.
	        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
	            st.error("yt-dlp error: no audio was written to the expected output file")
	            return False
	        return True

	    except subprocess.CalledProcessError as e:
	        # Surface ffmpeg's own diagnostics, which the bare exception omits.
	        details = e.stderr.decode(errors="replace") if e.stderr else ""
	        st.error(f"Download failed: {e}\n{details}".rstrip())
	        return False
	    except Exception as e:
	        st.error(f"Download failed: {e}")
	        return False
	def analyze_accent(audio_file_path, model):
	    """Classify the accent of an audio file with the loaded model.

	    Args:
	        audio_file_path: Path of the audio file to classify.
	        model: Classifier exposing ``classify_file(path)`` that returns
	            ``(out_prob, score, index, text_lab)``.

	    Returns:
	        ``(predicted_accent, confidence, accent_scores)`` where ``confidence``
	        and the values of ``accent_scores`` are the model outputs multiplied
	        by 100, or ``(None, None, None)`` when analysis fails (the error is
	        reported through ``st.error``).
	    """
	    try:
	        out_prob, score, index, text_lab = model.classify_file(audio_file_path)

	        # detach()/cpu() make the conversion safe for tensors that track
	        # gradients or live on an accelerator; .numpy() alone raises there.
	        probs = out_prob.detach().cpu().squeeze().numpy()

	        # assumes out_prob holds one value in [0, 1] per entry of
	        # ACCENT_CATEGORIES, in the same order - TODO confirm against the
	        # model's label encoder.
	        accent_scores = {
	            name: float(probs[i]) * 100
	            for i, name in enumerate(ACCENT_CATEGORIES)
	        }

	        # The label appears to come back wrapped in a list (one entry per
	        # classified file); unwrap it so callers receive a plain label.
	        if isinstance(text_lab, (list, tuple)):
	            predicted_accent = text_lab[0] if text_lab else None
	        else:
	            predicted_accent = text_lab
	        confidence = float(score) * 100

	        return predicted_accent, confidence, accent_scores

	    except Exception as e:
	        st.error(f"Accent analysis failed: {e}")
	        return None, None, None
	def generate_summary(accent, confidence, top_scores):
	    """Generate a text summary of the accent analysis.

	    Args:
	        accent: Label of the predicted accent.
	        confidence: Confidence of the prediction as a percentage.
	        top_scores: Mapping of accent label to percentage score.

	    Returns:
	        A multi-line string with the primary accent, a confidence level
	        (Very High > 80, High > 60, Moderate > 40, otherwise Low), the three
	        highest-scoring accents and a recommendation line chosen by the
	        confidence (> 70, > 50, otherwise).
	    """
	    if confidence > 80:
	        confidence_level = "Very High"
	    elif confidence > 60:
	        confidence_level = "High"
	    elif confidence > 40:
	        confidence_level = "Moderate"
	    else:
	        confidence_level = "Low"

	    # Rank the accents and format up to three of them.
	    ranked = sorted(top_scores.items(), key=lambda item: item[1], reverse=True)[:3]
	    top_lines = [
	        f"{position}. {name}: {pct:.1f}%"
	        for position, (name, pct) in enumerate(ranked, start=1)
	    ]
	    # With fewer than three scores the missing ranks are shown as "N/A"
	    # instead of raising IndexError.
	    top_lines += [f"{position}. N/A" for position in range(len(top_lines) + 1, 4)]

	    summary = f"""
	Primary Accent: {accent} ({confidence:.1f}% confidence)
	Confidence Level: {confidence_level}

	Top 3 Detected Accents:
	{top_lines[0]}
	{top_lines[1]}
	{top_lines[2]}

	Hiring Recommendation:
	"""

	    if confidence > 70:
	        summary += "✅ Strong English accent detected - Suitable for English-speaking roles"
	    elif confidence > 50:
	        summary += "⚠️ Moderate English accent detected - May require accent assessment"
	    else:
	        summary += "❌ Weak English accent signal - Further evaluation recommended"

	    return summary
	def _build_result_json(accent, confidence, accent_scores, ranked_accents):
	    """Assemble the machine-readable result shown in the JSON expander."""
	    if confidence > 70:
	        recommendation = "suitable"
	    elif confidence > 50:
	        recommendation = "assessment_needed"
	    else:
	        recommendation = "further_evaluation"

	    return {
	        "primary_accent": accent,
	        "confidence_score": round(confidence, 1),
	        "accent_probabilities": {k: round(v, 1) for k, v in accent_scores.items()},
	        "top_3_accents": [
	            {"accent": k, "probability": round(v, 1)}
	            for k, v in ranked_accents[:3]
	        ],
	        "recommendation": recommendation,
	    }


	def _render_results(accent, confidence, accent_scores):
	    """Render the metrics, probability bars, summary and JSON for one analysis."""
	    st.markdown("---")
	    st.subheader("📊 Analysis Results")

	    # Main result
	    col1, col2 = st.columns(2)

	    with col1:
	        st.metric(
	            label="🎯 Detected Accent",
	            value=accent,
	            help="Primary English accent classification"
	        )

	    with col2:
	        st.metric(
	            label="🎪 Confidence Score",
	            value=f"{confidence:.1f}%",
	            help="Model confidence in the prediction"
	        )

	    # Detailed breakdown
	    st.subheader("📈 Accent Probability Breakdown")

	    # Rank once; the bars use the top 8 and the JSON output the top 3.
	    ranked_accents = sorted(accent_scores.items(), key=lambda item: item[1], reverse=True)

	    for accent_name, score in ranked_accents[:8]:
	        # st.progress only accepts floats in [0.0, 1.0]; clamp so an
	        # out-of-range score cannot abort the page.
	        fraction = min(max(score / 100, 0.0), 1.0)
	        st.progress(fraction, text=f"{accent_name}: {score:.1f}%")

	    # Summary
	    st.subheader("📝 Assessment Summary")
	    summary = generate_summary(accent, confidence, accent_scores)
	    st.markdown(summary)

	    # JSON output for API integration
	    with st.expander("🔧 JSON Output (for API integration)"):
	        st.json(_build_result_json(accent, confidence, accent_scores, ranked_accents))


	def _analyze_url(video_url, model):
	    """Download the audio for ``video_url``, classify it and render the outcome."""
	    # Reserve a temporary .wav path; the file is closed at once so the
	    # download step can write to it, and removed in the finally block.
	    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
	        audio_path = tmp_file.name

	    try:
	        # Step 1: Download and extract audio
	        with st.spinner("📥 Downloading video and extracting audio..."):
	            if download_video_audio(video_url, audio_path):
	                st.success("✅ Audio extracted successfully")

	                # Play the extracted audio
	                with open(audio_path, 'rb') as audio_file:
	                    st.audio(audio_file.read(), format="audio/wav")
	            else:
	                st.error("❌ Failed to extract audio")
	                st.stop()

	        # Step 2: Analyze accent
	        with st.spinner("🧠 Analyzing accent with XLSR Wav2Vec 2.0..."):
	            accent, confidence, accent_scores = analyze_accent(audio_path, model)

	        if accent:
	            _render_results(accent, confidence, accent_scores)
	        else:
	            st.error("❌ Accent analysis failed")

	    finally:
	        # Cleanup
	        if os.path.exists(audio_path):
	            os.remove(audio_path)


	def main():
	    """Render the Streamlit page: intro, model loading, URL input, results, footer."""
	    st.set_page_config(
	        page_title="English Accent Detector",
	        page_icon="🗣️",
	        layout="wide"
	    )

	    st.title("🗣️ English Accent Detection Tool")
	    st.subheader("For Hiring & Language Assessment")

	    st.markdown("""
	Purpose: Analyze spoken English accents from video URLs to assist in hiring decisions.

	Supported: Loom videos, direct MP4 links, YouTube, and other video platforms.
	""")

	    # Load model
	    if not MODEL_AVAILABLE:
	        st.stop()

	    with st.spinner("Loading XLSR Wav2Vec 2.0 model..."):
	        model = load_accent_model()
	    # The loader signals failure with None; test for that explicitly rather
	    # than relying on the truthiness of the model object.
	    if model is None:
	        st.error("❌ Could not load accent detection model")
	        st.stop()
	    st.success("✅ Accent detection model loaded successfully!")

	    # Input section
	    st.markdown("---")
	    st.subheader("📥 Video Input")

	    video_url = st.text_input(
	        "Enter Video URL",
	        placeholder="https://www.loom.com/share/... or direct MP4 link",
	        help="Supports Loom, YouTube, direct video files, and most video platforms"
	    )
	    # Pasted URLs often carry stray whitespace that would break the download.
	    video_url = (video_url or "").strip()

	    if video_url:
	        st.info(f"🔗 URL: {video_url}")

	        if st.button("🎯 Analyze Accent", type="primary"):
	            _analyze_url(video_url, model)

	    # Footer
	    st.markdown("---")
	    st.markdown("""
	Technical Details:
	- Model: XLSR Wav2Vec 2.0 (95% accuracy on English accents)
	- Supports: 16 English accent varieties
	- Processing: Automatic audio extraction and resampling to 16kHz

	Built for hiring teams to assess English language proficiency
	""")
	# Script entry point: build the page only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()