Spaces:

zakariae2000
/

accent_detection

Build error

App Files Files Community

zakariae2000 committed on May 31

Commit

3ae1bf1

verified ·

1 Parent(s): 747d227

Upload 7 files

Browse files

Files changed (7) hide show

accent_analyzer.py +132 -0
accent_summary.py +24 -0
app.py +115 -0
noise_reduce.py +45 -0
requirements.txt +24 -0
test_audio_extraction.py +45 -0
test_video_url_download.py +43 -0

accent_analyzer.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import torch
+import torchaudio
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
+import librosa
+import numpy as np
+import os
+# Model loading function to allow lazy loading
+def load_model():
+    """
+    Load the accent classification model and feature extractor.
+    Returns:
+        tuple: (model, feature_extractor)
+    """
+    model_name = "dima806/english_accents_classification"
+    model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
+    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
+    return model, feature_extractor
+# Global variables for lazy loading
+_model = None
+_feature_extractor = None
+def get_model_and_extractor():
+    """
+    Get the model and feature extractor, loading them if necessary.
+    Returns:
+        tuple: (model, feature_extractor)
+    """
+    global _model, _feature_extractor
+    if _model is None or _feature_extractor is None:
+        _model, _feature_extractor = load_model()
+    return _model, _feature_extractor
+# Load and preprocess the audio
+def load_audio(file_path):
+    """
+    Load and preprocess audio file for accent analysis.
+    Parameters:
+        file_path (str): Path to the audio file
+    Returns:
+        tuple: (audio_data, sample_rate)
+    """
+    # Load audio with librosa
+    audio, sr = librosa.load(file_path, sr=16000)
+    return audio, sr
+# Predict accent
+def predict_accent(file_path):
+    """
+    Predict the accent in an audio file.
+    Parameters:
+        file_path (str): Path to the audio file
+    Returns:
+        tuple: (accent_label, confidence_score)
+    """
+    # Check if file exists
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Audio file not found: {file_path}")
+    # Get or load model
+    model, feature_extractor = get_model_and_extractor()
+    # Load and process audio
+    audio, sr = load_audio(file_path)
+    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
+    # Make prediction
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    # Get results
+    predicted_class_id = torch.argmax(logits).item()
+    predicted_label = model.config.id2label[predicted_class_id]
+    confidence = torch.softmax(logits, dim=1)[0][predicted_class_id].item()
+    # Get all accent probabilities
+    all_probs = torch.softmax(logits, dim=1)[0].tolist()
+    all_accents = {model.config.id2label[i]: float(prob) for i, prob in enumerate(all_probs)}
+    return predicted_label, confidence
+# Get detailed accent analysis
+def get_detailed_accent_analysis(file_path):
+    """
+    Get detailed accent analysis including all possible accents and their probabilities.
+    Parameters:
+        file_path (str): Path to the audio file
+    Returns:
+        dict: Detailed accent analysis results
+    """
+    # Get or load model
+    model, feature_extractor = get_model_and_extractor()
+    # Load and process audio
+    audio, sr = load_audio(file_path)
+    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
+    # Make prediction
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    # Get top prediction
+    predicted_class_id = torch.argmax(logits).item()
+    predicted_label = model.config.id2label[predicted_class_id]
+    confidence = torch.softmax(logits, dim=1)[0][predicted_class_id].item()
+    # Get all accent probabilities
+    all_probs = torch.softmax(logits, dim=1)[0].tolist()
+    all_accents = {model.config.id2label[i]: float(prob) for i, prob in enumerate(all_probs)}
+    # Sort accents by probability (highest first)
+    sorted_accents = sorted(all_accents.items(), key=lambda x: x[1], reverse=True)
+    return {
+        "top_accent": predicted_label,
+        "confidence": confidence,
+        "confidence_percent": confidence * 100,
+        "all_accents": sorted_accents,
+        "file_analyzed": os.path.basename(file_path)
+    }

accent_summary.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from openai import OpenAI
+import os
+# OpenAI SDK client pointed at the OpenRouter endpoint; created once at import time
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=os.getenv("OPENROUTER_API_KEY"),  # Read from the OPENROUTER_API_KEY environment variable; do not hard-code a key here
+)
+def generate_summary(accent: str, confidence: float) -> str:
+    prompt = (
+        f"The speaker's accent was detected as {accent} with {confidence:.2%} confidence. "
+        "Provide a short 2-3 sentence summary explaining this result in simple, non-technical language."
+    )
+    completion = client.chat.completions.create(
+        model="mistralai/mistral-small-24b-instruct-2501:free",
+        messages=[{"role": "user", "content": prompt}],
+        extra_headers={
+            "HTTP-Referer": "https://your-site.com",  # Optional
+            "X-Title": "AccentAnalyzerApp",  # Optional
+        },
+    )
+    return completion.choices[0].message.content.strip()

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import streamlit as st
+import os
+import shutil
+import time
+from test_video_url_download import download_video
+from test_audio_extraction import extract_audio
+from noise_reduce import denoise_audio
+from accent_analyzer import predict_accent
+# Working directories for downloaded videos, extracted audio and denoised audio (relative paths)
+download_dir = "downloads"
+audio_dir = "audio"
+reduced_dir = os.path.join(audio_dir, "reduced")
+# Ensure output folders exist before any step tries to write into them
+os.makedirs(download_dir, exist_ok=True)
+os.makedirs(audio_dir, exist_ok=True)
+os.makedirs(reduced_dir, exist_ok=True)
+st.set_page_config(page_title="Accent Analyzer", page_icon="🎤", layout="centered")
+st.title("🎧 English Accent Analyzer")
+st.markdown("""
+Paste a **YouTube video URL** or a direct video link below. This tool will:
+1. Download the video
+2. Extract the speaker's audio
+3. Optionally apply noise reduction
+4. Analyze and classify the English accent
+""")
+# Sidebar options: the checkbox decides whether the denoising step below runs
+with st.sidebar:
+    st.header("🛠 Audio Processing")
+    enable_denoise = st.checkbox("🎛 Enable Noise Canceling", value=True)
+    st.markdown("---")
+    st.info("This feature enhances clarity if the audio has background noise.")
+# Main form: the pipeline only runs when the button is pressed and a URL was entered
+video_url = st.text_input("🔗 Paste Video URL")
+if st.button("🚀 Analyze") and video_url:
+    # Tag this run's files with the current Unix time in whole seconds
+    # NOTE(review): two runs started within the same second would share file names - confirm this is acceptable
+    unique_id = str(int(time.time()))
+    # Paths each pipeline step is expected to produce
+    video_output_path = os.path.join(download_dir, f"video_{unique_id}.mp4")
+    audio_output_path = os.path.join(audio_dir, f"audio_{unique_id}.mp3")
+    cleaned_output_path = os.path.join(reduced_dir, f"audio_{unique_id}_cleaned.wav")
+    with st.spinner("📥 Downloading video..."):
+        download_video(video_url, output_path=download_dir, filename=f"video_{unique_id}")
+    # download_video does not raise on failure, so success is detected by checking for the expected file
+    if not os.path.exists(video_output_path):
+        st.error("❌ Failed to download video.")
+        st.stop()
+    st.video(video_output_path)
+    with st.spinner("🎙 Extracting audio from video..."):
+        extract_audio(video_output_path, output_audio_path=audio_output_path)
+    # Remove video after extracting audio; only the audio is needed from here on
+    os.remove(video_output_path)
+    final_audio_path = audio_output_path
+    # Optional: noise reduction; when enabled, the denoised file is what gets analysed
+    if enable_denoise:
+        st.info("🎚 Noise canceling is enabled. Cleaning audio...")
+        final_audio_path = denoise_audio(audio_output_path, output_path=cleaned_output_path)
+    # Display audio players for the original and, if produced, the denoised audio
+    st.markdown("---")
+    st.subheader("🔊 Audio Previews")
+    st.markdown("**🎧 Original Audio:**")
+    st.audio(audio_output_path, format="audio/mp3")
+    if enable_denoise:
+        st.markdown("**🔇 Denoised Audio:**")
+        st.audio(cleaned_output_path, format="audio/wav")
+    # Analyze accent
+    with st.spinner("🧠 Analyzing accent..."):
+        label, confidence = predict_accent(final_audio_path)
+        # Imported here rather than at the top, so accent_summary (which builds its API client at import) loads only when a summary is needed
+        from accent_summary import generate_summary
+        # Get LLM-based explanation
+        summary = generate_summary(label, confidence)
+    # Display results
+    # NOTE(review): label is interpolated into raw HTML (unsafe_allow_html=True); it comes from the classifier's label map - confirm it is always trusted text
+    st.markdown("---")
+    st.subheader("🎯 Accent Analysis Result")
+    st.markdown(f"""
+    <div style='padding:1rem;border-radius:10px;background:#f9f9f9;border:1px solid #ccc;'>
+        <h3 style='color:#333;'>🗣 Accent: <span style='color:#0066cc'>{label}</span></h3>
+        <h4 style='color:#555;'>Confidence Score: <b>{confidence:.2%}</b></h4>
+    </div>
+    """, unsafe_allow_html=True)
+    st.markdown("### 📝 Accent Summary")
+    st.success(summary)
+    # Cleanup audio files; best-effort, a failure here only produces a warning
+    try:
+        if os.path.exists(audio_output_path):
+            os.remove(audio_output_path)
+        if enable_denoise and os.path.exists(cleaned_output_path):
+            os.remove(cleaned_output_path)
+    except Exception as e:
+        st.warning(f"⚠️ Could not delete some temporary files: {e}")
+    # Inject a small CSS tweak that adds top padding to the spinner element
+    st.markdown("""
+    <style>
+    .stSpinner > div > div {
+        padding-top: 1rem;
+    }
+    </style>
+    """, unsafe_allow_html=True)

noise_reduce.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import os
+from df.enhance import enhance, init_df, load_audio, save_audio
+def denoise_audio(input_path, output_path=None):
+    """
+    Apply noise reduction to an audio file.
+    Parameters:
+    - input_path (str): Path to the input audio file
+    - output_path (str, optional): Path where the denoised audio will be saved. If None,
+      it will be saved in the 'audio/reduced' directory with the original filename + '_cleaned.wav'
+    Returns:
+    - str: Path to the denoised audio file
+    """
+    # Create output path if not specified
+    if output_path is None:
+        output_dir = "audio/reduced"
+        os.makedirs(output_dir, exist_ok=True)
+        # Extract the base name of the input audio file (without extension)
+        base_name = os.path.splitext(os.path.basename(input_path))[0]
+        # Define the output audio file path using the original filename
+        output_path = os.path.join(output_dir, f"{base_name}_cleaned.wav")
+    else:
+        # Ensure directory exists for the specified output path
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+    # Initialize the model
+    model, df_state, _ = init_df()
+    # Load your audio file (must be 48kHz mono WAV)
+    audio, _ = load_audio(input_path, sr=df_state.sr())
+    # Apply noise reduction
+    enhanced = enhance(model, df_state, audio)
+    # Save the enhanced audio
+    save_audio(output_path, enhanced, df_state.sr())
+    return output_path

requirements.txt ADDED Viewed

	@@ -0,0 +1,24 @@

+# Core dependencies
+moviepy
+yt-dlp
+requests
+# Audio processing
+torch
+torchaudio
+librosa
+soundfile
+noisereduce
+git+https://github.com/Rikorose/DeepFilterNet.git
+# Hugging Face
+transformers
+datasets
+# Utilities
+tqdm
+python-dotenv
+streamlit
+openai

test_audio_extraction.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from moviepy import VideoFileClip
+import os
+def extract_audio(video_path, output_audio_path=None):
+    """
+    Extract audio from a video file.
+    Parameters:
+    - video_path (str): Path to the video file
+    - output_audio_path (str, optional): Path where the audio will be saved. If None,
+      it will be saved in the 'audio' directory with the same name as the video file.
+    Returns:
+    - str: Path to the extracted audio file
+    """
+    # Create audio directory if not specified in output path
+    if output_audio_path is None:
+        audio_dir = "audio"
+        os.makedirs(audio_dir, exist_ok=True)
+        # Extract the base name of the video file (without extension)
+        base_name = os.path.splitext(os.path.basename(video_path))[0]
+        # Define the output audio file path within the audio directory
+        output_audio_path = os.path.join(audio_dir, f"{base_name}.mp3")
+    else:
+        # Ensure directory exists for the specified output path
+        os.makedirs(os.path.dirname(os.path.abspath(output_audio_path)), exist_ok=True)
+    # Load the video file
+    video = VideoFileClip(video_path)
+    # Extract audio
+    audio = video.audio
+    # Save the audio file
+    audio.write_audiofile(output_audio_path)
+    # Close the video and audio clips
+    audio.close()
+    video.close()
+    return output_audio_path

test_video_url_download.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import yt_dlp
+import os
+from pathlib import Path
+def download_video(url, output_path='downloads', filename=None):
+    """
+    Downloads a video from the given URL using yt-dlp.
+    Parameters:
+    - url (str): The URL of the video to download.
+    - output_path (str): The directory where the video will be saved.
+    - filename (str, optional): Custom filename for the downloaded video (without extension).
+    Returns:
+    - str: Path to the downloaded video file
+    """
+    # Ensure the output directory exists
+    os.makedirs(output_path, exist_ok=True)
+    # Set up output template based on whether a custom filename is provided
+    if filename:
+        output_template = os.path.join(output_path, f"{filename}.%(ext)s")
+    else:
+        output_template = os.path.join(output_path, '%(title)s.%(ext)s')
+    # Set up yt-dlp options
+    ydl_opts = {
+        'format': 'bestvideo+bestaudio/best',  # Download best video and audio quality
+        'outtmpl': output_template,  # Output filename template
+        'merge_output_format': 'mp4',  # Merge video and audio into mp4 format
+        'quiet': False,  # Set to True to suppress output
+        'noplaylist': True,  # Download only the single video, not a playlist
+    }
+    # Download the video
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        try:
+            print(f"Starting download for: {url}")
+            ydl.download([url])
+            print("Download completed successfully.")
+        except Exception as e:
+            print(f"An error occurred: {e}")