zakariae2000 committed on
Commit
3ae1bf1
·
verified ·
1 Parent(s): 747d227

Upload 7 files

Browse files
accent_analyzer.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
4
+ import librosa
5
+ import numpy as np
6
+ import os
7
+
8
+ # Model loading function to allow lazy loading
9
def load_model():
    """
    Load the accent classification model and its feature extractor.

    Both objects are created from the same Hugging Face checkpoint name via
    ``from_pretrained``.

    Returns:
        tuple: (model, feature_extractor)
    """
    checkpoint = "dima806/english_accents_classification"
    classifier = Wav2Vec2ForSequenceClassification.from_pretrained(checkpoint)
    extractor = Wav2Vec2FeatureExtractor.from_pretrained(checkpoint)
    return classifier, extractor
20
+
21
# Module-level cache, filled the first time the model is requested
_model = None
_feature_extractor = None


def get_model_and_extractor():
    """
    Return the cached model and feature extractor, loading them on first use.

    Returns:
        tuple: (model, feature_extractor)
    """
    global _model, _feature_extractor
    already_loaded = _model is not None and _feature_extractor is not None
    if not already_loaded:
        # Either half missing triggers a full reload of both objects.
        _model, _feature_extractor = load_model()
    return _model, _feature_extractor
36
+
37
+ # Load and preprocess the audio
38
def load_audio(file_path):
    """
    Read an audio file for accent analysis.

    The file is loaded through librosa with a requested sample rate of
    16000 Hz.

    Parameters:
        file_path (str): Path to the audio file

    Returns:
        tuple: (audio_data, sample_rate)
    """
    waveform, rate = librosa.load(file_path, sr=16000)
    return waveform, rate
51
+
52
+ # Predict accent
53
def predict_accent(file_path):
    """
    Predict the accent in an audio file.

    Parameters:
        file_path (str): Path to the audio file

    Returns:
        tuple: (accent_label, confidence_score) where confidence_score is the
        softmax probability of the predicted class.

    Raises:
        FileNotFoundError: If file_path does not exist.
    """
    # Fail early, before the (potentially slow) model load
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    # Get or load model
    model, feature_extractor = get_model_and_extractor()

    # Load and process audio
    audio, sr = load_audio(file_path)
    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single clip is passed in, so only the first row of the batch is used.
    # Softmax is computed once and reused for the confidence value.
    probabilities = torch.softmax(logits, dim=1)[0]
    predicted_class_id = torch.argmax(logits[0]).item()

    predicted_label = model.config.id2label[predicted_class_id]
    confidence = probabilities[predicted_class_id].item()
    return predicted_label, confidence
88
+
89
+ # Get detailed accent analysis
90
def get_detailed_accent_analysis(file_path):
    """
    Get detailed accent analysis including all possible accents and their probabilities.

    Parameters:
        file_path (str): Path to the audio file

    Returns:
        dict: Detailed accent analysis results with the keys
            - "top_accent": label of the most probable accent
            - "confidence": its softmax probability
            - "confidence_percent": the same value multiplied by 100
            - "all_accents": list of (label, probability) pairs, highest first
            - "file_analyzed": base name of file_path

    Raises:
        FileNotFoundError: If file_path does not exist.
    """
    # Same up-front check as predict_accent, so both entry points fail alike
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    # Get or load model
    model, feature_extractor = get_model_and_extractor()

    # Load and process audio
    audio, sr = load_audio(file_path)
    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single clip is passed in, so only the first row of the batch is used.
    # Softmax is computed once and reused for every derived value.
    probabilities = torch.softmax(logits, dim=1)[0]
    predicted_class_id = torch.argmax(logits[0]).item()
    predicted_label = model.config.id2label[predicted_class_id]
    confidence = probabilities[predicted_class_id].item()

    # Map every label to its probability
    all_accents = {
        model.config.id2label[i]: float(prob)
        for i, prob in enumerate(probabilities.tolist())
    }

    # Sort accents by probability (highest first)
    sorted_accents = sorted(all_accents.items(), key=lambda item: item[1], reverse=True)

    return {
        "top_accent": predicted_label,
        "confidence": confidence,
        "confidence_percent": confidence * 100,
        "all_accents": sorted_accents,
        "file_analyzed": os.path.basename(file_path),
    }
130
+
131
+
132
+
accent_summary.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import os
3
+
4
# The OpenAI client is pointed at OpenRouter's endpoint. The API key is read
# from the OPENROUTER_API_KEY environment variable; it is not hard-coded here.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)


def generate_summary(accent: str, confidence: float) -> str:
    """
    Ask the chat model for a short plain-language explanation of a result.

    Parameters:
        accent (str): Detected accent label
        confidence (float): Confidence as a fraction; it is formatted as a
            percentage in the prompt

    Returns:
        str: The model's reply with surrounding whitespace removed, or an
        empty string when the response carries no text.
    """
    prompt = (
        f"The speaker's accent was detected as {accent} with {confidence:.2%} confidence. "
        "Provide a short 2-3 sentence summary explaining this result in simple, non-technical language."
    )

    completion = client.chat.completions.create(
        model="mistralai/mistral-small-24b-instruct-2501:free",
        messages=[{"role": "user", "content": prompt}],
        extra_headers={
            "HTTP-Referer": "https://your-site.com",  # Optional
            "X-Title": "AccentAnalyzerApp",  # Optional
        },
    )

    # A response may come back without choices or with a null message body;
    # calling .strip() on None would raise AttributeError.
    if not completion.choices:
        return ""
    content = completion.choices[0].message.content
    return (content or "").strip()
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import time
import uuid

import streamlit as st

from accent_analyzer import predict_accent
from noise_reduce import denoise_audio
from test_audio_extraction import extract_audio
from test_video_url_download import download_video
9
+
10
# Constants
download_dir = "downloads"
audio_dir = "audio"
reduced_dir = os.path.join(audio_dir, "reduced")

# Ensure output folders exist
for _folder in (download_dir, audio_dir, reduced_dir):
    os.makedirs(_folder, exist_ok=True)

st.set_page_config(page_title="Accent Analyzer", page_icon="🎤", layout="centered")
st.title("🎧 English Accent Analyzer")
st.markdown("""
Paste a **YouTube video URL** or a direct video link below. This tool will:
1. Download the video
2. Extract the speaker's audio
3. Optionally apply noise reduction
4. Analyze and classify the English accent
""")

# Sidebar options
with st.sidebar:
    st.header("🛠 Audio Processing")
    enable_denoise = st.checkbox("🎛 Enable Noise Canceling", value=True)
    st.markdown("---")
    st.info("This feature enhances clarity if the audio has background noise.")

# Main form
video_url = st.text_input("🔗 Paste Video URL")
analyze_clicked = st.button("🚀 Analyze")

if analyze_clicked and not video_url:
    # Give feedback instead of silently ignoring the click
    st.warning("⚠️ Please paste a video URL first.")
elif analyze_clicked:
    # Random id per run: a second-resolution timestamp can collide when two
    # sessions start within the same second and would share file names.
    unique_id = uuid.uuid4().hex
    video_stem = f"video_{unique_id}"

    # Define expected paths
    video_output_path = os.path.join(download_dir, f"{video_stem}.mp4")
    audio_output_path = os.path.join(audio_dir, f"audio_{unique_id}.mp3")
    cleaned_output_path = os.path.join(reduced_dir, f"audio_{unique_id}_cleaned.wav")

    try:
        with st.spinner("📥 Downloading video..."):
            download_video(video_url, output_path=download_dir, filename=video_stem)

        # download_video reports errors by printing, so check the file itself
        if not os.path.exists(video_output_path):
            st.error("❌ Failed to download video.")
            st.stop()

        st.video(video_output_path)

        with st.spinner("🎙 Extracting audio from video..."):
            extract_audio(video_output_path, output_audio_path=audio_output_path)

        # Remove video after extracting audio
        os.remove(video_output_path)

        final_audio_path = audio_output_path

        # Optional: noise reduction
        if enable_denoise:
            st.info("🎚 Noise canceling is enabled. Cleaning audio...")
            final_audio_path = denoise_audio(audio_output_path, output_path=cleaned_output_path)

        # Display audio players
        st.markdown("---")
        st.subheader("🔊 Audio Previews")
        st.markdown("**🎧 Original Audio:**")
        st.audio(audio_output_path, format="audio/mp3")
        if enable_denoise:
            st.markdown("**🔇 Denoised Audio:**")
            st.audio(cleaned_output_path, format="audio/wav")

        # Analyze accent
        with st.spinner("🧠 Analyzing accent..."):
            label, confidence = predict_accent(final_audio_path)

            # Imported here so the summary client is only created when a
            # summary is actually requested.
            from accent_summary import generate_summary

            # Get LLM-based explanation
            summary = generate_summary(label, confidence)

        # Display results
        st.markdown("---")
        st.subheader("🎯 Accent Analysis Result")
        result_html = (
            "<div style='padding:1rem;border-radius:10px;background:#f9f9f9;border:1px solid #ccc;'>"
            f"<h3 style='color:#333;'>🗣 Accent: <span style='color:#0066cc'>{label}</span></h3>"
            f"<h4 style='color:#555;'>Confidence Score: <b>{confidence:.2%}</b></h4>"
            "</div>"
        )
        st.markdown(result_html, unsafe_allow_html=True)
        st.markdown("### 📝 Accent Summary")
        st.success(summary)
    finally:
        # Cleanup runs on success, on errors and on st.stop(), so temporary
        # files never pile up. Anything the downloader left under this run's
        # video stem (e.g. another extension) is removed as well.
        leftovers = [audio_output_path, cleaned_output_path]
        leftovers.extend(
            os.path.join(download_dir, entry)
            for entry in os.listdir(download_dir)
            if entry.startswith(f"{video_stem}.")
        )
        for leftover in leftovers:
            try:
                if os.path.exists(leftover):
                    os.remove(leftover)
            except OSError as e:
                st.warning(f"⚠️ Could not delete some temporary files: {e}")

st.markdown("""
<style>
.stSpinner > div > div {
padding-top: 1rem;
}
</style>
""", unsafe_allow_html=True)
noise_reduce.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from df.enhance import enhance, init_df, load_audio, save_audio
3
+
4
# Cached DeepFilterNet objects so the model is initialized once per process
_df_model = None
_df_state = None


def _get_df_model():
    """Return (model, df_state), calling init_df() only on first use."""
    global _df_model, _df_state
    if _df_model is None or _df_state is None:
        _df_model, _df_state, _ = init_df()
    return _df_model, _df_state


def denoise_audio(input_path, output_path=None):
    """
    Apply noise reduction to an audio file.

    Parameters:
    - input_path (str): Path to the input audio file
    - output_path (str, optional): Path where the denoised audio will be saved. If None,
      it will be saved in the 'audio/reduced' directory with the original filename + '_cleaned.wav'

    Returns:
    - str: Path to the denoised audio file

    Raises:
    - FileNotFoundError: If input_path does not exist
    """
    # Check the input before creating directories or loading the model
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Audio file not found: {input_path}")

    # Create output path if not specified
    if output_path is None:
        output_dir = "audio/reduced"
        os.makedirs(output_dir, exist_ok=True)

        # Base name of the input audio file (without extension)
        base_name = os.path.splitext(os.path.basename(input_path))[0]
        output_path = os.path.join(output_dir, f"{base_name}_cleaned.wav")
    else:
        # Ensure directory exists for the specified output path
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

    # Reuse the already-initialized model when there is one
    model, df_state = _get_df_model()

    # Load the audio at the sample rate reported by the model state
    audio, _ = load_audio(input_path, sr=df_state.sr())

    # Apply noise reduction
    enhanced = enhance(model, df_state, audio)

    # Save the enhanced audio at the same sample rate
    save_audio(output_path, enhanced, df_state.sr())

    return output_path
43
+
44
+
45
+
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ moviepy
3
+ yt-dlp
4
+ requests
5
+
6
+ # Audio processing
7
+ torch
8
+ torchaudio
9
+ librosa
10
+ soundfile
11
+ noisereduce
12
+ git+https://github.com/Rikorose/DeepFilterNet.git
13
+
14
+
15
+
16
+ # Hugging Face
17
+ transformers
18
+ datasets
19
+
20
+ # Utilities
21
+ tqdm
22
+ python-dotenv
23
+ streamlit
24
+ openai
test_audio_extraction.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy import VideoFileClip
2
+ import os
3
+
4
def extract_audio(video_path, output_audio_path=None):
    """
    Extract audio from a video file.

    Parameters:
    - video_path (str): Path to the video file
    - output_audio_path (str, optional): Path where the audio will be saved. If None,
      it will be saved in the 'audio' directory with the same name as the video file.

    Returns:
    - str: Path to the extracted audio file

    Raises:
    - ValueError: If the video has no audio track
    """
    # Create audio directory if not specified in output path
    if output_audio_path is None:
        audio_dir = "audio"
        os.makedirs(audio_dir, exist_ok=True)

        # Base name of the video file (without extension)
        base_name = os.path.splitext(os.path.basename(video_path))[0]
        output_audio_path = os.path.join(audio_dir, f"{base_name}.mp3")
    else:
        # Ensure directory exists for the specified output path
        os.makedirs(os.path.dirname(os.path.abspath(output_audio_path)), exist_ok=True)

    # Load the video file
    video = VideoFileClip(video_path)
    audio = None
    try:
        audio = video.audio
        if audio is None:
            # Without this check the write below fails with an opaque
            # AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_path}")

        # Save the audio file
        audio.write_audiofile(output_audio_path)
    finally:
        # Close the clips even when extraction fails, so file handles
        # are released.
        if audio is not None:
            audio.close()
        video.close()

    return output_audio_path
44
+
45
+
test_video_url_download.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yt_dlp
2
+ import os
3
+ from pathlib import Path
4
+
5
def _resolve_downloaded_path(ydl, info):
    """Return the output file path described by a yt-dlp info dict, or None."""
    if not info:
        return None

    # A playlist-style result nests the actual video under "entries"
    entries = info.get("entries")
    if entries:
        entries = [entry for entry in entries if entry]
        if not entries:
            return None
        info = entries[0]

    # NOTE(review): relies on yt-dlp recording the final file under
    # requested_downloads[*]["filepath"]; confirm against the installed version.
    for item in info.get("requested_downloads") or []:
        path = item.get("filepath")
        if path:
            return path

    # Fall back to the name derived from the output template
    return ydl.prepare_filename(info)


def download_video(url, output_path='downloads', filename=None):
    """
    Downloads a video from the given URL using yt-dlp.

    Parameters:
    - url (str): The URL of the video to download.
    - output_path (str): The directory where the video will be saved.
    - filename (str, optional): Custom filename for the downloaded video (without extension).

    Returns:
    - str or None: Path to the downloaded video file, or None when the
      download failed (the error is printed, not raised).
    """
    # Ensure the output directory exists
    os.makedirs(output_path, exist_ok=True)

    # Use the custom filename when given, otherwise the video title
    name_part = filename if filename else '%(title)s'
    output_template = os.path.join(output_path, f"{name_part}.%(ext)s")

    # Set up yt-dlp options
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',  # Download best video and audio quality
        'outtmpl': output_template,            # Output filename template
        'merge_output_format': 'mp4',          # Merge video and audio into mp4 format
        'quiet': False,                        # Set to True to suppress output
        'noplaylist': True,                    # Download only the single video, not a playlist
    }

    # Download the video
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            print(f"Starting download for: {url}")
            info = ydl.extract_info(url, download=True)
        except Exception as e:
            # Best-effort by design: callers check for the output file
            # instead of catching exceptions.
            print(f"An error occurred: {e}")
            return None

        print("Download completed successfully.")
        return _resolve_downloaded_path(ydl, info)
43
+