Spaces:

pierreguillou
/

transcription_diarization_audio

Sleeping

App Files Files Community

pierreguillou committed on Aug 20

Commit

3513671

verified ·

1 Parent(s): c11aebe

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -17

app.py CHANGED Viewed

@@ -40,6 +40,8 @@ def convert_to_wav(audio_path):
     try:
         audio = AudioSegment.from_file(audio_path)
         audio = audio.set_channels(1)
         wav_path = os.path.splitext(audio_path)[0] + ".wav"
         audio.export(wav_path, format="wav")
         return wav_path
@@ -47,37 +49,57 @@ def convert_to_wav(audio_path):
         print(f"Erreur lors de la conversion en WAV : {e}")
         return None
 def detect_language_on_upload(filepath):
-    """Détecte la langue d'un fichier audio en utilisant Whisper + LangDetect"""
     if filepath is None:
         return "auto"
     try:
         wav_filepath = convert_to_wav(filepath)
         if not wav_filepath:
             return "auto"
-        # Transcription rapide d'un échantillon court (15 secondes max)
         outputs = pipe(
-            wav_filepath,
-            chunk_length_s=15,
-            batch_size=8,
             return_timestamps=False
         )
-        transcribed_text = outputs["text"].strip()
-        # Si le texte est trop court, on retourne auto
         if len(transcribed_text) < 10:
             return "auto"
         # Utilise LangDetect sur le texte transcrit
         detected_lang = detect(transcribed_text)
         # Mapping des codes de langue LangDetect vers les codes Whisper
         lang_mapping = {
             'fr': 'fr',
-            'en': 'en',
             'es': 'es',
             'de': 'de',
             'it': 'it',
@@ -90,9 +112,9 @@ def detect_language_on_upload(filepath):
             'zh-cn': 'zh',
             'zh': 'zh'
         }
         return lang_mapping.get(detected_lang, "auto")
     except (LangDetectException, Exception) as e:
         print(f"Erreur lors de la détection de langue : {e}")
         return "auto"
@@ -159,7 +181,6 @@ def transcribe_audio(filepath, diarize, language_choice):
 with gr.Blocks() as demo:
     gr.HTML("<div style='text-align:center;'><h1>Application de Transcription et Diarisation Audio</h1></div>")
-    # gr.Markdown("## Objectif")
     gr.Markdown("Transcrivez et diarisez automatiquement vos fichiers audio (WhatsApp, réunions, interviews, etc.) grâce à Whisper et pyannote, directement dans ce Space.")
     gr.Markdown("""

     try:
         audio = AudioSegment.from_file(audio_path)
         audio = audio.set_channels(1)
+        # Assure un sample rate standard pour Whisper (16 kHz), utile pour vitesse/stabilité
+        audio = audio.set_frame_rate(16000)
         wav_path = os.path.splitext(audio_path)[0] + ".wav"
         audio.export(wav_path, format="wav")
         return wav_path
         print(f"Erreur lors de la conversion en WAV : {e}")
         return None
+def make_short_wav(input_wav_path, max_seconds=12):
+    """Crée un court extrait (début) du WAV pour la détection de langue."""
+    try:
+        audio = AudioSegment.from_wav(input_wav_path)
+        clip = audio[: max_seconds * 1000]  # millisecondes
+        short_path = os.path.splitext(input_wav_path)[0] + f"_head{max_seconds}s.wav"
+        clip.export(short_path, format="wav")
+        return short_path
+    except Exception as e:
+        print(f"Erreur lors de la création de l'extrait court : {e}")
+        return None
 def detect_language_on_upload(filepath):
+    """Détecte la langue d'un fichier audio en n'utilisant qu'un court extrait (Whisper + LangDetect)."""
     if filepath is None:
         return "auto"
     try:
         wav_filepath = convert_to_wav(filepath)
         if not wav_filepath:
             return "auto"
+        # Utiliser uniquement les premières secondes pour la détection (plus rapide)
+        short_wav = make_short_wav(wav_filepath, max_seconds=12)
+        if not short_wav:
+            short_wav = wav_filepath  # fallback si l'extrait échoue
+        # Transcription rapide d'un échantillon court (pas besoin de batch ni timestamps)
         outputs = pipe(
+            short_wav,
+            chunk_length_s=15,
             return_timestamps=False
         )
+        transcribed_text = outputs.get("text", "").strip()
+        # Si Whisper renvoie déjà une langue
+        whisper_lang = outputs.get("language")
+        if whisper_lang and isinstance(whisper_lang, str) and len(whisper_lang) <= 5:
+            return whisper_lang
         if len(transcribed_text) < 10:
             return "auto"
         # Utilise LangDetect sur le texte transcrit
         detected_lang = detect(transcribed_text)
         # Mapping des codes de langue LangDetect vers les codes Whisper
         lang_mapping = {
             'fr': 'fr',
+            'en': 'en',
             'es': 'es',
             'de': 'de',
             'it': 'it',
             'zh-cn': 'zh',
             'zh': 'zh'
         }
         return lang_mapping.get(detected_lang, "auto")
     except (LangDetectException, Exception) as e:
         print(f"Erreur lors de la détection de langue : {e}")
         return "auto"
 with gr.Blocks() as demo:
     gr.HTML("<div style='text-align:center;'><h1>Application de Transcription et Diarisation Audio</h1></div>")
     gr.Markdown("Transcrivez et diarisez automatiquement vos fichiers audio (WhatsApp, réunions, interviews, etc.) grâce à Whisper et pyannote, directement dans ce Space.")
     gr.Markdown("""