RepeatAfterMe

Running on T4

App Files Community

frimelle HF Staff committed 22 days ago

Commit

63d0469

1 Parent(s): 182a1f7

English-only setup

Browse files

Files changed (1) hide show

app.py +56 -37

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import torch
 from functools import lru_cache
 from transformers import pipeline
-# -------- Sentences to practice (customize freely) ----------
 SENTENCE_BANK = [
     "The quick brown fox jumps over the lazy dog.",
     "I promise to speak clearly and at a steady pace.",
@@ -20,12 +20,11 @@ SENTENCE_BANK = [
     "This microphone test checks my pronunciation accuracy.",
 ]
-# -------- Utilities ----------
 def normalize_text(t: str) -> str:
     t = t.lower()
-    # keep letters and numbers, replace anything else with space
-    t = re.sub(r"[^a-z0-9'äöüßçéèêáàóòúùîïôñ\-]+", " ", t)
-    # collapse whitespace
     t = re.sub(r"\s+", " ", t).strip()
     return t
@@ -36,22 +35,29 @@ def similarity_and_diff(ref: str, hyp: str):
     sm = difflib.SequenceMatcher(a=ref_tokens, b=hyp_tokens)
     ratio = sm.ratio()
-    # Build HTML with insertions/deletions highlighted
     out = []
     for op, i1, i2, j1, j2 in sm.get_opcodes():
         if op == "equal":
             out.append(" " + " ".join(ref_tokens[i1:i2]))
         elif op == "delete":
-            out.append(' <span style="background:#ffe0e0;text-decoration:line-through;">'
-                       + " ".join(ref_tokens[i1:i2]) + "</span>")
         elif op == "insert":
-            out.append(' <span style="background:#e0ffe0;">'
-                       + " ".join(hyp_tokens[j1:j2]) + "</span>")
         elif op == "replace":
-            out.append(' <span style="background:#ffe0e0;text-decoration:line-through;">'
-                       + " ".join(ref_tokens[i1:i2]) + "</span>")
-            out.append(' <span style="background:#e0ffe0;">'
-                       + " ".join(hyp_tokens[j1:j2]) + "</span>")
     html = '<div style="line-height:1.6;font-size:1rem;">' + "".join(out).strip() + "</div>"
     return ratio, html
@@ -66,7 +72,7 @@ def get_asr(model_id: str, device_preference: str):
         device = -1
     return pipeline(
         "automatic-speech-recognition",
-        model=model_id,
         device=device,
         chunk_length_s=30,
         return_timestamps=False,
@@ -75,14 +81,20 @@ def get_asr(model_id: str, device_preference: str):
 def gen_sentence():
     return random.choice(SENTENCE_BANK)
 def check_pronunciation(audio_path, target_sentence, model_id, device_pref, pass_threshold):
     if not target_sentence:
-        return gr.update(value=""), gr.update(value=""), gr.update(value=""), gr.update(value="Please generate a sentence first.")
     asr = get_asr(model_id, device_pref)
     try:
-        result = asr(audio_path)  # ✅ no language/task args for English-only models
         hyp_raw = result["text"].strip()
     except Exception as e:
         return "", "", "", f"Transcription failed: {e}"
@@ -102,36 +114,43 @@ def check_pronunciation(audio_path, target_sentence, model_id, device_pref, pass
     return hyp_raw, score, diff_html, summary
-with gr.Blocks(title="Say the Sentence") as demo:
     gr.Markdown(
         """
-        # 🎤 Say the Sentence
         1) Generate a sentence.
-        2) Press the mic to record yourself reading it.
-        3) Transcribe & check.
         """
     )
     with gr.Row():
         target = gr.Textbox(label="Target sentence", interactive=False, placeholder="Click 'Generate sentence'")
     with gr.Row():
         btn_gen = gr.Button("🎲 Generate sentence", variant="primary")
         btn_clear = gr.Button("🧹 Clear")
     with gr.Row():
         audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
     with gr.Accordion("Advanced settings", open=False):
-    model_id = gr.Dropdown(
-        choices=[
-            "openai/whisper-tiny.en",  # fastest
-            "openai/whisper-base.en",  # slightly better accuracy
-            "distil-whisper/distil-small.en",  # optional
-        ],
-        value="openai/whisper-tiny.en",
-        label="ASR model (English only)",
-    )
-    device_pref = gr.Radio(choices=["auto", "cpu", "cuda"], value="auto", label="Device preference")
-    pass_threshold = gr.Slider(0.50, 1.00, value=0.85, step=0.01, label="Match threshold")
     with gr.Row():
         btn_check = gr.Button("✅ Transcribe & Check", variant="primary")
@@ -145,12 +164,12 @@ with gr.Blocks(title="Say the Sentence") as demo:
     # Events
     btn_gen.click(fn=gen_sentence, outputs=target)
-    btn_clear.click(fn=lambda: ("", "", "", "", ""), outputs=[target, hyp_out, score_out, diff_out, summary_out])
     btn_check.click(
-    fn=check_pronunciation,
-    inputs=[audio, target, model_id, device_pref, pass_threshold],
-    outputs=[hyp_out, score_out, diff_out, summary_out]
     )
 if __name__ == "__main__":
-    demo.launch()

 from functools import lru_cache
 from transformers import pipeline
+# ------------------- Sentence Bank (customize freely) -------------------
 SENTENCE_BANK = [
     "The quick brown fox jumps over the lazy dog.",
     "I promise to speak clearly and at a steady pace.",
     "This microphone test checks my pronunciation accuracy.",
 ]
+# ------------------- Utilities -------------------
 def normalize_text(t: str) -> str:
+    # English-only normalization: lowercase, keep letters/digits/' and -
     t = t.lower()
+    t = re.sub(r"[^a-z0-9'\-]+", " ", t)
     t = re.sub(r"\s+", " ", t).strip()
     return t
     sm = difflib.SequenceMatcher(a=ref_tokens, b=hyp_tokens)
     ratio = sm.ratio()
     out = []
     for op, i1, i2, j1, j2 in sm.get_opcodes():
         if op == "equal":
             out.append(" " + " ".join(ref_tokens[i1:i2]))
         elif op == "delete":
+            out.append(
+                ' <span style="background:#ffe0e0;text-decoration:line-through;">'
+                + " ".join(ref_tokens[i1:i2]) + "</span>"
+            )
         elif op == "insert":
+            out.append(
+                ' <span style="background:#e0ffe0;">'
+                + " ".join(hyp_tokens[j1:j2]) + "</span>"
+            )
         elif op == "replace":
+            out.append(
+                ' <span style="background:#ffe0e0;text-decoration:line-through;">'
+                + " ".join(ref_tokens[i1:i2]) + "</span>"
+            )
+            out.append(
+                ' <span style="background:#e0ffe0;">'
+                + " ".join(hyp_tokens[j1:j2]) + "</span>"
+            )
     html = '<div style="line-height:1.6;font-size:1rem;">' + "".join(out).strip() + "</div>"
     return ratio, html
         device = -1
     return pipeline(
         "automatic-speech-recognition",
+        model=model_id,           # use English-only Whisper models (.en)
         device=device,
         chunk_length_s=30,
         return_timestamps=False,
 def gen_sentence():
     return random.choice(SENTENCE_BANK)
+def clear_all():
+    # target, hyp_out, score_out, diff_out, summary_out
+    return "", "", "", "", ""
+# ------------------- Core Check (English-only) -------------------
 def check_pronunciation(audio_path, target_sentence, model_id, device_pref, pass_threshold):
     if not target_sentence:
+        return "", "", "", "Please generate a sentence first."
     asr = get_asr(model_id, device_pref)
     try:
+        # IMPORTANT: For English-only Whisper (.en), do NOT pass language/task args.
+        result = asr(audio_path)
         hyp_raw = result["text"].strip()
     except Exception as e:
         return "", "", "", f"Transcription failed: {e}"
     return hyp_raw, score, diff_html, summary
+# ------------------- UI -------------------
+with gr.Blocks(title="Say the Sentence (English)") as demo:
     gr.Markdown(
         """
+        # 🎤 Say the Sentence (English)
         1) Generate a sentence.
+        2) Record yourself reading it.
+        3) Transcribe & check your accuracy.
         """
     )
     with gr.Row():
         target = gr.Textbox(label="Target sentence", interactive=False, placeholder="Click 'Generate sentence'")
     with gr.Row():
         btn_gen = gr.Button("🎲 Generate sentence", variant="primary")
         btn_clear = gr.Button("🧹 Clear")
     with gr.Row():
         audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
     with gr.Accordion("Advanced settings", open=False):
+        model_id = gr.Dropdown(
+            choices=[
+                "openai/whisper-tiny.en",        # fastest (CPU-friendly)
+                "openai/whisper-base.en",        # better accuracy, a bit slower
+                "distil-whisper/distil-small.en" # optional distil English model
+            ],
+            value="openai/whisper-tiny.en",
+            label="ASR model (English only)",
+        )
+        device_pref = gr.Radio(
+            choices=["auto", "cpu", "cuda"],
+            value="auto",
+            label="Device preference"
+        )
+        pass_threshold = gr.Slider(0.50, 1.00, value=0.85, step=0.01, label="Match threshold")
     with gr.Row():
         btn_check = gr.Button("✅ Transcribe & Check", variant="primary")
     # Events
     btn_gen.click(fn=gen_sentence, outputs=target)
+    btn_clear.click(fn=clear_all, outputs=[target, hyp_out, score_out, diff_out, summary_out])
     btn_check.click(
+        fn=check_pronunciation,
+        inputs=[audio, target, model_id, device_pref, pass_threshold],
+        outputs=[hyp_out, score_out, diff_out, summary_out]
     )
 if __name__ == "__main__":
+    demo.launch()