Spaces:
Running
on
T4
Running
on
T4
English-only setup
Browse files
app.py
CHANGED
|
@@ -75,15 +75,14 @@ def get_asr(model_id: str, device_preference: str):
|
|
| 75 |
def gen_sentence():
|
| 76 |
return random.choice(SENTENCE_BANK)
|
| 77 |
|
| 78 |
-
def check_pronunciation(audio_path, target_sentence, model_id,
|
| 79 |
if not target_sentence:
|
| 80 |
return gr.update(value=""), gr.update(value=""), gr.update(value=""), gr.update(value="Please generate a sentence first.")
|
| 81 |
|
| 82 |
asr = get_asr(model_id, device_pref)
|
| 83 |
-
|
| 84 |
-
# transformers pipeline exposes it as 'generate_kwargs' for whisper models.
|
| 85 |
try:
|
| 86 |
-
result = asr(audio_path
|
| 87 |
hyp_raw = result["text"].strip()
|
| 88 |
except Exception as e:
|
| 89 |
return "", "", "", f"Transcription failed: {e}"
|
|
@@ -122,20 +121,17 @@ with gr.Blocks(title="Say the Sentence") as demo:
|
|
| 122 |
with gr.Row():
|
| 123 |
audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
|
| 124 |
with gr.Accordion("Advanced settings", open=False):
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
lang = gr.Textbox(value="en", label="Language hint (e.g., 'en', 'de', 'fr')", info="Whisper language code; leave as 'en' for English-only models.")
|
| 137 |
-
device_pref = gr.Radio(choices=["auto", "cpu", "cuda"], value="auto", label="Device preference")
|
| 138 |
-
pass_threshold = gr.Slider(0.50, 1.00, value=0.85, step=0.01, label="Match threshold")
|
| 139 |
|
| 140 |
with gr.Row():
|
| 141 |
btn_check = gr.Button("✅ Transcribe & Check", variant="primary")
|
|
@@ -151,9 +147,9 @@ with gr.Blocks(title="Say the Sentence") as demo:
|
|
| 151 |
btn_gen.click(fn=gen_sentence, outputs=target)
|
| 152 |
btn_clear.click(fn=lambda: ("", "", "", "", ""), outputs=[target, hyp_out, score_out, diff_out, summary_out])
|
| 153 |
btn_check.click(
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
)
|
| 158 |
|
| 159 |
if __name__ == "__main__":
|
|
|
|
| 75 |
def gen_sentence():
|
| 76 |
return random.choice(SENTENCE_BANK)
|
| 77 |
|
| 78 |
+
def check_pronunciation(audio_path, target_sentence, model_id, device_pref, pass_threshold):
|
| 79 |
if not target_sentence:
|
| 80 |
return gr.update(value=""), gr.update(value=""), gr.update(value=""), gr.update(value="Please generate a sentence first.")
|
| 81 |
|
| 82 |
asr = get_asr(model_id, device_pref)
|
| 83 |
+
|
|
|
|
| 84 |
try:
|
| 85 |
+
result = asr(audio_path) # ✅ no language/task args for English-only models
|
| 86 |
hyp_raw = result["text"].strip()
|
| 87 |
except Exception as e:
|
| 88 |
return "", "", "", f"Transcription failed: {e}"
|
|
|
|
| 121 |
with gr.Row():
|
| 122 |
audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
|
| 123 |
with gr.Accordion("Advanced settings", open=False):
|
| 124 |
+
model_id = gr.Dropdown(
|
| 125 |
+
choices=[
|
| 126 |
+
"openai/whisper-tiny.en", # fastest
|
| 127 |
+
"openai/whisper-base.en", # slightly better accuracy
|
| 128 |
+
"distil-whisper/distil-small.en", # optional
|
| 129 |
+
],
|
| 130 |
+
value="openai/whisper-tiny.en",
|
| 131 |
+
label="ASR model (English only)",
|
| 132 |
+
)
|
| 133 |
+
device_pref = gr.Radio(choices=["auto", "cpu", "cuda"], value="auto", label="Device preference")
|
| 134 |
+
pass_threshold = gr.Slider(0.50, 1.00, value=0.85, step=0.01, label="Match threshold")
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
with gr.Row():
|
| 137 |
btn_check = gr.Button("✅ Transcribe & Check", variant="primary")
|
|
|
|
| 147 |
btn_gen.click(fn=gen_sentence, outputs=target)
|
| 148 |
btn_clear.click(fn=lambda: ("", "", "", "", ""), outputs=[target, hyp_out, score_out, diff_out, summary_out])
|
| 149 |
btn_check.click(
|
| 150 |
+
fn=check_pronunciation,
|
| 151 |
+
inputs=[audio, target, model_id, device_pref, pass_threshold],
|
| 152 |
+
outputs=[hyp_out, score_out, diff_out, summary_out]
|
| 153 |
)
|
| 154 |
|
| 155 |
if __name__ == "__main__":
|