# Hugging Face file-viewer header (page residue, not part of the program):
# sulaimank's picture | Create app.py | 72603bc verified | raw | history blame | 1.13 kB
import whisper
import gradio as gr
from whisper_decoder_with_lm import LMOptions
from huggingface_hub import hf_hub_download
# Fetch the fine-tuned Whisper checkpoint and the 5-gram LM file from the
# Hugging Face Hub; each call returns a local file path.
HF_REPO_ID = "sulaimank/whisper-small-lg-lm"
model_path = hf_hub_download(
    repo_id=HF_REPO_ID,
    filename="whisper-small-CV-Fleurs-lg-313hrs-v1.pt",
)
lm_path = hf_hub_download(repo_id=HF_REPO_ID, filename="5gram.bin")

# Language-model decoding options, applied one attribute at a time.
# NOTE(review): LMOptions() is called afresh for every assignment, which only
# has an effect if LMOptions hands back a shared instance -- confirm against
# whisper_decoder_with_lm.
_LM_SETTINGS = {
    "lm_path": lm_path,
    "lm_alpha": 0.5,  # update with best value
    "lm_beta": 0.5,  # update with best value
}
for _option_name, _option_value in _LM_SETTINGS.items():
    setattr(LMOptions(), _option_name, _option_value)

# Load the Whisper model from the downloaded checkpoint file.
model = whisper.load_model(model_path)
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to the audio file to transcribe, as delivered by the
            Gradio audio input (configured with ``type="filepath"``).
            ``None`` means no audio was provided.

    Returns:
        The ``"text"`` entry of the result returned by ``model.transcribe``,
        or an empty string when ``audio`` is ``None``.
    """
    # The UI hands over None when the user submits without uploading a file;
    # return early instead of letting the decoder fail on a missing input.
    if audio is None:
        return ""

    decode_options = {
        "language": "sw",  # Swahili used as tokenizer workaround
        "without_timestamps": True,
        "temperature": 0.0,
        "beam_size": 5,
    }
    result = model.transcribe(audio, **decode_options)
    return result["text"]
# Input widget: the uploaded audio is handed to the callback as a file path.
audio_input = gr.Audio(type="filepath", label="Upload Luganda Audio")

# Wire the audio input to `transcribe` and show the returned string as text.
interface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Luganda Whisper ASR with Language Model",
    description="Fine-tuned Whisper-small + 5-gram LM for Luganda transcription.",
)

# Start the Gradio app.
interface.launch()