Spaces:

sulaimank
/

Luganda-Whisper-with-LM

Sleeping

Luganda-Whisper-with-LM / app.py

Create app.py

72603bc verified 8 months ago

1.13 kB

	import whisper
	import gradio as gr
	from whisper_decoder_with_lm import LMOptions
	from huggingface_hub import hf_hub_download

	# Fetch the fine-tuned Whisper checkpoint and the 5-gram language model from
	# the Hugging Face Hub; each call yields the path of the downloaded file.
	_hub_repo = "sulaimank/whisper-small-lg-lm"
	model_path = hf_hub_download(_hub_repo, "whisper-small-CV-Fleurs-lg-313hrs-v1.pt")
	lm_path = hf_hub_download(_hub_repo, "5gram.bin")

	# Language-model settings applied to LMOptions before the model is loaded.
	# TODO: lm_alpha and lm_beta are placeholders -- update with the best values.
	_lm_settings = {
	    "lm_path": lm_path,
	    "lm_alpha": 0.5,
	    "lm_beta": 0.5,
	}
	# NOTE(review): each setting is written to a fresh LMOptions() call, which
	# only takes effect if LMOptions() hands back a shared instance -- confirm
	# against whisper_decoder_with_lm.
	for _option_name, _option_value in _lm_settings.items():
	    setattr(LMOptions(), _option_name, _option_value)

	# Load the Whisper model from the downloaded checkpoint file.
	model = whisper.load_model(model_path)

	def transcribe(audio):
	    """Transcribe an audio recording and return the recognized text.

	    Args:
	        audio: Audio to transcribe, forwarded unchanged to
	            ``model.transcribe``. The Gradio input in this file is
	            configured with ``type="filepath"``, so this is normally a
	            file path. ``None`` means no audio was supplied.

	    Returns:
	        The ``"text"`` entry of the result returned by ``model.transcribe``,
	        or an empty string when ``audio`` is ``None``.
	    """
	    # Gradio hands over None when the form is submitted without any audio;
	    # return early rather than letting model.transcribe fail on it.
	    if audio is None:
	        return ""

	    decode_options = {
	        "language": "sw",  # Swahili used as tokenizer workaround
	        "without_timestamps": True,
	        "temperature": 0.0,
	        "beam_size": 5,
	    }
	    result = model.transcribe(audio, **decode_options)
	    return result["text"]

	# Web UI: a single audio input handed to transcribe() as a file path, with
	# the returned transcript rendered as plain text.
	_audio_input = gr.Audio(type="filepath", label="Upload Luganda Audio")
	interface = gr.Interface(
	    fn=transcribe,
	    inputs=_audio_input,
	    outputs="text",
	    title="Luganda Whisper ASR with Language Model",
	    description="Fine-tuned Whisper-small + 5-gram LM for Luganda transcription.",
	)

	# Start the app as soon as this script is executed.
	interface.launch()