# kmr_asr / app.py
# (Web-page residue captured with this file: "amedcj's picture", commit 28251cb
# "Update app.py" (verified), "raw" / "history blame" links, size 2.86 kB.)
import subprocess
import sys
# Upgrade gradio to at least 4.44.0 with pip, using the interpreter that is
# running this script, before gradio is imported further down.
# check_call raises CalledProcessError if pip exits non-zero, which stops the
# script at this line.
# NOTE(review): this runs on every start and presumably needs network access;
# pinning gradio in the deployment's requirements may make it unnecessary --
# confirm before removing.
subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"])
from transformers import (
pipeline,
WhisperForConditionalGeneration,
AutoTokenizer,
WhisperFeatureExtractor,
GenerationConfig
)
import gradio as gr
import traceback
print("🔄 Starting ASR application...")
# Module-level handle to the speech-recognition pipeline. It stays None until
# load_asr_pipeline() assigns it and is reset to None if loading fails;
# transcribe() checks for None before using it.
asr = None
def load_asr_pipeline():
    """Build the speech-recognition pipeline and store it in the global ``asr``.

    Loads the generation config, model, tokenizer and feature extractor for
    the "amedcj/whisper-kurmanji" checkpoint, clears ``forced_decoder_ids`` on
    the generation config, and assembles an "automatic-speech-recognition"
    pipeline with ``device=-1`` (CPU).

    Never raises for ``Exception`` subclasses: any such failure is printed
    together with its traceback and ``asr`` is set to ``None`` so callers can
    detect that no model is available.
    """
    global asr
    checkpoint = "amedcj/whisper-kurmanji"

    try:
        print("🔧 Loading ASR model and configuration...")

        # Generation settings come from the checkpoint, minus forced_decoder_ids.
        generation_settings = GenerationConfig.from_pretrained(checkpoint)
        generation_settings.forced_decoder_ids = None

        # The model uses the adjusted settings instead of its own defaults.
        whisper_model = WhisperForConditionalGeneration.from_pretrained(checkpoint)
        whisper_model.generation_config = generation_settings

        # Text and audio pre/post-processing components, loaded in this order.
        pipeline_parts = {
            "model": whisper_model,
            "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
            "feature_extractor": WhisperFeatureExtractor.from_pretrained(checkpoint),
            "device": -1,  # Use CPU
        }

        asr = pipeline("automatic-speech-recognition", **pipeline_parts)
        print("✅ ASR pipeline loaded successfully.")
    except Exception as exc:
        # Startup boundary: report the problem and leave the app without a model.
        print(f"❌ Failed to load ASR model: {exc}")
        traceback.print_exc()
        asr = None
# Load the model once at import time, before the UI is built.
# load_asr_pipeline() catches its own exceptions, so a failed load leaves `asr`
# as None instead of stopping the script here.
load_asr_pipeline()
def transcribe(audio_file):
    """Return the transcription of ``audio_file``, or a user-facing message.

    Args:
        audio_file: Value handed over by the audio input; passed unchanged to
            the ``asr`` pipeline. Any falsy value is treated as "nothing
            uploaded".

    Returns:
        The ``"text"`` entry of the pipeline result on success. Otherwise a
        message string: a prompt to upload audio, a notice that the model is
        not loaded, or a description of the error raised while transcribing.
    """
    print("🎙️ Transcribe function triggered.")

    # Decide whether the request must be refused before touching the model.
    # The pipeline handle is only consulted when some input was supplied.
    if not audio_file:
        refusal = "⚠️ Please upload an audio file."
    elif asr is None:
        refusal = "❌ ASR model not loaded properly."
    else:
        refusal = None

    if refusal is not None:
        print(refusal)
        return refusal

    try:
        print(f"🔍 Transcribing: {audio_file}")
        output = asr(audio_file)
        print("✅ Transcription complete.")
        return output["text"]
    except Exception as exc:
        # Errors are reported as the returned text rather than raised.
        failure = f"❌ Error during transcription: {str(exc)}"
        print(failure)
        traceback.print_exc()
        return failure
print("🚧 Building Gradio interface...")

# Input and output components are created first so the Interface call below
# reads as a flat list of options.
_audio_input = gr.Audio(
    type="filepath",
    label="🎤 Dengê Kurmancî barkirin / Upload Kurdish Audio",
)
_transcript_output = gr.Textbox(label="📝 Nivîsandin / Transcription")

# One-function UI: the audio input feeds transcribe(), whose return value is
# shown in the text box. Button labels are localized; one bundled sample is
# offered as an example input.
interface = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_transcript_output,
    title="Dengê Kurmancî bo Nivîsandin – Kurdish ASR",
    description="Dengê Kurmancî barkirin û nivîsa wê bibînin. / Upload Kurdish audio and get the transcription.",
    submit_btn="Bişîne",
    clear_btn="Paqij bike",
    examples=[["samples/kurmanji_sample.wav"]],
)

# Printed whenever the module is executed, including on import; the server
# itself only starts when the file is run as a script.
print("🚀 Launching ASR interface...")

if __name__ == "__main__":
    interface.launch()