|
|
import subprocess |
|
|
import sys |
|
|
|
|
|
|
|
|
# Upgrade Gradio (>= 4.44.0) using the running interpreter's own pip. This runs
# at module import, before `gradio` is imported further down the file.
# check_call raises CalledProcessError if the pip command exits non-zero.
_gradio_upgrade_cmd = [
    sys.executable,
    "-m",
    "pip",
    "install",
    "--upgrade",
    "gradio>=4.44.0",
]
subprocess.check_call(_gradio_upgrade_cmd)
|
|
|
|
|
from transformers import ( |
|
|
pipeline, |
|
|
WhisperForConditionalGeneration, |
|
|
AutoTokenizer, |
|
|
WhisperFeatureExtractor, |
|
|
GenerationConfig |
|
|
) |
|
|
import gradio as gr |
|
|
import traceback |
|
|
|
|
|
# Startup banner, written to stdout as soon as the module starts executing.
print("🔄 Starting ASR application...")

# Module-level handle for the speech-recognition pipeline. It stays None until
# load_asr_pipeline() assigns it; transcribe() checks it before every use.
asr = None
|
|
|
|
|
def load_asr_pipeline():
    """Build the speech-recognition pipeline and store it in the global ``asr``.

    The generation config, model, tokenizer and feature extractor are all
    loaded from the same checkpoint. ``forced_decoder_ids`` is cleared on the
    generation config before that config is attached to the model. The
    pipeline is created with ``device=-1`` (CPU, per the transformers
    pipeline API).

    Any exception raised while loading is reported on stdout together with a
    traceback, and ``asr`` is reset to ``None``; nothing is re-raised.
    """
    global asr

    checkpoint = "amedcj/whisper-kurmanji"

    try:
        print("🔧 Loading ASR model and configuration...")

        # Generation settings are loaded separately so forced_decoder_ids can
        # be cleared before the config is handed to the model.
        generation_settings = GenerationConfig.from_pretrained(checkpoint)
        generation_settings.forced_decoder_ids = None

        whisper_model = WhisperForConditionalGeneration.from_pretrained(checkpoint)
        whisper_model.generation_config = generation_settings

        # Dict entries are evaluated in order: tokenizer first, then the
        # feature extractor.
        pipeline_kwargs = {
            "model": whisper_model,
            "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
            "feature_extractor": WhisperFeatureExtractor.from_pretrained(checkpoint),
            "device": -1,
        }
        asr = pipeline("automatic-speech-recognition", **pipeline_kwargs)

        print("✅ ASR pipeline loaded successfully.")
    except Exception as load_error:
        # Deliberately broad: a failed load must not crash the app at import
        # time; transcribe() reports the missing model to the user instead.
        print(f"❌ Failed to load ASR model: {load_error}")
        traceback.print_exc()
        asr = None
|
|
|
|
|
|
|
|
# Populate the module-level `asr` handle eagerly, at import time.
load_asr_pipeline()
|
|
|
|
|
def transcribe(audio_file):
    """Transcribe an audio file with the global ``asr`` pipeline.

    Args:
        audio_file: Value passed straight to the pipeline call; the Gradio
            input in this file is configured with ``type="filepath"``. A
            falsy value (``None``, ``""``) means nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result on success. Otherwise a
        human-readable message: a prompt to upload audio, a notice that the
        model is not loaded, or the text of the exception raised during
        transcription. Exceptions from the pipeline call are not propagated.
    """
    print("🎙️ Transcribe function triggered.")

    # Both precondition failures share one early-return path. The audio check
    # comes first, so `asr` is only inspected when there is something to do.
    early_reply = None
    if not audio_file:
        early_reply = "⚠️ Please upload an audio file."
    elif asr is None:
        early_reply = "❌ ASR model not loaded properly."

    if early_reply is not None:
        print(early_reply)
        return early_reply

    try:
        print(f"🔍 Transcribing: {audio_file}")
        output = asr(audio_file)
        print("✅ Transcription complete.")
        return output["text"]
    except Exception as exc:
        # Surface the failure in the UI text box instead of raising.
        failure_text = f"❌ Error during transcription: {exc!s}"
        print(failure_text)
        traceback.print_exc()
        return failure_text
|
|
|
|
|
print("🚧 Building Gradio interface...")

# Widgets are created first (audio input, then text output) and then wired to
# transcribe() through a single gr.Interface.
_audio_input = gr.Audio(
    type="filepath",
    label="🎤 Dengê Kurmancî barkirin / Upload Kurdish Audio",
)
_transcript_box = gr.Textbox(label="📝 Nivîsandin / Transcription")

interface = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_transcript_box,
    title="Dengê Kurmancî bo Nivîsandin – Kurdish ASR",
    description="Dengê Kurmancî barkirin û nivîsa wê bibînin. / Upload Kurdish audio and get the transcription.",
    submit_btn="Bişîne",
    clear_btn="Paqij bike",
    examples=[["samples/kurmanji_sample.wav"]],
)

# This banner is printed on every import; only launch() itself is guarded.
print("🚀 Launching ASR interface...")

if __name__ == "__main__":
    interface.launch()
|
|
|