"""Gradio demo application for Kurmanji Kurdish speech recognition.

At import time this module builds a Whisper-based automatic speech
recognition (ASR) pipeline from the ``amedcj/whisper-kurmanji`` checkpoint
and a ``gr.Interface`` that sends recorded or uploaded audio through it.
The web server itself is only started when the file is run as a script.
"""

import inspect
import os
import subprocess
import sys
import traceback

# Force upgrade gradio (only for Hugging Face). This has to run before
# ``import gradio`` below so that the upgraded package is the one imported.
if "SPACE_ID" in os.environ:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"]
    )

import gradio as gr  # noqa: E402  (must follow the optional upgrade above)
from transformers import (  # noqa: E402
    pipeline,
    WhisperForConditionalGeneration,
    AutoTokenizer,
    WhisperFeatureExtractor,
    GenerationConfig,
)

print("🚀 Starting Kurmanji ASR application...")

# Hub repository that provides the model weights, tokenizer, feature
# extractor and generation config.
MODEL_ID = "amedcj/whisper-kurmanji"

# Global variables
# Holds the ASR pipeline once loaded; stays None if loading failed.
asr = None


def load_asr_model() -> None:
    """Load every model component and build the global ``asr`` pipeline.

    The generation config, model, tokenizer and feature extractor are each
    loaded explicitly from ``MODEL_ID`` and then combined into an
    ``automatic-speech-recognition`` pipeline running on CPU.

    Any exception is caught: the error and its traceback are printed and
    ``asr`` is set to ``None`` so that ``transcribe`` can report the problem
    to the user instead of the application crashing at startup.
    """
    global asr
    try:
        print("📥 Loading Whisper model for Kurmanji...")

        # Load generation config and remove forced_decoder_ids
        print("⚙️ Loading generation config...")
        gen_config = GenerationConfig.from_pretrained(MODEL_ID)
        gen_config.forced_decoder_ids = None
        print("✓ Generation config loaded")

        # Load model and set generation config directly
        print("🤖 Loading Whisper model...")
        model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
        model.generation_config = gen_config
        print("✓ Model loaded successfully")

        # Load tokenizer explicitly
        print("📝 Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        print("✓ Tokenizer loaded successfully")

        # Load feature extractor explicitly
        print("🔍 Loading feature extractor...")
        feature_extractor = WhisperFeatureExtractor.from_pretrained(MODEL_ID)
        print("✓ Feature extractor loaded successfully")

        # Create the pipeline with model, tokenizer and feature extractor
        print("🔧 Creating ASR pipeline...")
        asr = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            chunk_length_s=10,  # try with 10s chunks
            stride_length_s=(2, 2),  # optional smoothing
            device=-1,  # CPU
        )
        print("✅ ASR pipeline created successfully!")
    except Exception as e:
        # Startup boundary: report the failure and keep the app importable.
        print(f"❌ Error loading ASR model: {e}")
        traceback.print_exc()
        asr = None


# Load the model at startup
load_asr_model()


def transcribe(audio_file) -> str:
    """Transcribe an audio file with the global ASR pipeline.

    Args:
        audio_file: Value delivered by the ``gr.Audio`` input, which is
            configured with ``type="filepath"``; ``None`` when the user
            submitted without providing audio.

    Returns:
        The transcribed text on success. Otherwise a bilingual
        (Kurdish / English) message explaining that no audio was given, that
        the model is not loaded, or which error occurred. This function
        never raises; failures are logged and returned as text.
    """
    print("=== ASR Function Called ===")
    print(f"Audio file: {audio_file}")

    # Check if audio file is provided
    if audio_file is None:
        error_msg = "Ji kerema xwe dosyeyek deng bar bike. / Please upload an audio file."
        print(f"Error: {error_msg}")
        return error_msg

    # Check if ASR model is loaded
    if asr is None:
        error_msg = "Model nehatiye barkirin. / ASR model not loaded properly."
        print(f"Error: {error_msg}")
        return error_msg

    try:
        print("🎵 Processing audio file...")
        # Transcribe the audio
        result = asr(audio_file)
        transcription = result["text"]
    except Exception as e:
        # UI boundary: show the problem in the output box rather than
        # surfacing a stack trace to the user.
        error_msg = f"Çewtî: {str(e)} / Error: {str(e)}"
        print(f"❌ Error in transcription: {e}")
        traceback.print_exc()
        return error_msg

    print(f"✅ Transcription completed: {transcription}")
    return transcription


def clear_inputs():
    """Return reset values: ``None`` and an empty string."""
    return None, ""


# Create Gradio interface with Kurdish elements
print("🎨 Creating Gradio interface...")
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="🎤 Deng Tomar Bike an Dosye Bar Bike / Record Voice or Upload File",
    ),
    outputs=gr.Textbox(
        label="📝 Encam / Result",
        placeholder="Li vir nivîsa wergerandî dê xuya bibe... / Transcribed text will appear here...",
        lines=5,
        show_copy_button=True,
    ),
    title="🗣️ Kurmancî ASR - Kurdish Speech Recognition",
    # Markdown text is kept at column 0 so that leading whitespace cannot be
    # interpreted as an indented code block.
    description="""
**Deng bo Nivîs / Speech to Text**

**Formatên çêdibin:** WAV, MP3, M4A, FLAC
""",
    submit_btn="Wergerîne / Transcribe",
    clear_btn="Paqij Bike / Clear",
    examples=[
        # You can add example audio files here if you have them
        # ["path/to/example1.wav"],
        # ["path/to/example2.mp3"],
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    print("🚀 Launching Gradio app...")
    # Only pass ``ssr_mode`` when the installed Gradio's ``launch`` accepts
    # it, so older releases that lack the keyword do not raise TypeError.
    launch_kwargs = {}
    if "ssr_mode" in inspect.signature(demo.launch).parameters:
        launch_kwargs["ssr_mode"] = False
    demo.launch(**launch_kwargs)