amedcj committed on
Commit
28251cb
·
verified ·
1 Parent(s): d33dce2

Update app.py

Browse files

Updated app.py

Files changed (1) hide show
  1. app.py +76 -37
app.py CHANGED
@@ -4,9 +4,6 @@ import sys
4
  # Force upgrade gradio
5
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"])
6
 
7
- import gradio as gr
8
- import numpy as np
9
-
10
  from transformers import (
11
  pipeline,
12
  WhisperForConditionalGeneration,
@@ -14,49 +11,91 @@ from transformers import (
14
  WhisperFeatureExtractor,
15
  GenerationConfig
16
  )
 
 
17
 
18
- print("🔧 Loading ASR components...")
19
 
20
- # Load generation config and remove forced_decoder_ids
21
- gen_config = GenerationConfig.from_pretrained("amedcj/whisper-kurmanji")
22
- gen_config.forced_decoder_ids = None
23
 
24
- # Load model and set generation config directly
25
- model = WhisperForConditionalGeneration.from_pretrained("amedcj/whisper-kurmanji")
26
- model.generation_config = gen_config
 
27
 
28
- # Load tokenizer and feature extractor
29
- tokenizer = AutoTokenizer.from_pretrained("amedcj/whisper-kurmanji")
30
- feature_extractor = WhisperFeatureExtractor.from_pretrained("amedcj/whisper-kurmanji")
31
 
32
- # Create the ASR pipeline
33
- asr = pipeline(
34
- "automatic-speech-recognition",
35
- model=model,
36
- tokenizer=tokenizer,
37
- feature_extractor=feature_extractor,
38
- device=-1 # CPU
39
- )
40
 
41
- def transcribe(audio_path):
42
- print("📥 Transcription triggered")
43
- if audio_path is None:
44
- return "Please upload an audio file."
45
 
46
- array, sampling_rate = librosa.load(audio_path, sr=None)
47
- result = asr({"array": array, "sampling_rate": sampling_rate})
48
- return result["text"]
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # Gradio Interface using Blocks with a Submit button (compatible with HF Spaces)
52
- with gr.Blocks() as demo:
53
- gr.Markdown("## 🗣️ Kurdish ASR Demo")
54
 
55
- audio_input = gr.Audio(type="filepath", label="🎤 Upload Kurdish Audio")
56
- submit_btn = gr.Button("Submit")
57
- output_text = gr.Textbox(label="📝 Transcription", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- submit_btn.click(fn=transcribe, inputs=audio_input, outputs=output_text)
60
 
61
- print("🚀 Launching Gradio app...")
62
- demo.launch()
 
4
  # Force upgrade gradio
5
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"])
6
 
 
 
 
7
  from transformers import (
8
  pipeline,
9
  WhisperForConditionalGeneration,
 
11
  WhisperFeatureExtractor,
12
  GenerationConfig
13
  )
14
+ import gradio as gr
15
+ import traceback
16
 
17
+ print("🔄 Starting ASR application...")
18
 
19
+ # Global ASR pipeline
20
+ asr = None
 
21
 
22
def load_asr_pipeline():
    """Build the speech-recognition pipeline and store it in the global ``asr``.

    Loads the generation config, model, tokenizer and feature extractor for
    the ``amedcj/whisper-kurmanji`` checkpoint, clears ``forced_decoder_ids``
    on the generation config, and assembles an
    ``automatic-speech-recognition`` pipeline with ``device=-1`` (CPU).

    Nothing is returned. On any exception the error and traceback are
    printed and ``asr`` is set to ``None`` instead of propagating the
    failure, so callers must check ``asr`` before using it.
    """
    global asr
    try:
        print("🔧 Loading ASR model and configuration...")

        checkpoint = "amedcj/whisper-kurmanji"

        # Take the checkpoint's generation settings but drop forced_decoder_ids.
        generation_settings = GenerationConfig.from_pretrained(checkpoint)
        generation_settings.forced_decoder_ids = None

        # Load the model and attach the adjusted generation settings to it.
        whisper = WhisperForConditionalGeneration.from_pretrained(checkpoint)
        whisper.generation_config = generation_settings

        # Text and audio pre/post-processing components.
        text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        audio_features = WhisperFeatureExtractor.from_pretrained(checkpoint)

        # Assemble the pipeline; device=-1 selects the CPU.
        asr = pipeline(
            "automatic-speech-recognition",
            model=whisper,
            tokenizer=text_tokenizer,
            feature_extractor=audio_features,
            device=-1,
        )
        print("✅ ASR pipeline loaded successfully.")
    except Exception as e:
        # Best effort: report the failure and leave the app without a model.
        print(f"❌ Failed to load ASR model: {e}")
        traceback.print_exc()
        asr = None
55
 
56
+ # Load on startup
57
+ load_asr_pipeline()
 
58
 
59
def transcribe(audio_file):
    """Transcribe an uploaded audio file with the global ``asr`` pipeline.

    Args:
        audio_file: Value passed in by the UI audio input; a falsy value
            means nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result on success. Otherwise a
        human-readable message: a prompt to upload a file, a notice that the
        model is not loaded, or the error raised during transcription.
        Failures are returned as text rather than raised.
    """
    print("🎙️ Transcribe function triggered.")

    # Pre-flight checks: an input must be present and the model must be loaded.
    problem = None
    if not audio_file:
        problem = "⚠️ Please upload an audio file."
    elif asr is None:
        problem = "❌ ASR model not loaded properly."

    if problem is not None:
        print(problem)
        return problem

    try:
        print(f"🔍 Transcribing: {audio_file}")
        output = asr(audio_file)
        print("✅ Transcription complete.")
        return output["text"]
    except Exception as exc:
        # Surface the failure to the user as text and log the traceback.
        failure = f"❌ Error during transcription: {exc}"
        print(failure)
        traceback.print_exc()
        return failure
82
+
83
print("🚧 Building Gradio interface...")

# Single-input, single-output UI: an audio file path goes to `transcribe`,
# and the returned string is shown in a textbox. Labels are bilingual
# (Kurmanji / English).
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        type="filepath",
        label="🎤 Dengê Kurmancî barkirin / Upload Kurdish Audio",
    ),
    outputs=gr.Textbox(label="📝 Nivîsandin / Transcription"),
    title="Dengê Kurmancî bo Nivîsandin – Kurdish ASR",
    description="Dengê Kurmancî barkirin û nivîsa wê bibînin. / Upload Kurdish audio and get the transcription.",
    submit_btn="Bişîne",
    clear_btn="Paqij bike",
    # One example row; the path is relative to the app's working directory.
    examples=[["samples/kurmanji_sample.wav"]],
)

print("🚀 Launching ASR interface...")

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()