amedcj committed on
Commit
1e44bd2
·
verified ·
1 Parent(s): 28251cb

Update app.py

Browse files

Updated app.py

Files changed (1) hide show
  1. app.py +134 -59
app.py CHANGED
@@ -4,6 +4,7 @@ import sys
4
  # Force upgrade gradio
5
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"])
6
 
 
7
  from transformers import (
8
  pipeline,
9
  WhisperForConditionalGeneration,
@@ -11,91 +12,165 @@ from transformers import (
11
  WhisperFeatureExtractor,
12
  GenerationConfig
13
  )
14
- import gradio as gr
15
  import traceback
16
 
17
- print("🔄 Starting ASR application...")
18
 
19
- # Global ASR pipeline
20
  asr = None
 
 
 
21
 
22
- def load_asr_pipeline():
23
- global asr
 
24
  try:
25
- print("🔧 Loading ASR model and configuration...")
26
-
27
- model_id = "amedcj/whisper-kurmanji"
28
-
29
  # Load generation config and remove forced_decoder_ids
30
- gen_config = GenerationConfig.from_pretrained(model_id)
 
31
  gen_config.forced_decoder_ids = None
32
-
33
- # Load model and attach config
34
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
 
 
35
  model.generation_config = gen_config
36
-
37
- # Load tokenizer and feature extractor
38
- tokenizer = AutoTokenizer.from_pretrained(model_id)
39
- feature_extractor = WhisperFeatureExtractor.from_pretrained(model_id)
40
-
41
- # Assemble pipeline
 
 
 
 
 
 
 
 
42
  asr = pipeline(
43
  "automatic-speech-recognition",
44
  model=model,
45
  tokenizer=tokenizer,
46
  feature_extractor=feature_extractor,
47
- device=-1 # Use CPU
48
  )
49
- print("✅ ASR pipeline loaded successfully.")
50
-
51
  except Exception as e:
52
- print(f"❌ Failed to load ASR model: {e}")
53
  traceback.print_exc()
54
  asr = None
55
 
56
- # Load on startup
57
- load_asr_pipeline()
58
 
59
  def transcribe(audio_file):
60
- print("🎙️ Transcribe function triggered.")
 
61
 
62
- if not audio_file:
63
- msg = "⚠️ Please upload an audio file."
64
- print(msg)
65
- return msg
66
-
67
- if asr is None:
68
- msg = "❌ ASR model not loaded properly."
69
- print(msg)
70
- return msg
71
-
72
  try:
73
- print(f"🔍 Transcribing: {audio_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  result = asr(audio_file)
75
- print("✅ Transcription complete.")
76
- return result["text"]
 
 
 
77
  except Exception as e:
78
- error_msg = f" Error during transcription: {str(e)}"
79
- print(error_msg)
80
  traceback.print_exc()
81
  return error_msg
82
 
83
- print("🚧 Building Gradio interface...")
 
84
 
85
- interface = gr.Interface(
86
- fn=transcribe,
87
- inputs=gr.Audio(type="filepath", label="🎤 Dengê Kurmancî barkirin / Upload Kurdish Audio"),
88
- outputs=gr.Textbox(label="📝 Nivîsandin / Transcription"),
89
- title="Dengê Kurmancî bo Nivîsandin Kurdish ASR",
90
- description="Dengê Kurmancî barkirin û nivîsa wê bibînin. / Upload Kurdish audio and get the transcription.",
91
- submit_btn="Bişîne",
92
- clear_btn="Paqij bike",
93
- examples=[
94
- ["samples/kurmanji_sample.wav"]
95
- ]
96
- )
97
-
98
- print("🚀 Launching ASR interface...")
99
-
100
- if __name__ == "__main__":
101
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # Force upgrade gradio
5
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio>=4.44.0"])
6
 
7
+ import gradio as gr
8
  from transformers import (
9
  pipeline,
10
  WhisperForConditionalGeneration,
 
12
  WhisperFeatureExtractor,
13
  GenerationConfig
14
  )
 
15
  import traceback
16
 
17
+ print("🚀 Starting Kurmanji ASR application...")
18
 
19
+ # Global variables
20
  asr = None
21
+ model = None
22
+ tokenizer = None
23
+ feature_extractor = None
24
 
25
+ def load_asr_model():
26
+ global asr, model, tokenizer, feature_extractor
27
+
28
  try:
29
+ print("📥 Loading Whisper model for Kurmanji...")
30
+
 
 
31
  # Load generation config and remove forced_decoder_ids
32
+ print("⚙️ Loading generation config...")
33
+ gen_config = GenerationConfig.from_pretrained("amedcj/whisper-kurmanji")
34
  gen_config.forced_decoder_ids = None
35
+ print("✓ Generation config loaded")
36
+
37
+ # Load model and set generation config directly
38
+ print("🤖 Loading Whisper model...")
39
+ model = WhisperForConditionalGeneration.from_pretrained("amedcj/whisper-kurmanji")
40
  model.generation_config = gen_config
41
+ print("✓ Model loaded successfully")
42
+
43
+ # Load tokenizer explicitly
44
+ print("📝 Loading tokenizer...")
45
+ tokenizer = AutoTokenizer.from_pretrained("amedcj/whisper-kurmanji")
46
+ print("✓ Tokenizer loaded successfully")
47
+
48
+ # Load feature extractor explicitly
49
+ print("🔍 Loading feature extractor...")
50
+ feature_extractor = WhisperFeatureExtractor.from_pretrained("amedcj/whisper-kurmanji")
51
+ print("✓ Feature extractor loaded successfully")
52
+
53
+ # Create the pipeline with model, tokenizer and feature extractor
54
+ print("🔧 Creating ASR pipeline...")
55
  asr = pipeline(
56
  "automatic-speech-recognition",
57
  model=model,
58
  tokenizer=tokenizer,
59
  feature_extractor=feature_extractor,
60
+ device=-1 # CPU
61
  )
62
+ print("✅ ASR pipeline created successfully!")
63
+
64
  except Exception as e:
65
+ print(f"❌ Error loading ASR model: {e}")
66
  traceback.print_exc()
67
  asr = None
68
 
69
+ # Load the model at startup
70
+ load_asr_model()
71
 
72
  def transcribe(audio_file):
73
+ print("=== ASR Function Called ===")
74
+ print(f"Audio file: {audio_file}")
75
 
 
 
 
 
 
 
 
 
 
 
76
  try:
77
+ # Check if audio file is provided
78
+ if audio_file is None:
79
+ error_msg = "Ji kerema xwe dosyeyek deng bar bike. / Please upload an audio file."
80
+ print(f"Error: {error_msg}")
81
+ return error_msg
82
+
83
+ # Check if ASR model is loaded
84
+ if asr is None:
85
+ error_msg = "Model nehatiye barkirin. / ASR model not loaded properly."
86
+ print(f"Error: {error_msg}")
87
+ return error_msg
88
+
89
+ print("🎵 Processing audio file...")
90
+
91
+ # Transcribe the audio
92
  result = asr(audio_file)
93
+ transcription = result["text"]
94
+
95
+ print(f"✅ Transcription completed: {transcription}")
96
+ return transcription
97
+
98
  except Exception as e:
99
+ error_msg = f"Çewtî: {str(e)} / Error: {str(e)}"
100
+ print(f"❌ Error in transcription: {e}")
101
  traceback.print_exc()
102
  return error_msg
103
 
104
+ # Create Gradio interface with Kurdish elements
105
+ print("🎨 Creating Gradio interface...")
106
 
107
+ with gr.Blocks(title="Kurmancî ASR - Kurdish Speech Recognition") as demo:
108
+
109
+ gr.Markdown("""
110
+ # 🗣️ Kurmancî ASR - Kurdish Speech Recognition
111
+ ### Deng bo Nivîs / Speech to Text
112
+
113
+ Dengê xwe bi Kurmancî tomar bike û wekî nivîs bibîne.
114
+ Record your voice in Kurmanji Kurdish and convert it to text.
115
+ """)
116
+
117
+ with gr.Row():
118
+ with gr.Column():
119
+ audio_input = gr.Audio(
120
+ sources=["microphone", "upload"], # Enable both mic recording and file upload
121
+ type="filepath",
122
+ label="🎤 Dengî tomar bike yan dosyeyekê lê bar bike / Record Voice or Upload File"
123
+ )
124
+
125
+ submit_btn = gr.Button(
126
+ "Veguherîne / Transcribe",
127
+ variant="primary",
128
+ size="lg"
129
+ )
130
+
131
+ clear_btn = gr.Button(
132
+ "Paqij Bike / Clear",
133
+ variant="secondary"
134
+ )
135
+
136
+ with gr.Column():
137
+ output_text = gr.Textbox(
138
+ label="📝 Encam / Result",
139
+ placeholder="Li virê dê nivîsa veguherandî xuya bibe... / Transcribed text will appear here...",
140
+ lines=10,
141
+ interactive=True, # Allow users to edit the result
142
+ show_copy_button=True
143
+ )
144
+
145
+ # Add examples section
146
+ gr.Markdown("### 💡 Mînak / Examples")
147
+ gr.Markdown("""
148
+ **Çawa bikar bînin / How to use:**
149
+ 1. **Tomar bikin / Record:** Li ser butona mîkrofonê bitikînin û axaftin dest pê bikin
150
+ 2. **An dosye bar bikin / Or upload:** Dosyeyek dengî (.wav, .mp3, .m4a) hilbijêrin
151
+ 3. **Wergerînin / Transcribe:** Li ser "Wergerîne" bitikînin
152
+
153
+ **Supported formats:** WAV, MP3, M4A, FLAC
154
+ """)
155
+
156
+ # Event handlers
157
+ submit_btn.click(
158
+ fn=transcribe,
159
+ inputs=audio_input,
160
+ outputs=output_text,
161
+ show_progress=True
162
+ )
163
+
164
+ clear_btn.click(
165
+ fn=lambda: (None, ""),
166
+ inputs=[],
167
+ outputs=[audio_input, output_text]
168
+ )
169
+
170
+ # Auto-transcribe when audio is recorded/uploaded (optional)
171
+ audio_input.change(
172
+ fn=transcribe,
173
+ inputs=audio_input,
174
+ outputs=output_text,
175
+ show_progress=True
176
+ )