Spaces:
Running
Running
Commit
·
4751966
1
Parent(s):
eeb50b0
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,6 @@ from utils import (
|
|
| 9 |
read,
|
| 10 |
get_key,
|
| 11 |
)
|
| 12 |
-
import subprocess
|
| 13 |
import whisperx as whisper
|
| 14 |
import json
|
| 15 |
import pandas as pd
|
|
@@ -164,33 +163,22 @@ with output:
|
|
| 164 |
if audio_uploaded is not None:
|
| 165 |
if audio_uploaded.name.endswith(".wav"):
|
| 166 |
temp = AudioSegment.from_wav(audio_uploaded)
|
| 167 |
-
|
| 168 |
-
temp.export(input)
|
| 169 |
if audio_uploaded.name.endswith(".mp3"):
|
| 170 |
-
input=f"{name}.mp3"
|
| 171 |
|
| 172 |
-
|
| 173 |
-
with open(input, "wb") as f:
|
| 174 |
|
| 175 |
-
f.write(audio_uploaded.getbuffer())
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
# subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
|
| 180 |
-
# f'{name}.wav'])
|
| 181 |
-
# try:
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
|
| 188 |
-
|
| 189 |
-
|
| 190 |
if language == "":
|
| 191 |
model = whisper.load_model(model_name)
|
| 192 |
with st.spinner("Detecting language..."):
|
| 193 |
-
detection = detect_language(
|
| 194 |
language = detection.get("detected_language")
|
| 195 |
del model
|
| 196 |
if len(language) > 2:
|
|
@@ -204,7 +192,7 @@ with output:
|
|
| 204 |
with st.container():
|
| 205 |
with st.spinner(f"Running with {model_name} model"):
|
| 206 |
result = model.transcribe(
|
| 207 |
-
|
| 208 |
language=language,
|
| 209 |
patience=patience,
|
| 210 |
initial_prompt=initial_prompt,
|
|
@@ -228,15 +216,15 @@ with output:
|
|
| 228 |
result["segments"],
|
| 229 |
model_a,
|
| 230 |
metadata,
|
| 231 |
-
|
| 232 |
device=device,
|
| 233 |
)
|
| 234 |
write(
|
| 235 |
-
|
| 236 |
dtype=transcription,
|
| 237 |
result_aligned=result_aligned,
|
| 238 |
)
|
| 239 |
-
trans_text = read(
|
| 240 |
trans.text_area(
|
| 241 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
| 242 |
)
|
|
@@ -319,16 +307,16 @@ with output:
|
|
| 319 |
cont,
|
| 320 |
model_a,
|
| 321 |
metadata,
|
| 322 |
-
|
| 323 |
device=device,
|
| 324 |
)
|
| 325 |
words_segments = result_aligned["word_segments"]
|
| 326 |
write(
|
| 327 |
-
|
| 328 |
dtype=transcription,
|
| 329 |
result_aligned=result_aligned,
|
| 330 |
)
|
| 331 |
-
trans_text = read(
|
| 332 |
char_segments = []
|
| 333 |
word_segments = []
|
| 334 |
|
|
@@ -387,4 +375,4 @@ with output:
|
|
| 387 |
"detected language", language_dict.get(language), disabled=True
|
| 388 |
)
|
| 389 |
os.remove(f"{name}.wav")
|
| 390 |
-
os.remove(f"{json_filname}.json")
|
|
|
|
| 9 |
read,
|
| 10 |
get_key,
|
| 11 |
)
|
|
|
|
| 12 |
import whisperx as whisper
|
| 13 |
import json
|
| 14 |
import pandas as pd
|
|
|
|
| 163 |
if audio_uploaded is not None:
|
| 164 |
if audio_uploaded.name.endswith(".wav"):
|
| 165 |
temp = AudioSegment.from_wav(audio_uploaded)
|
| 166 |
+
temp.export(f"{name}.wav")
|
|
|
|
| 167 |
if audio_uploaded.name.endswith(".mp3"):
|
|
|
|
| 168 |
|
| 169 |
+
try:
|
|
|
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
+
temp = AudioSegment.from_file(audio_uploaded, format="mp3")
|
| 173 |
+
temp.export(f"{name}.wav")
|
| 174 |
+
except:
|
|
|
|
| 175 |
|
| 176 |
+
temp = AudioSegment.from_file(audio_uploaded, format="mp4")
|
| 177 |
+
temp.export(f"{name}.wav")
|
| 178 |
if language == "":
|
| 179 |
model = whisper.load_model(model_name)
|
| 180 |
with st.spinner("Detecting language..."):
|
| 181 |
+
detection = detect_language(f"{name}.wav", model)
|
| 182 |
language = detection.get("detected_language")
|
| 183 |
del model
|
| 184 |
if len(language) > 2:
|
|
|
|
| 192 |
with st.container():
|
| 193 |
with st.spinner(f"Running with {model_name} model"):
|
| 194 |
result = model.transcribe(
|
| 195 |
+
f"{name}.wav",
|
| 196 |
language=language,
|
| 197 |
patience=patience,
|
| 198 |
initial_prompt=initial_prompt,
|
|
|
|
| 216 |
result["segments"],
|
| 217 |
model_a,
|
| 218 |
metadata,
|
| 219 |
+
f"{name}.wav",
|
| 220 |
device=device,
|
| 221 |
)
|
| 222 |
write(
|
| 223 |
+
f"{name}.wav",
|
| 224 |
dtype=transcription,
|
| 225 |
result_aligned=result_aligned,
|
| 226 |
)
|
| 227 |
+
trans_text = read(f"{name}.wav", transcription)
|
| 228 |
trans.text_area(
|
| 229 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
| 230 |
)
|
|
|
|
| 307 |
cont,
|
| 308 |
model_a,
|
| 309 |
metadata,
|
| 310 |
+
f"{name}.wav",
|
| 311 |
device=device,
|
| 312 |
)
|
| 313 |
words_segments = result_aligned["word_segments"]
|
| 314 |
write(
|
| 315 |
+
f"{name}.wav",
|
| 316 |
dtype=transcription,
|
| 317 |
result_aligned=result_aligned,
|
| 318 |
)
|
| 319 |
+
trans_text = read(f"{name}.wav", transcription)
|
| 320 |
char_segments = []
|
| 321 |
word_segments = []
|
| 322 |
|
|
|
|
| 375 |
"detected language", language_dict.get(language), disabled=True
|
| 376 |
)
|
| 377 |
os.remove(f"{name}.wav")
|
| 378 |
+
os.remove(f"{json_filname}.json")
|