Spaces:
Running
Running
Commit
·
5e3e8ef
1
Parent(s):
2f1bcc3
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,8 @@ from utils import translate_to_english, detect_language, write, read, get_key
|
|
| 6 |
import whisperx as whisper
|
| 7 |
import json
|
| 8 |
import pandas as pd
|
|
|
|
|
|
|
| 9 |
|
| 10 |
if "btn1" not in st.session_state:
|
| 11 |
st.session_state["btn1"] = False
|
|
@@ -135,22 +137,37 @@ with input:
|
|
| 135 |
)
|
| 136 |
else:
|
| 137 |
temperature = [temperature]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# st.write(temperature)
|
| 139 |
submit = st.button("Submit", type="primary")
|
| 140 |
with output:
|
| 141 |
st.header("Output")
|
|
|
|
|
|
|
|
|
|
| 142 |
if submit:
|
| 143 |
if audio_uploaded is None:
|
| 144 |
# st.audio(audio_bytes, format="audio/wav")
|
| 145 |
audio_uploaded = audio_file
|
| 146 |
if audio_uploaded is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
# audio_bytes = audio_uploaded.read()
|
| 149 |
# st.audio(audio_bytes, format="audio/wav")
|
| 150 |
if language == "":
|
| 151 |
model = whisper.load_model(model_name)
|
| 152 |
with st.spinner("Detecting language..."):
|
| 153 |
-
detection = detect_language(
|
| 154 |
language = detection.get("detected_language")
|
| 155 |
del model
|
| 156 |
# st.write(language)
|
|
@@ -169,7 +186,7 @@ with output:
|
|
| 169 |
with st.container():
|
| 170 |
with st.spinner(f"Running with {model_name} model"):
|
| 171 |
result = model.transcribe(
|
| 172 |
-
|
| 173 |
language=language,
|
| 174 |
patience=patience,
|
| 175 |
initial_prompt=initial_prompt,
|
|
@@ -193,7 +210,7 @@ with output:
|
|
| 193 |
result["segments"],
|
| 194 |
model_a,
|
| 195 |
metadata,
|
| 196 |
-
|
| 197 |
device=device,
|
| 198 |
)
|
| 199 |
|
|
@@ -212,11 +229,11 @@ with output:
|
|
| 212 |
if text_json is None:
|
| 213 |
words_segments = result_aligned["word_segments"]
|
| 214 |
write(
|
| 215 |
-
|
| 216 |
dtype=transcription,
|
| 217 |
result_aligned=result_aligned,
|
| 218 |
)
|
| 219 |
-
trans_text = read(
|
| 220 |
trans.text_area(
|
| 221 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
| 222 |
)
|
|
@@ -248,3 +265,4 @@ with output:
|
|
| 248 |
lang.text_input(
|
| 249 |
"detected language", language_dict.get(language), disabled=True
|
| 250 |
)
|
|
|
|
|
|
| 6 |
import whisperx as whisper
|
| 7 |
import json
|
| 8 |
import pandas as pd
|
| 9 |
+
from pydub import AudioSegment
|
| 10 |
+
import os
|
| 11 |
|
| 12 |
if "btn1" not in st.session_state:
|
| 13 |
st.session_state["btn1"] = False
|
|
|
|
| 137 |
)
|
| 138 |
else:
|
| 139 |
temperature = [temperature]
|
| 140 |
+
try:
|
| 141 |
+
if len(temperature) == 0:
|
| 142 |
+
st.error("Choose correct value for temperature")
|
| 143 |
+
except:
|
| 144 |
+
pass
|
| 145 |
# st.write(temperature)
|
| 146 |
submit = st.button("Submit", type="primary")
|
| 147 |
with output:
|
| 148 |
st.header("Output")
|
| 149 |
+
import uuid
|
| 150 |
+
|
| 151 |
+
name = str(uuid.uuid1())
|
| 152 |
if submit:
|
| 153 |
if audio_uploaded is None:
|
| 154 |
# st.audio(audio_bytes, format="audio/wav")
|
| 155 |
audio_uploaded = audio_file
|
| 156 |
if audio_uploaded is not None:
|
| 157 |
+
if audio_uploaded.name.endswith(".wav"):
|
| 158 |
+
temp = AudioSegment.from_wav(audio_uploaded)
|
| 159 |
+
temp.export(f"{name}.wav")
|
| 160 |
+
|
| 161 |
+
if audio_uploaded.name.endswith(".mp3"):
|
| 162 |
+
temp = AudioSegment.from_wav(audio_uploaded)
|
| 163 |
+
temp.export(f"{name}.wav")
|
| 164 |
|
| 165 |
# audio_bytes = audio_uploaded.read()
|
| 166 |
# st.audio(audio_bytes, format="audio/wav")
|
| 167 |
if language == "":
|
| 168 |
model = whisper.load_model(model_name)
|
| 169 |
with st.spinner("Detecting language..."):
|
| 170 |
+
detection = detect_language(f"{name}.wav", model)
|
| 171 |
language = detection.get("detected_language")
|
| 172 |
del model
|
| 173 |
# st.write(language)
|
|
|
|
| 186 |
with st.container():
|
| 187 |
with st.spinner(f"Running with {model_name} model"):
|
| 188 |
result = model.transcribe(
|
| 189 |
+
f"{name}.wav",
|
| 190 |
language=language,
|
| 191 |
patience=patience,
|
| 192 |
initial_prompt=initial_prompt,
|
|
|
|
| 210 |
result["segments"],
|
| 211 |
model_a,
|
| 212 |
metadata,
|
| 213 |
+
f"{name}.wav",
|
| 214 |
device=device,
|
| 215 |
)
|
| 216 |
|
|
|
|
| 229 |
if text_json is None:
|
| 230 |
words_segments = result_aligned["word_segments"]
|
| 231 |
write(
|
| 232 |
+
f"{name}.wav",
|
| 233 |
dtype=transcription,
|
| 234 |
result_aligned=result_aligned,
|
| 235 |
)
|
| 236 |
+
trans_text = read(f"{name}.wav", transcription)
|
| 237 |
trans.text_area(
|
| 238 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
| 239 |
)
|
|
|
|
| 265 |
lang.text_input(
|
| 266 |
"detected language", language_dict.get(language), disabled=True
|
| 267 |
)
|
| 268 |
+
os.remove(f"{name}.wav")
|