zameer2662 committed on
Commit
bd7c702
·
verified ·
1 Parent(s): fb3e959

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +25 -22
gradio_app.py CHANGED
@@ -1,13 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
3
  import torch
4
  import torchaudio
5
  import gradio as gr
6
- import warnings
7
  import numpy as np
8
  import tempfile
9
  from einops import rearrange
10
- warnings.filterwarnings("ignore", message=".*weights_only=False.*", category=FutureWarning)
11
  from vocos import Vocos
12
  from pydub import AudioSegment, silence
13
  from model import CFM, UNetT, DiT, MMDiT
@@ -20,7 +32,6 @@ from model.utils import (
20
  )
21
  from transformers import pipeline
22
  import librosa
23
- import click
24
  import soundfile as sf
25
 
26
  try:
@@ -79,6 +90,7 @@ speed = 1.0
79
  fix_duration = None
80
 
81
 
 
82
  def load_model(repo_name, exp_name, model_cls, model_cfg, ckpt_step):
83
  ckpt_path = str(cached_path(f"hf://SWivid/{repo_name}/{exp_name}/model_{ckpt_step}.safetensors"))
84
  # ckpt_path = f"ckpts/{exp_name}/model_{ckpt_step}.pt" # .pt | .safetensors
@@ -212,6 +224,7 @@ def split_text_into_batches(text, max_chars=200, split_words=SPLIT_WORDS):
212
 
213
  return batches
214
 
 
215
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, progress=gr.Progress()):
216
  if exp_name == "F5-TTS":
217
  ema_model = F5TTS_ema_model
@@ -296,6 +309,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
296
 
297
  return (target_sample_rate, final_wave), spectrogram_path
298
 
 
299
  def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, custom_split_words=''):
300
  if not custom_split_words.strip():
301
  custom_words = [word.strip() for word in custom_split_words.split(',')]
@@ -345,6 +359,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, custom_s
345
  gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
346
  return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence)
347
 
 
348
  def generate_podcast(script, speaker1_name, ref_audio1, ref_text1, speaker2_name, ref_audio2, ref_text2, exp_name, remove_silence):
349
  # Split the script into speaker blocks
350
  speaker_pattern = re.compile(f"^({re.escape(speaker1_name)}|{re.escape(speaker2_name)}):", re.MULTILINE)
@@ -681,6 +696,7 @@ with gr.Blocks() as app_emotional:
681
  # Output audio
682
  audio_output_emotional = gr.Audio(label="Synthesized Audio")
683
 
 
684
  def generate_emotional_speech(
685
  regular_audio,
686
  regular_ref_text,
@@ -803,24 +819,11 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
803
  )
804
  gr.TabbedInterface([app_tts, app_podcast, app_emotional, app_credits], ["TTS", "Podcast", "Multi-Style", "Credits"])
805
 
806
- @click.command()
807
- @click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
808
- @click.option("--host", "-H", default=None, help="Host to run the app on")
809
- @click.option(
810
- "--share",
811
- "-s",
812
- default=False,
813
- is_flag=True,
814
- help="Share the app via Gradio share link",
815
- )
816
- @click.option("--api", "-a", default=True, is_flag=True, help="Allow API access")
817
- def main(port, host, share, api):
818
- global app
819
- print(f"Starting app...")
820
- app.queue(api_open=api).launch(
821
- server_name=host, server_port=port, share=share, show_api=api
822
  )
823
 
824
-
825
- if __name__ == "__main__":
826
- main()
 
1
+ # Requirements for Hugging Face Spaces:
2
+ # torch
3
+ # torchaudio
4
+ # gradio
5
+ # numpy
6
+ # einops
7
+ # vocos
8
+ # pydub
9
+ # cached_path
10
+ # transformers
11
+ # librosa
12
+ # soundfile
13
+ # spaces
14
+
15
  import os
16
  import re
17
  import torch
18
  import torchaudio
19
  import gradio as gr
 
20
  import numpy as np
21
  import tempfile
22
  from einops import rearrange
 
23
  from vocos import Vocos
24
  from pydub import AudioSegment, silence
25
  from model import CFM, UNetT, DiT, MMDiT
 
32
  )
33
  from transformers import pipeline
34
  import librosa
 
35
  import soundfile as sf
36
 
37
  try:
 
90
  fix_duration = None
91
 
92
 
93
+ @gpu_decorator
94
  def load_model(repo_name, exp_name, model_cls, model_cfg, ckpt_step):
95
  ckpt_path = str(cached_path(f"hf://SWivid/{repo_name}/{exp_name}/model_{ckpt_step}.safetensors"))
96
  # ckpt_path = f"ckpts/{exp_name}/model_{ckpt_step}.pt" # .pt | .safetensors
 
224
 
225
  return batches
226
 
227
+ @gpu_decorator
228
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, progress=gr.Progress()):
229
  if exp_name == "F5-TTS":
230
  ema_model = F5TTS_ema_model
 
309
 
310
  return (target_sample_rate, final_wave), spectrogram_path
311
 
312
+ @gpu_decorator
313
  def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, custom_split_words=''):
314
  if not custom_split_words.strip():
315
  custom_words = [word.strip() for word in custom_split_words.split(',')]
 
359
  gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
360
  return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence)
361
 
362
+ @gpu_decorator
363
  def generate_podcast(script, speaker1_name, ref_audio1, ref_text1, speaker2_name, ref_audio2, ref_text2, exp_name, remove_silence):
364
  # Split the script into speaker blocks
365
  speaker_pattern = re.compile(f"^({re.escape(speaker1_name)}|{re.escape(speaker2_name)}):", re.MULTILINE)
 
696
  # Output audio
697
  audio_output_emotional = gr.Audio(label="Synthesized Audio")
698
 
699
+ @gpu_decorator
700
  def generate_emotional_speech(
701
  regular_audio,
702
  regular_ref_text,
 
819
  )
820
  gr.TabbedInterface([app_tts, app_podcast, app_emotional, app_credits], ["TTS", "Podcast", "Multi-Style", "Credits"])
821
 
822
+ print(f"Starting app on Hugging Face Spaces...")
823
+ app.queue(api_open=True).launch(
824
+ server_name="0.0.0.0",
825
+ server_port=7860,
826
+ share=False,
827
+ show_api=True
 
 
 
 
 
 
 
 
 
 
828
  )
829