Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
5f635fb
1
Parent(s):
5c81b55
update
Browse files
- app.py +5 -2
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import spaces
|
| 2 |
import torch
|
| 3 |
import os
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import traceback
|
| 6 |
from huggingface_hub import snapshot_download
|
|
@@ -10,7 +11,7 @@ from tts.infer_cli import MegaTTS3DiTInfer, convert_to_wav, cut_wav
|
|
| 10 |
def download_weights():
|
| 11 |
"""Download model weights from HuggingFace if not already present."""
|
| 12 |
repo_id = "mrfakename/MegaTTS3-VoiceCloning"
|
| 13 |
-
weights_dir = "
|
| 14 |
|
| 15 |
if not os.path.exists(weights_dir):
|
| 16 |
print("Downloading model weights from HuggingFace...")
|
|
@@ -62,7 +63,9 @@ def generate_speech(inp_audio, inp_text, infer_timestep, p_w, t_w):
|
|
| 62 |
|
| 63 |
|
| 64 |
with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
|
| 65 |
-
gr.Markdown("#
|
|
|
|
|
|
|
| 66 |
gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
|
| 67 |
|
| 68 |
with gr.Row():
|
|
|
|
| 1 |
import spaces
|
| 2 |
import torch
|
| 3 |
import os
|
| 4 |
+
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
| 5 |
import gradio as gr
|
| 6 |
import traceback
|
| 7 |
from huggingface_hub import snapshot_download
|
|
|
|
| 11 |
def download_weights():
|
| 12 |
"""Download model weights from HuggingFace if not already present."""
|
| 13 |
repo_id = "mrfakename/MegaTTS3-VoiceCloning"
|
| 14 |
+
weights_dir = "checkpoints"
|
| 15 |
|
| 16 |
if not os.path.exists(weights_dir):
|
| 17 |
print("Downloading model weights from HuggingFace...")
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
|
| 66 |
+
gr.Markdown("# MegaTTS 3 Voice Cloning")
|
| 67 |
+
gr.Markdown("MegaTTS 3 is a text-to-speech model trained by ByteDance with exceptional voice cloning capabilities. The original authors did not release the WavVAE encoder, so voice cloning was not publicly available; however, thanks to [@ACoderPassBy](https://modelscope.cn/models/ACoderPassBy/MegaTTS-SFT)'s WavVAE encoder, we can now clone voices with MegaTTS 3!")
|
| 68 |
+
gr.Markdown("h/t to MysteryShack on Discord for the info about the unofficial WavVAE encoder!")
|
| 69 |
gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
|
| 70 |
|
| 71 |
with gr.Row():
|
requirements.txt
CHANGED
|
@@ -16,3 +16,4 @@ torchdiffeq==0.2.5
|
|
| 16 |
openai-whisper==20240930
|
| 17 |
httpx==0.28.1
|
| 18 |
gradio==5.23.1
|
|
|
|
|
|
| 16 |
openai-whisper==20240930
|
| 17 |
httpx==0.28.1
|
| 18 |
gradio==5.23.1
|
| 19 |
+
hf-transfer
|