Spaces:
Running
on
Zero
Running
on
Zero
import os
import re
import tempfile

# Third-party imports keep their original relative order.
import spaces
import gradio as gr
import torch
from TTS.api import TTS
from pydub import AudioSegment
# Ensure the ./audio directory exists before the app starts.
os.makedirs("audio", exist_ok=True)

# Agree to the Coqui TTS license; set before the model is constructed below.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the XTTS v2 model once at import time and move it to the chosen device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
# Function for long text voice cloning
def clone(text, audio):
    """Speak ``text`` in the voice of the reference recording ``audio``.

    The text is split into sentence-sized chunks, each chunk is synthesized
    to its own WAV file with the module-level ``tts`` model, and the chunks
    are concatenated in order into one WAV file.

    Args:
        text: Text to synthesize. It is split after ``.``, ``!`` or ``?``
            when followed by whitespace; blank chunks are skipped.
        audio: Path to the reference voice recording, passed to the model
            as ``speaker_wav``.

    Returns:
        Path of the merged WAV file.

    Raises:
        gr.Error: If no reference audio was supplied or the text contains
            nothing to synthesize.
    """
    if not audio:
        raise gr.Error("Please upload a voice reference audio file.")

    # ``text or ""`` tolerates a None value; ``\s+`` also splits sentences
    # that are separated by newlines rather than spaces.
    pieces = re.split(r"(?<=[.!?])\s+", text or "")
    chunks = [piece.strip() for piece in pieces if piece.strip()]
    if not chunks:
        raise gr.Error("Please enter some text to synthesize.")

    final_audio = AudioSegment.silent(duration=0)

    # Per-call scratch directory: chunk files from simultaneous requests can
    # no longer overwrite each other, and they are removed when we are done.
    with tempfile.TemporaryDirectory() as work_dir:
        for i, chunk in enumerate(chunks):
            temp_path = os.path.join(work_dir, f"chunk_{i}.wav")
            tts.tts_to_file(
                text=chunk,
                speaker_wav=audio,
                language="en",
                file_path=temp_path,
            )
            final_audio += AudioSegment.from_wav(temp_path)

    # Merge chunks into one file. A unique name per call keeps concurrent
    # requests from clobbering each other's result.
    fd, output_path = tempfile.mkstemp(prefix="voice_clone_", suffix=".wav")
    os.close(fd)
    final_audio.export(output_path, format="wav")
    return output_path
# UI
# Two-column layout: inputs (text, reference audio, generate button) on the
# left, generated audio and usage tips on the right, sample voices below.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", neutral_hue="slate")) as demo:
    # Custom CSS
    gr.HTML("""
    <style>
    body {
        background: linear-gradient(135deg, #0f172a, #1e293b);
        font-family: 'Inter', sans-serif;
        color: #f8fafc;
    }
    .gradio-container {
        max-width: 1200px;
        margin: auto;
    }
    .gr-block {
        background: #1e293b;
        border-radius: 16px;
        box-shadow: 0 8px 20px rgba(0,0,0,0.4);
        padding: 20px;
        transition: all 0.3s ease-in-out;
    }
    .gr-block:hover {
        box-shadow: 0 12px 28px rgba(0,0,0,0.6);
    }
    h1, h2, h3 {
        color: #06b6d4;
        font-weight: 700;
    }
    .gr-button.primary {
        background: linear-gradient(90deg, #06b6d4, #14b8a6);
        border: none;
        border-radius: 12px;
        font-weight: bold;
        transition: 0.3s;
    }
    .gr-button.primary:hover {
        background: linear-gradient(90deg, #14b8a6, #06b6d4);
        transform: scale(1.05);
    }
    .gr-textbox textarea {
        background: #0f172a !important;
        color: #f8fafc !important;
        border-radius: 12px;
        border: 1px solid #334155;
    }
    .gr-textbox textarea:focus {
        border-color: #06b6d4;
        outline: none !important;
        box-shadow: 0 0 10px rgba(6,182,212,0.5);
    }
    .gr-audio input, .gr-audio {
        border-radius: 12px !important;
        border: 1px solid #334155 !important;
        background: #0f172a !important;
    }
    </style>
    """)

    with gr.Row():
        # Left column: title, text to speak, reference clip, and the trigger.
        with gr.Column(scale=1):
            gr.Markdown(
                """
                # 🎙️ Voice Clone Studio By Tahir Turk
                Clone any voice by uploading a short reference audio file
                and typing what you want it to say.
                **Powered by XTTS v2 — multilingual voice cloning.**
                """
            )
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type anything you'd like the cloned voice to say...",
                lines=6
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Upload voice reference (WAV or MP3)"
            )
            submit_btn = gr.Button("✨ Generate Voice", variant="primary")

        # Right column: synthesized result and usage tips.
        with gr.Column(scale=1):
            output_audio = gr.Audio(type="filepath", label="🔊 Generated Voice Output")
            gr.Markdown(
                """
                ---
                ⚡ **Tips for Best Results**
                - Use a **clean, clear** reference audio (5–15 seconds works best).
                - Long text will be split automatically for natural speech.
                - You can generate **minutes of audio** now without cutoff.
                ---
                """
            )

    with gr.Row():
        # Clicking an example fills the text and reference-audio inputs.
        gr.Examples(
            examples=[
                ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
                ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
                ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
                ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
                ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
                ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
            ],
            inputs=[text_input, audio_input],
            outputs=[output_audio],
            label="🎧 Try with these sample voices"
        )

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(fn=clone, inputs=[text_input, audio_input], outputs=output_audio)

demo.launch()