Spaces:

tahirturk
/

VoiceCloner

Running on Zero

App Files Files Community

VoiceCloner / app.py

tahirturk

Update app.py

f12ab45 verified about 1 month ago

raw

history blame contribute delete

5.3 kB

	import os
	import re
	import tempfile

	# NOTE(review): `spaces` is kept ahead of `torch`; Hugging Face ZeroGPU guidance
	# asks for it to be imported before CUDA is initialised - confirm before reordering.
	import spaces
	import gradio as gr
	import torch
	from pydub import AudioSegment
	from TTS.api import TTS

	# Ensure the local "audio" folder is present (the UI examples point at ./audio/...).
	os.makedirs("audio", exist_ok=True)

	# Agree to the Coqui TTS license through the environment variable it checks.
	os.environ.update(COQUI_TOS_AGREED="1")

	# Pick the GPU whenever torch reports one, otherwise stay on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the XTTS v2 model once at import time and move it to the chosen device.
	XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
	tts = TTS(XTTS_MODEL_NAME).to(device)

	# Function for long text voice cloning
	@spaces.GPU(enable_queue=True)
	def clone(text, audio):
	    """Speak ``text`` in the voice of the reference clip and return a WAV path.

	    The text is split into sentence-sized chunks, each chunk is synthesised
	    separately with the module-level ``tts`` model, and the resulting clips
	    are concatenated into a single WAV file.

	    Args:
	        text: The text to synthesise.
	        audio: File path of the reference voice clip (passed to the model as
	            ``speaker_wav``).

	    Returns:
	        Path of the generated WAV file. Each call gets its own uniquely named
	        file, so simultaneous requests cannot overwrite one another's result.

	    Raises:
	        gr.Error: If ``text`` is empty/blank or no reference clip was given.
	    """
	    # Fail early with a message the UI can display instead of an opaque
	    # TypeError from re.split(None) or a zero-length output file.
	    if text is None or not text.strip():
	        raise gr.Error("Please enter some text to synthesise.")
	    if not audio:
	        raise gr.Error("Please upload a reference audio clip.")

	    # Split after sentence-ending punctuation followed by ANY whitespace, so
	    # sentences separated by newlines are chunked as well (not only spaces).
	    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
	    final_audio = AudioSegment.silent(duration=0)

	    # Per-call scratch directory: chunk files are private to this request and
	    # are removed automatically once they have been merged.
	    with tempfile.TemporaryDirectory() as work_dir:
	        for i, chunk in enumerate(sentences):
	            temp_path = os.path.join(work_dir, f"chunk_{i}.wav")
	            tts.tts_to_file(
	                text=chunk,
	                speaker_wav=audio,
	                language="en",
	                file_path=temp_path,
	            )
	            final_audio += AudioSegment.from_wav(temp_path)

	    # Merge chunks into one uniquely named file that outlives the scratch dir.
	    fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".wav")
	    os.close(fd)  # pydub reopens the path itself; only the name is needed
	    final_audio.export(output_path, format="wav")
	    return output_path

	# UI
	# Colour theme handed to the Blocks container.
	APP_THEME = gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", neutral_hue="slate")

	# Custom CSS, delivered to the page inside an HTML <style> element.
	CUSTOM_STYLE_HTML = """
	<style>
	body {
	background: linear-gradient(135deg, #0f172a, #1e293b);
	font-family: 'Inter', sans-serif;
	color: #f8fafc;
	}
	.gradio-container {
	max-width: 1200px;
	margin: auto;
	}
	.gr-block {
	background: #1e293b;
	border-radius: 16px;
	box-shadow: 0 8px 20px rgba(0,0,0,0.4);
	padding: 20px;
	transition: all 0.3s ease-in-out;
	}
	.gr-block:hover {
	box-shadow: 0 12px 28px rgba(0,0,0,0.6);
	}
	h1, h2, h3 {
	color: #06b6d4;
	font-weight: 700;
	}
	.gr-button.primary {
	background: linear-gradient(90deg, #06b6d4, #14b8a6);
	border: none;
	border-radius: 12px;
	font-weight: bold;
	transition: 0.3s;
	}
	.gr-button.primary:hover {
	background: linear-gradient(90deg, #14b8a6, #06b6d4);
	transform: scale(1.05);
	}
	.gr-textbox textarea {
	background: #0f172a !important;
	color: #f8fafc !important;
	border-radius: 12px;
	border: 1px solid #334155;
	}
	.gr-textbox textarea:focus {
	border-color: #06b6d4;
	outline: none !important;
	box-shadow: 0 0 10px rgba(6,182,212,0.5);
	}
	.gr-audio input, .gr-audio {
	border-radius: 12px !important;
	border: 1px solid #334155 !important;
	background: #0f172a !important;
	}
	</style>
	"""

	# Heading and short description shown above the inputs.
	INTRO_MARKDOWN = """
	# 🎙️ Voice Clone Studio By Tahir Turk
	Clone any voice by uploading a short reference audio file
	and typing what you want it to say.
	Powered by XTTS v2 — multilingual voice cloning.
	"""

	# Usage hints shown underneath the generated audio player.
	TIPS_MARKDOWN = """
	---
	⚡ Tips for Best Results
	- Use a clean, clear reference audio (5–15 seconds works best).
	- Long text will be split automatically for natural speech.
	- You can generate minutes of audio now without cutoff.
	---
	"""

	# [prompt text, reference clip path] rows offered as clickable examples.
	SAMPLE_VOICES = [
	    ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
	    ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
	    ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
	    ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
	    ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
	    ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
	]

	# NOTE(review): the nesting below was reconstructed from a whitespace-flattened
	# copy of this file - confirm the layout against the deployed app.
	with gr.Blocks(theme=APP_THEME) as demo:
	    gr.HTML(CUSTOM_STYLE_HTML)

	    with gr.Row():
	        # First column: description, text box, reference clip and the trigger button.
	        with gr.Column(scale=1):
	            gr.Markdown(INTRO_MARKDOWN)

	            text_input = gr.Textbox(
	                label="Enter your text",
	                placeholder="Type anything you'd like the cloned voice to say...",
	                lines=6,
	            )
	            audio_input = gr.Audio(
	                type="filepath",
	                label="Upload voice reference (WAV or MP3)",
	            )
	            submit_btn = gr.Button("✨ Generate Voice", variant="primary")

	        # Second column: the generated audio player followed by the tips.
	        with gr.Column(scale=1):
	            output_audio = gr.Audio(type="filepath", label="🔊 Generated Voice Output")
	            gr.Markdown(TIPS_MARKDOWN)

	    # Example prompts paired with their sample reference clips.
	    with gr.Row():
	        gr.Examples(
	            examples=SAMPLE_VOICES,
	            inputs=[text_input, audio_input],
	            outputs=[output_audio],
	            label="🎭 Try with these sample voices",
	        )

	    # Clicking the button runs `clone` on the text and reference clip.
	    click_inputs = [text_input, audio_input]
	    submit_btn.click(fn=clone, inputs=click_inputs, outputs=output_audio)

	demo.launch()