voice_clone_v4

Paused

voice_clone_v4 / app.py

Amjad Hassoun

Upload 33 files

eb21a2f about 2 years ago

4.42 kB

	from TTS.api import TTS
	import json
	import gradio as gr
	from share_btn import community_icon_html, loading_icon_html, share_js
	import os
	import shutil
	import re

	import numpy as np
	from scipy.io import wavfile
	from scipy.io.wavfile import write, read
	from pydub import AudioSegment

	# Value of the ALLOW_FILE_UPLOAD environment variable (None when it is unset).
	file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
	MAX_NUMBER_SENTENCES = 10

	# Parse the character catalogue shipped next to this script.
	with open("characters.json", "r") as file:
	    data = json.load(file)

	# Keep only the three fields of each catalogue entry, in this key order.
	# A missing key raises KeyError, exactly as direct indexing would.
	characters = [
	    {field: item[field] for field in ("image", "title", "speaker")}
	    for item in data
	]

	# Bark multilingual model from Coqui TTS, loaded once at import time on CPU.
	tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=False)


	def _remove_sample_folder(folder_path, description):
	    """Delete *folder_path* (if present) and log what happened.

	    Args:
	        folder_path: Directory to remove recursively.
	        description: Human-readable label used in the log lines, e.g.
	            ``"raw recorded sample"``.

	    Failures are reported on stdout rather than raised: the cleanup is
	    best-effort and must not block the recording from being forwarded.
	    """
	    try:
	        shutil.rmtree(folder_path)
	    except FileNotFoundError:
	        # Nothing left over from a previous recording.
	        print(
	            f"OK, the folder for a {description} does not exist: {folder_path}")
	    except OSError as e:
	        print(f"Error: {folder_path} - {e.strerror}")
	    else:
	        print(
	            f"Successfully deleted the folder previously created from last {description}: {folder_path}")


	def load_hidden_mic(audio_in):
	    """Clear artefacts of the previous recording and pass the new one through.

	    Removes ``bark_voices/audio-0-100`` and ``bark_voices/audio-0-100_cleaned``
	    when they exist, so files from an earlier recording are not left behind.

	    Args:
	        audio_in: The freshly recorded audio value handed over by the caller.

	    Returns:
	        ``audio_in`` unchanged.
	    """
	    print("USER RECORDED A NEW SAMPLE")

	    library_path = 'bark_voices'
	    stale_folders = (
	        ('audio-0-100', "raw recorded sample"),
	        ('audio-0-100_cleaned', "cleaned recorded sample"),
	    )

	    print("We need to clean previous util files, if needed:")
	    for folder_name, description in stale_folders:
	        _remove_sample_folder(
	            os.path.join(library_path, folder_name), description)

	    return audio_in


	def infer(hidden_numpy_audio):
	    """Clone the recorded voice and speak the hard-coded prompt with it.

	    The recording is written to ``bark_voices/audio-0-100/audio-0-100.wav``
	    and that folder name is passed to the TTS model as the speaker. The
	    same folder is the one ``load_hidden_mic`` clears on every new recording.

	    Args:
	        hidden_numpy_audio: Value of the hidden ``gr.Audio(type="numpy")``
	            component; expected to be a ``(sample_rate, samples)`` pair,
	            or ``None`` when nothing has been recorded yet.

	    Returns:
	        A 2-tuple ``(audio_path, video_path)`` matching the two outputs
	        bound to the Submit button: the generated ``output.wav`` and the
	        waveform video produced from it.

	    Raises:
	        gr.Error: If no recording is available.
	    """
	    print("""
	—————
	NEW INFERENCE:
	———————
	""")

	    if hidden_numpy_audio is None:
	        # Without a sample there is no voice to clone; tell the user so.
	        raise gr.Error("Please record a voice sample before submitting.")

	    prompt = "Hi mom, I have a broken tire and need a transfer. Can you send me some money please?"

	    # Store the sample as <voice_dir>/<speaker>/<speaker>.wav, the layout
	    # the Bark voice-cloning lookup expects for a custom speaker.
	    voice_dir = "bark_voices"
	    speaker_name = "audio-0-100"
	    speaker_dir = os.path.join(voice_dir, speaker_name)
	    os.makedirs(speaker_dir, exist_ok=True)
	    sample_rate, samples = hidden_numpy_audio
	    wavfile.write(
	        os.path.join(speaker_dir, f"{speaker_name}.wav"), sample_rate, samples)

	    gr.Info("Generating audio from prompt")
	    tts.tts_to_file(text=prompt,
	                    file_path="output.wav",
	                    voice_dir="bark_voices/",
	                    speaker=speaker_name)

	    print("Preparing final waveform video ...")
	    tts_video = gr.make_waveform(audio="output.wav")
	    print(tts_video)
	    print("FINISHED")
	    return "output.wav", tts_video


	# Custom stylesheet passed to gr.Blocks(css=...): enlarges the microphone
	# button and the record-icon dot.
	# NOTE(review): the ".svelte-1thnwz" suffixes look like build-generated class
	# names, so these selectors are presumably tied to one Gradio version - confirm.
	css = """
	.mic-wrap > button {
	width: 100%;
	height: 60px;
	font-size: 1.4em!important;
	}
	.record-icon.svelte-1thnwz {
	display: flex;
	position: relative;
	margin-right: var(--size-2);
	width: unset;
	height: unset;
	}
	span.record-icon > span.dot.svelte-1thnwz {
	width: 20px!important;
	height: 20px!important;
	}
	"""
	# HTML intro rendered at the top of the page via gr.Markdown.
	# NOTE(review): the prompt quoted here says "an e-transfer", while the prompt
	# string inside infer() says "a transfer" - confirm which wording is intended.
	html_header = """
	<h1 style="text-align: center;">Coqui + Bark Voice Cloning</h1>
	<p style="text-align: center;">
	Mimic any voice character in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TTS + Bark</a> demo ! <br />
	Record a clean 20 seconds voice using the microphone provided.<br />
	The hard-coded TTS prompt is: “Hi mom, I have a broken tire and need an e-transfer. Can you send me some money please?”<br />
	</p>
	"""

	with gr.Blocks(css=css) as demo:
	    # Intro text shown above the controls.
	    gr.Markdown(html_header)

	    # Visible recorder: delivers the recording to callbacks as a file path.
	    micro_in = gr.Audio(
	        label="Record voice to clone",
	        type="filepath",
	        source="microphone",
	        interactive=True,
	    )
	    # Invisible holder for the recording in numpy form; this is what the
	    # Submit button feeds to infer().
	    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
	    micro_submit_btn = gr.Button("Submit")

	    # When a recording ends, clean up old files and copy the new sample
	    # into the hidden component (bypassing the queue).
	    micro_in.stop_recording(
	        fn=load_hidden_mic,
	        inputs=[micro_in],
	        outputs=[hidden_audio_numpy],
	        queue=False,
	    )

	    # Result widgets: the audio player is created hidden, the video is shown.
	    cloned_out = gr.Audio(label="Text to speech output", visible=False)
	    video_out = gr.Video(label="Waveform video", elem_id="voice-video-out")

	    # Submit runs the voice cloning and fills both result widgets.
	    micro_submit_btn.click(
	        fn=infer,
	        inputs=[hidden_audio_numpy],
	        outputs=[cloned_out, video_out],
	    )

	# Serve the app with a queue of at most 10 pending requests and the API closed.
	demo.queue(api_open=False, max_size=10).launch()