# Spaces: Paused
| from TTS.api import TTS | |
| import json | |
| import gradio as gr | |
| from share_btn import community_icon_html, loading_icon_html, share_js | |
| import os | |
| import shutil | |
| import re | |
| import numpy as np | |
| from scipy.io import wavfile | |
| from scipy.io.wavfile import write, read | |
| from pydub import AudioSegment | |
# Raw value of the ALLOW_FILE_UPLOAD environment variable (None when unset).
# NOTE(review): not referenced anywhere in the visible code.
file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")

# NOTE(review): presumably a cap on prompt length in sentences; it is not
# referenced anywhere in the visible code.
MAX_NUMBER_SENTENCES = 10

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default locale encoding.
with open("characters.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only the three fields of each entry that this module exposes.
characters = [
    {
        "image": item["image"],
        "title": item["title"],
        "speaker": item["speaker"],
    }
    for item in data
]

# Instantiate the Bark model once at import time; gpu=False keeps it on CPU.
tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=False)
def load_hidden_mic(audio_in):
    """Clear leftovers of the previous recording, then pass the new one through.

    Removes ``bark_voices/audio-0-100`` and ``bark_voices/audio-0-100_cleaned``
    when they exist, printing the outcome of each step. A failed removal is
    reported and otherwise ignored.

    Args:
        audio_in: The newly recorded sample, exactly as received.

    Returns:
        ``audio_in`` unchanged.
    """
    print("USER RECORDED A NEW SAMPLE")

    voices_root = 'bark_voices'
    raw_dir = os.path.join(voices_root, 'audio-0-100')
    cleaned_dir = os.path.join(voices_root, 'audio-0-100_cleaned')

    # One row per folder: (path, message after removal, message when absent).
    cleanup_plan = (
        (
            raw_dir,
            f"Successfully deleted the folder previously created from last raw recorded sample: {raw_dir}",
            f"OK, the folder a raw recorded sample does not exist: {raw_dir}",
        ),
        (
            cleaned_dir,
            f"Successfully deleted the folder previously created from last cleaned recorded sample: {cleaned_dir}",
            f"Ok, the folderfor a cleaned recorded sample does not exist: {cleaned_dir}",
        ),
    )

    print("We need to clean previous util files, if needed:")
    for target, removed_msg, absent_msg in cleanup_plan:
        if not os.path.exists(target):
            print(absent_msg)
            continue
        try:
            shutil.rmtree(target)
            print(removed_msg)
        except OSError as err:
            print(f"Error: {target} - {err.strerror}")

    return audio_in
def infer(hidden_numpy_audio):
    """Clone the recorded voice with Bark and speak the hard-coded prompt.

    The recorded sample is first written to
    ``bark_voices/audio-0-100/audio-0-100.wav`` so that the TTS call, which is
    pointed at ``voice_dir="bark_voices/"`` with speaker ``audio-0-100``, has
    a reference clip to clone from. The generated speech is saved to
    ``output.wav`` and rendered as a waveform video.

    Args:
        hidden_numpy_audio: Value of the hidden ``gr.Audio(type="numpy")``
            component. Assumed to be a ``(sample_rate, samples)`` pair, as
            Gradio documents for numpy audio -- TODO confirm.

    Returns:
        A ``(audio_path, video_path)`` pair, one value for each of the two
        outputs bound to the submit button.

    Raises:
        gr.Error: If no sample has been recorded yet.
    """
    print("""
βββββ
NEW INFERENCE:
βββββββ
""")

    # Submitting before recording leaves the hidden component empty.
    if hidden_numpy_audio is None:
        raise gr.Error("Please record a voice sample before submitting.")

    sample_rate, samples = hidden_numpy_audio

    # Same folder name that load_hidden_mic() clears for the raw recorded
    # sample, so every new recording starts from a clean speaker directory.
    file_name = "audio-0-100"
    destination_path = os.path.join("bark_voices", file_name)
    os.makedirs(destination_path, exist_ok=True)

    # The previous code referenced file_name/destination_path without ever
    # defining them or saving the sample; persist the clip where the speaker
    # lookup below expects it.
    wavfile.write(
        os.path.join(destination_path, f"{file_name}.wav"),
        sample_rate,
        samples,
    )

    prompt = "Hi mom, I have a broken tire and need a transfer. Can you send me some money please?"

    gr.Info("Generating audio from prompt")
    tts.tts_to_file(text=prompt,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=file_name)

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")

    # Exactly two values: the click handler binds [cloned_out, video_out].
    return "output.wav", tts_video
# Custom stylesheet handed to gr.Blocks(css=...).
# Rule 1 stretches the microphone button to full width and enlarges its label.
# Rules 2-3 restyle the record icon and its dot; they target Svelte-generated
# class names (svelte-1thnwz), so they are tied to the Gradio build in use.
css = """
.mic-wrap > button {
width: 100%;
height: 60px;
font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
display: flex;
position: relative;
margin-right: var(--size-2);
width: unset;
height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
width: 20px!important;
height: 20px!important;
}
"""
# Intro banner rendered at the top of the page through gr.Markdown().
# The quoted sentence must stay word-for-word in sync with the prompt that
# infer() synthesizes. The surrounding quotation marks are written as HTML
# entities so they cannot be garbled by a wrong file encoding.
html_header = """
<h1 style="text-align: center;">Coqui + Bark Voice Cloning</h1>
<p style="text-align: center;">
Mimic any voice character in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TTS + Bark</a> demo ! <br />
Record a clean 20 seconds voice using the microphone provided.<br />
The hard-coded TTS prompt is: &ldquo;Hi mom, I have a broken tire and need a transfer. Can you send me some money please?&rdquo;<br />
</p>
"""
# Page layout and event wiring for the demo.
with gr.Blocks(css=css) as demo:
    gr.Markdown(html_header)

    # --- Components, created in the order they are laid out on the page ---
    micro_in = gr.Audio(
        label="Record voice to clone",
        type="filepath",
        source="microphone",
        interactive=True,
    )
    # Invisible audio component that receives whatever load_hidden_mic()
    # returns and is the sole input of infer().
    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
    micro_submit_btn = gr.Button("Submit")
    cloned_out = gr.Audio(label="Text to speech output", visible=False)
    video_out = gr.Video(label="Waveform video", elem_id="voice-video-out")

    # --- Events ---
    # When a recording stops: clean old folders and fill the hidden
    # component, bypassing the queue.
    micro_in.stop_recording(
        fn=load_hidden_mic,
        inputs=[micro_in],
        outputs=[hidden_audio_numpy],
        queue=False,
    )
    # On submit: run the cloning pipeline and fill both output components.
    micro_submit_btn.click(
        fn=infer,
        inputs=[hidden_audio_numpy],
        outputs=[cloned_out, video_out],
    )

# Queue requests (at most 10 waiting, API closed) and start the server.
demo.queue(api_open=False, max_size=10).launch()