Spaces:
Running
Running
| import torch | |
| import os | |
| import streamlit as st | |
| from TTS.api import TTS | |
| from tempfile import NamedTemporaryFile | |
| # By using XTTS you agree to CPML license https://coqui.ai/cpml | |
| os.environ["COQUI_TOS_AGREED"] = "1" | |
def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone or locked."""
    try:
        os.remove(path)
    except OSError:
        pass


@st.cache_resource(show_spinner=False)
def _load_tts(model_name, device):
    """Load a Coqui TTS model once and pair it with a lock guarding its use.

    ``st.cache_resource`` keeps the returned objects alive across Streamlit
    reruns, so the model is not rebuilt on every button click. Because the
    cached model is therefore shared, callers hold the returned lock while
    synthesizing.
    """
    import threading  # local import: only this helper needs it

    return TTS(model_name).to(device), threading.Lock()


def generate_audio(audio_file, text_input):
    """Clone the voice in *audio_file* and speak *text_input* into a WAV file.

    Args:
        audio_file: Reference speaker audio. Either something
            ``TTS.tts_to_file`` accepts directly as ``speaker_wav`` (a file
            path), or a binary file-like object such as a Streamlit upload.
            File-like objects are first written to a temporary ``.wav``.
        text_input: Text to synthesize; it is synthesized with
            ``language='en'``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized speech.
        The file is created with ``delete=False``; the caller owns it and
        is responsible for removing it.

    Raises:
        Exception: Anything raised while loading the model or synthesizing
            is propagated. Temporary files created here are removed first.
    """
    model = "tts_models/multilingual/multi-dataset/xtts_v2"  # coqui/XTTS-v2
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tts, tts_lock = _load_tts(model, device)

    speaker_wav = audio_file
    spooled_speaker = None
    output_path = None
    try:
        # ``speaker_wav`` is documented as a file path, so in-memory uploads
        # are persisted to disk before being handed to the model.
        if hasattr(audio_file, 'read'):
            if hasattr(audio_file, 'getvalue'):
                # getvalue() ignores the current read position.
                payload = audio_file.getvalue()
            else:
                payload = audio_file.read()
            with NamedTemporaryFile(delete=False, suffix='.wav') as speaker_tmp:
                speaker_tmp.write(payload)
                spooled_speaker = speaker_wav = speaker_tmp.name

        # Reserve an output path, then close the handle so the model can
        # write to it.
        with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
            output_path = tmp_file.name

        with tts_lock:
            tts.tts_to_file(
                text=text_input,
                speaker_wav=speaker_wav,
                language='en',
                file_path=output_path,
            )
    except Exception:
        # Do not leave a partial or empty output file behind on failure.
        if output_path is not None:
            _remove_quietly(output_path)
        raise
    finally:
        if spooled_speaker is not None:
            _remove_quietly(spooled_speaker)
    return output_path
def main():
    """Render the Voice Clone page and synthesize speech on request.

    The page lets the user upload a reference ``.wav`` (or pick a bundled
    sample), enter text, and press "Synthesize". The text is then spoken
    in the reference voice via ``generate_audio`` and the result is played
    back on the page.
    """
    import logging  # local import: only used for the failure log below

    # Title
    title = """<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>"""
    st.markdown(title, unsafe_allow_html=True)

    # Subtitle
    subtitle = """<h2 align="center" style="font-size: 1.2rem; margin-bottom: 2rem;">Make your favorite characters say anything!</h2>"""
    st.markdown(subtitle, unsafe_allow_html=True)

    # The empty first entry means "no sample selected".
    sample_files = {
        '': '',
        'Stewie Griffin': 'sample_inputs/stewie.wav',
        'Donald Trump': 'sample_inputs/trump.wav',
        'Joe Rogan': 'sample_inputs/rogan.wav',
    }

    # Upload audio file; the sample picker is only offered without an upload.
    uploaded_file = st.file_uploader('Add an audio (.wav) file of the voice you want to clone...', type=['wav'])
    if uploaded_file is None:
        selected_sample = st.selectbox('Or choose a sample:', list(sample_files))
        speaker_file = sample_files[selected_sample]
    else:
        speaker_file = uploaded_file

    if speaker_file:
        st.header('Reference Audio')
        st.audio(speaker_file, format='audio/wav')

    # Input text
    text_input = st.text_area('What do you want your character to say? Try to keep the prompt around 2 sentences.')

    if not st.button('Synthesize'):
        return

    if not text_input or not text_input.strip():
        st.error('Please provide a text input!')
        return

    if not speaker_file:
        # Without this guard the empty sample path reaches the model and the
        # user only sees the generic synthesis error.
        st.error('Please upload a reference audio file or choose a sample voice first!')
        return

    try:
        with st.spinner('Synthesizing...'):
            output_path = generate_audio(speaker_file, text_input)
        # generate_audio returns a temp file that is never auto-deleted;
        # load it into memory and remove it so files do not pile up.
        try:
            with open(output_path, 'rb') as audio_fh:
                audio_bytes = audio_fh.read()
        finally:
            try:
                os.remove(output_path)
            except OSError:
                pass
        st.header('Synthesized Audio')
        st.audio(audio_bytes, format='audio/wav')
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
        logging.getLogger(__name__).exception('Speech synthesis failed')
        st.error('There was an issue synthesizing the text. Please check the input and try again. Try to keep the input around 2 sentences, and less than 200 characters.')
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()