Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| from audioldm import text_to_audio, build_model | |
| from share_btn import community_icon_html, loading_icon_html, share_js | |
# Hugging Face model identifier; not referenced by the other code visible in
# this file.
model_id = "haoheliu/AudioLDM-S-Full"

# Lazily populated model cache. text2audio() builds the model on its first
# call (or when a different model name is requested) and stores the model
# object and the name it was built for in these two globals.
audioldm = None
current_model_name = None
| # def predict(input, history=[]): | |
| # # tokenize the new input sentence | |
| # new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors='pt') | |
| # # append the new user input tokens to the chat history | |
| # bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1) | |
| # # generate a response | |
| # history = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id).tolist() | |
| # # convert the tokens to text, and then split the responses into lines | |
| # response = tokenizer.decode(history[0]).split("<|endoftext|>") | |
| # response = [(response[i], response[i+1]) for i in range(0, len(response)-1, 2)] # convert to tuples of list | |
| # return response, history | |
def text2audio(text, duration, guidance_scale, random_seed, n_candidates, model_name="audioldm-m-text-ft"):
    """Generate audio for a text prompt and render each result with Gradio.

    The model is kept in the module-level ``audioldm`` global and is only
    (re)built when no model has been built yet or when ``model_name`` differs
    from the name of the cached one.

    Args:
        text: Prompt forwarded to ``text_to_audio`` as ``text``.
        duration: Forwarded to ``text_to_audio`` as ``duration``.
        guidance_scale: Forwarded to ``text_to_audio`` as ``guidance_scale``.
        random_seed: Forwarded to ``text_to_audio`` as ``seed``.
        n_candidates: Converted with ``int()`` and forwarded as
            ``n_candidate_gen_per_text``.
        model_name: Name handed to ``build_model`` when a model must be built.

    Returns:
        The ``gr.make_waveform`` result when exactly one item was generated,
        otherwise a list with one ``gr.make_waveform`` result per item.
    """
    global audioldm, current_model_name

    # Reuse the cached model only if one exists and it was built for the
    # requested name; otherwise build it and remember which name it is for.
    cache_is_usable = audioldm is not None and model_name == current_model_name
    if not cache_is_usable:
        audioldm = build_model(model_name=model_name)
        current_model_name = model_name

    generated = text_to_audio(
        latent_diffusion=audioldm,
        text=text,
        seed=random_seed,
        duration=duration,
        guidance_scale=guidance_scale,
        n_candidate_gen_per_text=int(n_candidates),
    )  # [bs, 1, samples]

    # Each item's first entry is paired with 16000 (presumably the sample
    # rate in Hz - confirm against the model) and rendered by make_waveform.
    # NOTE(review): Gradio documents make_waveform's result as meant for a
    # Video component, while the interface in this file declares
    # outputs="audio" - confirm this pairing is intended.
    rendered = []
    for clip in generated:
        rendered.append(gr.make_waveform((16000, clip[0]), bg_image="bg.png"))

    # A single result is returned bare rather than wrapped in a list.
    return rendered[0] if len(rendered) == 1 else rendered
# Web UI for text2audio. The input components are listed in the same order
# as text2audio's positional parameters; model_name is not exposed, so the
# function's default is used.
# NOTE(review): text2audio returns gr.make_waveform results while the output
# component here is "audio" - confirm this pairing is intended.
iface = gr.Interface(
    fn=text2audio,
    inputs=[
        # text
        gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
        # duration
        gr.Slider(minimum=2.5, maximum=10, value=5, step=2.5),
        # guidance_scale
        gr.Slider(minimum=0, maximum=5, value=2.5, step=0.5),
        # random_seed
        gr.Number(value=42),
        # n_candidates
        gr.Number(value=3),
    ],
    outputs="audio",
    allow_flagging="never",
)

# Start the app without requesting a public share link.
iface.launch(share=False)
| #iface.queue(max_size=10).launch(debug=True) | |
| # iface.launch(debug=True, share=True) | |