Sync from GitHub repo
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
app.py
CHANGED
@@ -540,15 +540,19 @@ Have a conversation with an AI using your reference voice!
     chatbot_interface = gr.Chatbot(label="Conversation")
 
     with gr.Row():
-        with gr.Column():
-            audio_output_chat = gr.Audio(autoplay=True)
         with gr.Column():
             audio_input_chat = gr.Microphone(
                 label="Speak your message",
                 type="filepath",
             )
-
-
+            audio_output_chat = gr.Audio(autoplay=True)
+        with gr.Column():
+            text_input_chat = gr.Textbox(
+                label="Type your message",
+                lines=1,
+            )
+            send_btn_chat = gr.Button("Send")
+            clear_btn_chat = gr.Button("Clear Conversation")
 
     conversation_state = gr.State(
         value=[
@@ -561,13 +565,14 @@ Have a conversation with an AI using your reference voice!
 
     # Modify process_audio_input to use model and tokenizer from state
     @gpu_decorator
-    def process_audio_input(audio_path, history, conv_state):
-        """Handle audio input from user"""
-
+    def process_audio_input(audio_path, text, history, conv_state):
+        """Handle audio or text input from user"""
+
+        if not audio_path and not text.strip():
             return history, conv_state, ""
 
-
-
+        if audio_path:
+            text = preprocess_ref_audio_text(audio_path, text)[1]
 
         if not text.strip():
             return history, conv_state, ""
@@ -621,7 +626,7 @@ Have a conversation with an AI using your reference voice!
     # Handle audio input
     audio_input_chat.stop_recording(
         process_audio_input,
-        inputs=[audio_input_chat, chatbot_interface, conversation_state],
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
         outputs=[chatbot_interface, conversation_state],
     ).then(
         generate_audio_response,
@@ -633,6 +638,36 @@ Have a conversation with an AI using your reference voice!
         audio_input_chat,
     )
 
+    # Handle text input
+    text_input_chat.submit(
+        process_audio_input,
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state],
+    ).then(
+        generate_audio_response,
+        inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
+        outputs=[audio_output_chat],
+    ).then(
+        lambda: None,
+        None,
+        text_input_chat,
+    )
+
+    # Handle send button
+    send_btn_chat.click(
+        process_audio_input,
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state],
+    ).then(
+        generate_audio_response,
+        inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
+        outputs=[audio_output_chat],
+    ).then(
+        lambda: None,
+        None,
+        text_input_chat,
+    )
+
     # Handle clear button
     clear_btn_chat.click(
         clear_conversation,
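
Read as a whole, the behavioral change to process_audio_input is that the handler now accepts both a recorded clip (audio_path) and typed text, prefers the recording when one is present, and leaves the conversation untouched when both inputs are empty. Below is a minimal sketch of that control flow; transcribe and handle_user_turn are hypothetical names standing in for the Space's preprocess_ref_audio_text-based handler.

# Sketch only: mirrors the control flow this diff introduces; names and the
# history format are illustrative, not the actual F5-TTS app code.
def transcribe(audio_path: str) -> str:
    """Hypothetical ASR stub; the Space derives text via preprocess_ref_audio_text(audio_path, text)[1]."""
    raise NotImplementedError

def handle_user_turn(audio_path, text, history, conv_state):
    """Append the user's spoken or typed message to the conversation."""
    # Neither a recording nor typed text: return everything unchanged.
    if not audio_path and not text.strip():
        return history, conv_state

    # A recording takes precedence over whatever is sitting in the textbox.
    if audio_path:
        text = transcribe(audio_path)

    if not text.strip():
        return history, conv_state

    conv_state.append({"role": "user", "content": text})
    history.append((text, None))  # the assistant's reply is filled in by a later chained step
    return history, conv_state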
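
The three wiring blocks all follow the same Gradio pattern: run the input handler, chain the audio response with .then(), then clear the textbox by returning None into it. Below is a self-contained sketch of that pattern with dummy handlers; component and function names are illustrative, not the Space's.

import gradio as gr

def add_user_message(text, history):
    """Dummy input handler: append the typed message to the chat history."""
    text = (text or "").strip()
    history = history or []
    if not text:
        return history
    return history + [(text, None)]

def add_bot_reply(history):
    """Dummy response step standing in for generate_audio_response."""
    if history:
        user_msg, _ = history[-1]
        history[-1] = (user_msg, f"echo: {user_msg}")
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Conversation")
    textbox = gr.Textbox(label="Type your message", lines=1)
    send_btn = gr.Button("Send")

    # Pressing Enter and clicking Send trigger the same chain, mirroring
    # text_input_chat.submit(...) and send_btn_chat.click(...) in the diff.
    for event in (textbox.submit, send_btn.click):
        event(
            add_user_message,
            inputs=[textbox, chatbot],
            outputs=[chatbot],
        ).then(
            add_bot_reply,
            inputs=[chatbot],
            outputs=[chatbot],
        ).then(
            lambda: None,  # returning None clears the textbox, as the diff does
            None,
            textbox,
        )

demo.launch()

Wiring .submit and .click to the same chain keeps Enter and the Send button interchangeable, and the final .then(lambda: None, None, textbox) step is the same trick the diff uses to empty the message box after each turn.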