Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import logging
|
| 3 |
import os
|
|
@@ -144,60 +146,33 @@ if __name__ == "__main__":
|
|
| 144 |
noise_scale_w = gr.Slider(0.1, 2.0, 0.8, 0.1, label="Noise_W")
|
| 145 |
length_scale = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Length")
|
| 146 |
language = gr.Dropdown(choices=["JP", "ZH", "EN", "mix", "auto"], value="JP", label="Language")
|
| 147 |
-
style_text = gr.Textbox(label="Style Text", placeholder="(leave blank for none)")
|
| 148 |
-
style_weight = gr.Slider(0, 1, 0.7, 0.1, label="Style Weight")
|
| 149 |
btn = gr.Button("Generate Audio", variant="primary")
|
| 150 |
|
| 151 |
-
with gr.Column():
|
| 152 |
-
output_msg = gr.Textbox(label="Output Message")
|
| 153 |
-
output_audio = gr.Audio(label="Output Audio")
|
| 154 |
-
|
| 155 |
-
with gr.Column():
|
| 156 |
-
with gr.Accordion("Semantic Fusion", open=False):
|
| 157 |
-
gr.Markdown(
|
| 158 |
-
value="Use auxiliary text semantics to assist speech generation (the language remains the same as the main text)\n\n"
|
| 159 |
-
"**Note**: Do not use **command-style text** (e.g., 'Happy'), instead use **emotionally expressive text** (e.g., 'I'm so happy!!!')\n\n"
|
| 160 |
-
"Effectiveness is uncertain; leave it blank to disable this feature\n\n"
|
| 161 |
-
"**If the main text is mispronounced, try replacing the mispronounced characters with phonetically correct ones, and input the original text here with weight set to max to retain the original semantic intent while correcting pronunciation.**"
|
| 162 |
-
)
|
| 163 |
-
style_text = gr.Textbox(label="Auxiliary Text")
|
| 164 |
-
style_weight = gr.Slider(
|
| 165 |
-
minimum=0,
|
| 166 |
-
maximum=1,
|
| 167 |
-
value=0.7,
|
| 168 |
-
step=0.1,
|
| 169 |
-
label="Weight",
|
| 170 |
-
info="Mixing ratio between main text and auxiliary text in BERT embedding. 0 means main text only, 1 means auxiliary text only.",
|
| 171 |
-
)
|
| 172 |
-
with gr.Row():
|
| 173 |
-
with gr.Column():
|
| 174 |
-
interval_between_sent = gr.Slider(
|
| 175 |
-
minimum=0,
|
| 176 |
-
maximum=5,
|
| 177 |
-
value=0.2,
|
| 178 |
-
step=0.1,
|
| 179 |
-
label="Pause between sentences (seconds). Effective only when sentence splitting is enabled.",
|
| 180 |
-
)
|
| 181 |
-
interval_between_para = gr.Slider(
|
| 182 |
-
minimum=0,
|
| 183 |
-
maximum=10,
|
| 184 |
-
value=1,
|
| 185 |
-
step=0.1,
|
| 186 |
-
label="Pause between paragraphs (seconds). Must be longer than sentence pause.",
|
| 187 |
-
)
|
| 188 |
-
opt_cut_by_sent = gr.Checkbox(
|
| 189 |
-
label="Split by sentence — further splits text by sentence in addition to paragraph splitting"
|
| 190 |
-
)
|
| 191 |
-
slicer = gr.Button("Split and Generate", variant="primary")
|
| 192 |
-
|
| 193 |
-
|
| 194 |
with gr.Column():
|
| 195 |
output_msg = gr.Textbox(label="Output Message")
|
| 196 |
output_audio = gr.Audio(label="Output Audio")
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
prompt_mode.change(lambda x: gr_util(x), inputs=[prompt_mode], outputs=[text_prompt, audio_prompt])
|
| 199 |
audio_prompt.upload(lambda x: load_audio(x), inputs=[audio_prompt], outputs=[audio_prompt])
|
| 200 |
btn.click(tts_fn, inputs=[input_text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language, audio_prompt, text_prompt, prompt_mode, style_text, style_weight], outputs=[output_msg, output_audio])
|
| 201 |
-
slicer.click(lambda: ("Slicing logic not
|
| 202 |
|
| 203 |
app.queue().launch(share=args.share)
|
|
|
|
| 1 |
+
# app.py (fully patched)
|
| 2 |
+
|
| 3 |
import sys
|
| 4 |
import logging
|
| 5 |
import os
|
|
|
|
| 146 |
noise_scale_w = gr.Slider(0.1, 2.0, 0.8, 0.1, label="Noise_W")
|
| 147 |
length_scale = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Length")
|
| 148 |
language = gr.Dropdown(choices=["JP", "ZH", "EN", "mix", "auto"], value="JP", label="Language")
|
|
|
|
|
|
|
| 149 |
btn = gr.Button("Generate Audio", variant="primary")
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
with gr.Column():
|
| 152 |
output_msg = gr.Textbox(label="Output Message")
|
| 153 |
output_audio = gr.Audio(label="Output Audio")
|
| 154 |
|
| 155 |
+
with gr.Column():
|
| 156 |
+
with gr.Accordion("Semantic Fusion", open=False):
|
| 157 |
+
gr.Markdown(
|
| 158 |
+
value="Use auxiliary text semantics to assist speech generation (language remains same as main text)\n\n"
|
| 159 |
+
"**Note**: Avoid using *command-style text* (e.g., 'Happy'). Use *emotionally rich text* (e.g., 'I'm so happy!!!')\n\n"
|
| 160 |
+
"Leave it blank to disable. \n\n"
|
| 161 |
+
"**If mispronunciations occur, try replacing characters and inputting the original here with weight set to 1.0 for semantic retention.**"
|
| 162 |
+
)
|
| 163 |
+
style_text = gr.Textbox(label="Auxiliary Text")
|
| 164 |
+
style_weight = gr.Slider(0, 1, 0.7, 0.1, label="Weight", info="Ratio between main and auxiliary BERT embeddings")
|
| 165 |
+
|
| 166 |
+
with gr.Row():
|
| 167 |
+
with gr.Column():
|
| 168 |
+
interval_between_sent = gr.Slider(0, 5, 0.2, 0.1, label="Pause between sentences (sec)")
|
| 169 |
+
interval_between_para = gr.Slider(0, 10, 1, 0.1, label="Pause between paragraphs (sec)")
|
| 170 |
+
opt_cut_by_sent = gr.Checkbox(label="Split by sentence")
|
| 171 |
+
slicer = gr.Button("Split and Generate", variant="primary")
|
| 172 |
+
|
| 173 |
prompt_mode.change(lambda x: gr_util(x), inputs=[prompt_mode], outputs=[text_prompt, audio_prompt])
|
| 174 |
audio_prompt.upload(lambda x: load_audio(x), inputs=[audio_prompt], outputs=[audio_prompt])
|
| 175 |
btn.click(tts_fn, inputs=[input_text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language, audio_prompt, text_prompt, prompt_mode, style_text, style_weight], outputs=[output_msg, output_audio])
|
| 176 |
+
slicer.click(lambda: ("Slicing logic not yet implemented in this version", None), inputs=[], outputs=[output_msg, output_audio])
|
| 177 |
|
| 178 |
app.queue().launch(share=args.share)
|