JotunnBurton committed on
Commit
6c3d54e
·
verified ·
1 Parent(s): da52ef1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -46
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import sys
2
  import logging
3
  import os
@@ -144,60 +146,33 @@ if __name__ == "__main__":
144
  noise_scale_w = gr.Slider(0.1, 2.0, 0.8, 0.1, label="Noise_W")
145
  length_scale = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Length")
146
  language = gr.Dropdown(choices=["JP", "ZH", "EN", "mix", "auto"], value="JP", label="Language")
147
- style_text = gr.Textbox(label="Style Text", placeholder="(leave blank for none)")
148
- style_weight = gr.Slider(0, 1, 0.7, 0.1, label="Style Weight")
149
  btn = gr.Button("Generate Audio", variant="primary")
150
 
151
- with gr.Column():
152
- output_msg = gr.Textbox(label="Output Message")
153
- output_audio = gr.Audio(label="Output Audio")
154
-
155
- with gr.Column():
156
- with gr.Accordion("Semantic Fusion", open=False):
157
- gr.Markdown(
158
- value="Use auxiliary text semantics to assist speech generation (the language remains the same as the main text)\n\n"
159
- "**Note**: Do not use **command-style text** (e.g., 'Happy'), instead use **emotionally expressive text** (e.g., 'I'm so happy!!!')\n\n"
160
- "Effectiveness is uncertain; leave it blank to disable this feature\n\n"
161
- "**If the main text is mispronounced, try replacing the mispronounced characters with phonetically correct ones, and input the original text here with weight set to max to retain the original semantic intent while correcting pronunciation.**"
162
- )
163
- style_text = gr.Textbox(label="Auxiliary Text")
164
- style_weight = gr.Slider(
165
- minimum=0,
166
- maximum=1,
167
- value=0.7,
168
- step=0.1,
169
- label="Weight",
170
- info="Mixing ratio between main text and auxiliary text in BERT embedding. 0 means main text only, 1 means auxiliary text only.",
171
- )
172
- with gr.Row():
173
- with gr.Column():
174
- interval_between_sent = gr.Slider(
175
- minimum=0,
176
- maximum=5,
177
- value=0.2,
178
- step=0.1,
179
- label="Pause between sentences (seconds). Effective only when sentence splitting is enabled.",
180
- )
181
- interval_between_para = gr.Slider(
182
- minimum=0,
183
- maximum=10,
184
- value=1,
185
- step=0.1,
186
- label="Pause between paragraphs (seconds). Must be longer than sentence pause.",
187
- )
188
- opt_cut_by_sent = gr.Checkbox(
189
- label="Split by sentence — further splits text by sentence in addition to paragraph splitting"
190
- )
191
- slicer = gr.Button("Split and Generate", variant="primary")
192
-
193
-
194
  with gr.Column():
195
  output_msg = gr.Textbox(label="Output Message")
196
  output_audio = gr.Audio(label="Output Audio")
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  prompt_mode.change(lambda x: gr_util(x), inputs=[prompt_mode], outputs=[text_prompt, audio_prompt])
199
  audio_prompt.upload(lambda x: load_audio(x), inputs=[audio_prompt], outputs=[audio_prompt])
200
  btn.click(tts_fn, inputs=[input_text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language, audio_prompt, text_prompt, prompt_mode, style_text, style_weight], outputs=[output_msg, output_audio])
201
- slicer.click(lambda: ("Slicing logic not fully supported yet in this patch", None), inputs=[], outputs=[output_msg, output_audio])
202
 
203
  app.queue().launch(share=args.share)
 
1
+ # app.py (fully patched)
2
+
3
  import sys
4
  import logging
5
  import os
 
146
  noise_scale_w = gr.Slider(0.1, 2.0, 0.8, 0.1, label="Noise_W")
147
  length_scale = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Length")
148
  language = gr.Dropdown(choices=["JP", "ZH", "EN", "mix", "auto"], value="JP", label="Language")
 
 
149
  btn = gr.Button("Generate Audio", variant="primary")
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  with gr.Column():
152
  output_msg = gr.Textbox(label="Output Message")
153
  output_audio = gr.Audio(label="Output Audio")
154
 
155
+ with gr.Column():
156
+ with gr.Accordion("Semantic Fusion", open=False):
157
+ gr.Markdown(
158
+ value="Use auxiliary text semantics to assist speech generation (language remains same as main text)\n\n"
159
+ "**Note**: Avoid using *command-style text* (e.g., 'Happy'). Use *emotionally rich text* (e.g., 'I'm so happy!!!')\n\n"
160
+ "Leave it blank to disable. \n\n"
161
+ "**If mispronunciations occur, try replacing characters and inputting the original here with weight set to 1.0 for semantic retention.**"
162
+ )
163
+ style_text = gr.Textbox(label="Auxiliary Text")
164
+ style_weight = gr.Slider(0, 1, 0.7, 0.1, label="Weight", info="Ratio between main and auxiliary BERT embeddings")
165
+
166
+ with gr.Row():
167
+ with gr.Column():
168
+ interval_between_sent = gr.Slider(0, 5, 0.2, 0.1, label="Pause between sentences (sec)")
169
+ interval_between_para = gr.Slider(0, 10, 1, 0.1, label="Pause between paragraphs (sec)")
170
+ opt_cut_by_sent = gr.Checkbox(label="Split by sentence")
171
+ slicer = gr.Button("Split and Generate", variant="primary")
172
+
173
  prompt_mode.change(lambda x: gr_util(x), inputs=[prompt_mode], outputs=[text_prompt, audio_prompt])
174
  audio_prompt.upload(lambda x: load_audio(x), inputs=[audio_prompt], outputs=[audio_prompt])
175
  btn.click(tts_fn, inputs=[input_text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language, audio_prompt, text_prompt, prompt_mode, style_text, style_weight], outputs=[output_msg, output_audio])
176
+ slicer.click(lambda: ("Slicing logic not yet implemented in this version", None), inputs=[], outputs=[output_msg, output_audio])
177
 
178
  app.queue().launch(share=args.share)