Spaces:
Build error
Build error
Update infer-web.py
Browse files- infer-web.py +352 -347
infer-web.py
CHANGED
|
@@ -69,6 +69,9 @@ import time
|
|
| 69 |
import csv
|
| 70 |
from shlex import quote as SQuote
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
logger = logging.getLogger(__name__)
|
| 73 |
|
| 74 |
RQuote = lambda val: SQuote(str(val))
|
|
@@ -2451,384 +2454,386 @@ def GradioSetup():
|
|
| 2451 |
outputs=[advanced_settings_batch],
|
| 2452 |
)
|
| 2453 |
|
| 2454 |
-
with gr.TabItem(i18n("Train"), visible=False):
|
| 2455 |
|
| 2456 |
-
|
| 2457 |
-
|
| 2458 |
-
|
| 2459 |
-
|
| 2460 |
-
|
| 2461 |
-
|
| 2462 |
-
|
| 2463 |
-
|
| 2464 |
-
|
| 2465 |
-
|
| 2466 |
-
|
| 2467 |
-
|
| 2468 |
-
|
| 2469 |
-
|
| 2470 |
-
|
| 2471 |
-
|
| 2472 |
-
|
| 2473 |
-
|
| 2474 |
-
|
| 2475 |
-
label=i18n("Version:"),
|
| 2476 |
-
choices=["v1", "v2"],
|
| 2477 |
-
value="v2",
|
| 2478 |
-
interactive=True,
|
| 2479 |
-
visible=True,
|
| 2480 |
-
)
|
| 2481 |
-
|
| 2482 |
-
with gr.Column():
|
| 2483 |
-
np7 = gr.Slider(
|
| 2484 |
-
minimum=1,
|
| 2485 |
-
maximum=config.n_cpu,
|
| 2486 |
-
step=1,
|
| 2487 |
-
label=i18n("Number of CPU processes:"),
|
| 2488 |
-
value=config.n_cpu,
|
| 2489 |
interactive=True,
|
| 2490 |
)
|
| 2491 |
-
|
| 2492 |
-
|
| 2493 |
-
|
| 2494 |
-
|
| 2495 |
-
label=i18n("Specify the model ID:"),
|
| 2496 |
-
value=0,
|
| 2497 |
interactive=True,
|
|
|
|
| 2498 |
)
|
| 2499 |
-
|
| 2500 |
-
|
| 2501 |
-
|
| 2502 |
-
|
| 2503 |
-
|
| 2504 |
-
|
| 2505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2506 |
)
|
| 2507 |
-
|
| 2508 |
-
|
| 2509 |
-
|
| 2510 |
-
|
|
|
|
|
|
|
|
|
|
| 2511 |
)
|
| 2512 |
-
|
| 2513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2514 |
)
|
| 2515 |
-
|
| 2516 |
-
|
| 2517 |
-
|
| 2518 |
-
|
| 2519 |
-
|
| 2520 |
-
|
| 2521 |
-
|
| 2522 |
-
|
| 2523 |
-
|
| 2524 |
-
|
| 2525 |
-
|
|
|
|
|
|
|
|
|
|
| 2526 |
)
|
| 2527 |
-
|
| 2528 |
-
with gr.Accordion(label=i18n("Step 2: Extracting features")):
|
| 2529 |
with gr.Row():
|
| 2530 |
-
with gr.
|
| 2531 |
-
|
| 2532 |
-
|
| 2533 |
-
|
| 2534 |
-
|
| 2535 |
-
|
| 2536 |
-
|
| 2537 |
-
|
| 2538 |
-
|
| 2539 |
-
|
| 2540 |
-
|
| 2541 |
-
|
| 2542 |
-
|
| 2543 |
-
|
| 2544 |
-
|
| 2545 |
-
|
| 2546 |
-
|
| 2547 |
-
|
| 2548 |
-
|
| 2549 |
-
|
| 2550 |
-
|
| 2551 |
-
|
| 2552 |
-
|
| 2553 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2554 |
],
|
| 2555 |
-
|
| 2556 |
-
|
| 2557 |
)
|
| 2558 |
-
|
| 2559 |
-
|
| 2560 |
-
|
| 2561 |
-
|
| 2562 |
-
|
| 2563 |
-
"Hop Length (lower hop lengths take more time to infer but are more pitch accurate):"
|
| 2564 |
-
),
|
| 2565 |
-
value=64,
|
| 2566 |
-
interactive=True,
|
| 2567 |
-
)
|
| 2568 |
-
|
| 2569 |
with gr.Row():
|
| 2570 |
-
|
| 2571 |
-
|
| 2572 |
-
|
| 2573 |
-
|
| 2574 |
-
|
| 2575 |
-
interactive=False,
|
| 2576 |
-
)
|
| 2577 |
-
|
| 2578 |
-
but2.click(
|
| 2579 |
-
extract_f0_feature,
|
| 2580 |
-
[
|
| 2581 |
-
gpus6,
|
| 2582 |
-
np7,
|
| 2583 |
-
f0method8,
|
| 2584 |
-
if_f0_3,
|
| 2585 |
-
exp_dir1,
|
| 2586 |
-
version19,
|
| 2587 |
-
hop_length,
|
| 2588 |
-
],
|
| 2589 |
-
[info2],
|
| 2590 |
-
api_name="train_extract_f0_feature",
|
| 2591 |
-
)
|
| 2592 |
-
|
| 2593 |
-
with gr.Row():
|
| 2594 |
-
with gr.Accordion(label=i18n("Step 3: Model training started")):
|
| 2595 |
-
with gr.Row():
|
| 2596 |
-
save_epoch10 = gr.Slider(
|
| 2597 |
-
minimum=1,
|
| 2598 |
-
maximum=100,
|
| 2599 |
-
step=1,
|
| 2600 |
-
label=i18n("Save frequency:"),
|
| 2601 |
-
value=10,
|
| 2602 |
-
interactive=True,
|
| 2603 |
-
visible=True,
|
| 2604 |
-
)
|
| 2605 |
-
total_epoch11 = gr.Slider(
|
| 2606 |
-
minimum=1,
|
| 2607 |
-
maximum=10000,
|
| 2608 |
-
step=2,
|
| 2609 |
-
label=i18n("Training epochs:"),
|
| 2610 |
-
value=750,
|
| 2611 |
-
interactive=True,
|
| 2612 |
-
)
|
| 2613 |
-
batch_size12 = gr.Slider(
|
| 2614 |
-
minimum=1,
|
| 2615 |
-
maximum=50,
|
| 2616 |
-
step=1,
|
| 2617 |
-
label=i18n("Batch size per GPU:"),
|
| 2618 |
-
value=default_batch_size,
|
| 2619 |
-
# value=20,
|
| 2620 |
-
interactive=True,
|
| 2621 |
-
)
|
| 2622 |
-
|
| 2623 |
-
with gr.Row():
|
| 2624 |
-
if_save_latest13 = gr.Checkbox(
|
| 2625 |
-
label=i18n(
|
| 2626 |
-
"Whether to save only the latest .ckpt file to save hard drive space"
|
| 2627 |
-
),
|
| 2628 |
-
value=True,
|
| 2629 |
interactive=True,
|
| 2630 |
)
|
| 2631 |
-
|
| 2632 |
label=i18n(
|
| 2633 |
-
"
|
| 2634 |
),
|
| 2635 |
-
value=
|
| 2636 |
-
interactive=True,
|
| 2637 |
)
|
| 2638 |
-
|
|
|
|
| 2639 |
label=i18n(
|
| 2640 |
-
"
|
| 2641 |
),
|
| 2642 |
-
value=True,
|
| 2643 |
-
interactive=True,
|
| 2644 |
)
|
|
|
|
| 2645 |
with gr.Column():
|
| 2646 |
-
|
| 2647 |
-
|
| 2648 |
-
label=i18n("Load pre-trained base model G path:"),
|
| 2649 |
-
value="assets/pretrained_v2/f0G40k.pth",
|
| 2650 |
-
interactive=True,
|
| 2651 |
-
)
|
| 2652 |
-
pretrained_D15 = gr.Textbox(
|
| 2653 |
-
label=i18n("Load pre-trained base model D path:"),
|
| 2654 |
-
value="assets/pretrained_v2/f0D40k.pth",
|
| 2655 |
-
interactive=True,
|
| 2656 |
-
)
|
| 2657 |
-
with gr.Row():
|
| 2658 |
-
gpus16 = gr.Textbox(
|
| 2659 |
-
label=i18n(
|
| 2660 |
-
"Provide the GPU index(es) separated by '-', like 0-1-2 for using GPUs 0, 1, and 2:"
|
| 2661 |
-
),
|
| 2662 |
-
value=gpus,
|
| 2663 |
-
interactive=True,
|
| 2664 |
-
)
|
| 2665 |
-
sr2.change(
|
| 2666 |
-
change_sr2,
|
| 2667 |
-
[sr2, if_f0_3, version19],
|
| 2668 |
-
[pretrained_G14, pretrained_D15],
|
| 2669 |
)
|
| 2670 |
-
|
| 2671 |
-
|
| 2672 |
-
|
| 2673 |
-
|
| 2674 |
-
|
| 2675 |
-
|
| 2676 |
-
|
| 2677 |
-
inputs=[if_f0_3, sr2, version19],
|
| 2678 |
-
outputs=[f0method8, pretrained_G14, pretrained_D15],
|
| 2679 |
-
)
|
| 2680 |
-
with gr.Row():
|
| 2681 |
-
butstop = gr.Button(
|
| 2682 |
-
i18n("Stop training"),
|
| 2683 |
-
variant="primary",
|
| 2684 |
visible=False,
|
| 2685 |
)
|
| 2686 |
-
|
| 2687 |
-
i18n("
|
| 2688 |
-
|
| 2689 |
-
but3.click(
|
| 2690 |
-
fn=stoptraining,
|
| 2691 |
-
inputs=[gr.Number(value=0, visible=False)],
|
| 2692 |
-
outputs=[but3, butstop],
|
| 2693 |
-
api_name="train_stop",
|
| 2694 |
-
)
|
| 2695 |
-
butstop.click(
|
| 2696 |
-
fn=stoptraining,
|
| 2697 |
-
inputs=[gr.Number(value=1, visible=False)],
|
| 2698 |
-
outputs=[but3, butstop],
|
| 2699 |
)
|
| 2700 |
-
|
| 2701 |
-
label=i18n("
|
| 2702 |
-
value="",
|
| 2703 |
-
lines=4,
|
| 2704 |
-
max_lines=4,
|
| 2705 |
)
|
| 2706 |
-
|
| 2707 |
-
|
| 2708 |
-
|
| 2709 |
-
|
| 2710 |
-
|
| 2711 |
-
i18n("Save all"),
|
| 2712 |
-
i18n("Save D and G"),
|
| 2713 |
-
i18n("Save voice"),
|
| 2714 |
-
],
|
| 2715 |
-
value=i18n("Choose the method"),
|
| 2716 |
-
interactive=True,
|
| 2717 |
-
)
|
| 2718 |
-
but4 = gr.Button(
|
| 2719 |
-
i18n("Train feature index"), variant="primary"
|
| 2720 |
-
)
|
| 2721 |
-
|
| 2722 |
-
but7 = gr.Button(i18n("Save model"), variant="primary")
|
| 2723 |
-
|
| 2724 |
-
if_save_every_weights18.change(
|
| 2725 |
-
fn=lambda if_save_every_weights: (
|
| 2726 |
-
{
|
| 2727 |
-
"visible": if_save_every_weights,
|
| 2728 |
-
"__type__": "update",
|
| 2729 |
-
}
|
| 2730 |
-
),
|
| 2731 |
-
inputs=[if_save_every_weights18],
|
| 2732 |
-
outputs=[save_epoch10],
|
| 2733 |
)
|
| 2734 |
-
|
| 2735 |
-
|
| 2736 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2737 |
[
|
| 2738 |
-
|
| 2739 |
-
|
| 2740 |
-
|
| 2741 |
-
|
| 2742 |
-
|
| 2743 |
-
|
| 2744 |
-
|
| 2745 |
-
|
| 2746 |
-
pretrained_G14,
|
| 2747 |
-
pretrained_D15,
|
| 2748 |
-
gpus16,
|
| 2749 |
-
if_cache_gpu17,
|
| 2750 |
-
if_save_every_weights18,
|
| 2751 |
-
version19,
|
| 2752 |
],
|
| 2753 |
-
[
|
| 2754 |
-
api_name="
|
| 2755 |
-
)
|
| 2756 |
-
|
| 2757 |
-
but4.click(train_index, [exp_dir1, version19], info3)
|
| 2758 |
-
but7.click(resources.save_model, [exp_dir1, save_action], info3)
|
| 2759 |
-
|
| 2760 |
-
with gr.TabItem(i18n("UVR5")): # UVR section
|
| 2761 |
-
with gr.Row():
|
| 2762 |
-
with gr.Column():
|
| 2763 |
-
model_select = gr.Radio(
|
| 2764 |
-
label=i18n("Model Architecture:"),
|
| 2765 |
-
choices=["VR", "MDX", "Demucs (Beta)"],
|
| 2766 |
-
value="VR",
|
| 2767 |
-
interactive=True,
|
| 2768 |
-
)
|
| 2769 |
-
dir_wav_input = gr.Textbox(
|
| 2770 |
-
label=i18n(
|
| 2771 |
-
"Enter the path of the audio folder to be processed:"
|
| 2772 |
-
),
|
| 2773 |
-
value=os.path.join(now_dir, "assets", "audios"),
|
| 2774 |
-
)
|
| 2775 |
-
wav_inputs = gr.File(
|
| 2776 |
-
file_count="multiple",
|
| 2777 |
-
label=i18n(
|
| 2778 |
-
"You can also input audio files in batches. Choose one of the two options. Priority is given to reading from the folder."
|
| 2779 |
-
),
|
| 2780 |
-
)
|
| 2781 |
-
|
| 2782 |
-
with gr.Column():
|
| 2783 |
-
model_choose = gr.Dropdown(
|
| 2784 |
-
label=i18n("Model:"), choices=uvr5_names
|
| 2785 |
)
|
| 2786 |
-
agg = gr.Slider(
|
| 2787 |
-
minimum=0,
|
| 2788 |
-
maximum=20,
|
| 2789 |
-
step=1,
|
| 2790 |
-
label="Vocal Extraction Aggressive",
|
| 2791 |
-
value=10,
|
| 2792 |
-
interactive=True,
|
| 2793 |
-
visible=False,
|
| 2794 |
-
)
|
| 2795 |
-
opt_vocal_root = gr.Textbox(
|
| 2796 |
-
label=i18n("Specify the output folder for vocals:"),
|
| 2797 |
-
value="assets/audios",
|
| 2798 |
-
)
|
| 2799 |
-
opt_ins_root = gr.Textbox(
|
| 2800 |
-
label=i18n("Specify the output folder for accompaniment:"),
|
| 2801 |
-
value="assets/audios/audio-others",
|
| 2802 |
-
)
|
| 2803 |
-
format0 = gr.Radio(
|
| 2804 |
-
label=i18n("Export file format:"),
|
| 2805 |
-
choices=["wav", "flac", "mp3", "m4a"],
|
| 2806 |
-
value="flac",
|
| 2807 |
-
interactive=True,
|
| 2808 |
-
)
|
| 2809 |
-
model_select.change(
|
| 2810 |
-
fn=update_model_choices,
|
| 2811 |
-
inputs=model_select,
|
| 2812 |
-
outputs=model_choose,
|
| 2813 |
-
)
|
| 2814 |
-
but2 = gr.Button(i18n("Convert"), variant="primary")
|
| 2815 |
-
vc_output4 = gr.Textbox(label=i18n("Output information:"))
|
| 2816 |
-
# wav_inputs.upload(fn=save_to_wav2_edited, inputs=[wav_inputs], outputs=[])
|
| 2817 |
-
but2.click(
|
| 2818 |
-
uvr,
|
| 2819 |
-
[
|
| 2820 |
-
model_choose,
|
| 2821 |
-
dir_wav_input,
|
| 2822 |
-
opt_vocal_root,
|
| 2823 |
-
wav_inputs,
|
| 2824 |
-
opt_ins_root,
|
| 2825 |
-
agg,
|
| 2826 |
-
format0,
|
| 2827 |
-
model_select,
|
| 2828 |
-
],
|
| 2829 |
-
[vc_output4],
|
| 2830 |
-
api_name="uvr_convert",
|
| 2831 |
-
)
|
| 2832 |
with gr.TabItem(i18n("TTS")):
|
| 2833 |
with gr.Column():
|
| 2834 |
text_test = gr.Textbox(
|
|
|
|
| 69 |
import csv
|
| 70 |
from shlex import quote as SQuote
|
| 71 |
|
| 72 |
+
import torch
|
| 73 |
+
cpu_flag = torch.cuda.is_available()
|
| 74 |
+
|
| 75 |
logger = logging.getLogger(__name__)
|
| 76 |
|
| 77 |
RQuote = lambda val: SQuote(str(val))
|
|
|
|
| 2454 |
outputs=[advanced_settings_batch],
|
| 2455 |
)
|
| 2456 |
|
|
|
|
| 2457 |
|
| 2458 |
+
with gr.Tabs(visible=cpu_flag) as tabs:
|
| 2459 |
+
with gr.TabItem(i18n("Train"), visible=False):
|
| 2460 |
+
|
| 2461 |
+
with gr.Accordion(label=i18n("Step 1: Processing data")):
|
| 2462 |
+
with gr.Row():
|
| 2463 |
+
with gr.Column():
|
| 2464 |
+
exp_dir1 = gr.Textbox(
|
| 2465 |
+
label=i18n("Enter the model name:"),
|
| 2466 |
+
value=i18n("Model_Name"),
|
| 2467 |
+
)
|
| 2468 |
+
if_f0_3 = gr.Checkbox(
|
| 2469 |
+
label=i18n("Whether the model has pitch guidance."),
|
| 2470 |
+
value=True,
|
| 2471 |
+
interactive=True,
|
| 2472 |
+
)
|
| 2473 |
+
sr2 = gr.Radio(
|
| 2474 |
+
label=i18n("Target sample rate:"),
|
| 2475 |
+
choices=["40k", "48k", "32k"],
|
| 2476 |
+
value="40k",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2477 |
interactive=True,
|
| 2478 |
)
|
| 2479 |
+
version19 = gr.Radio(
|
| 2480 |
+
label=i18n("Version:"),
|
| 2481 |
+
choices=["v1", "v2"],
|
| 2482 |
+
value="v2",
|
|
|
|
|
|
|
| 2483 |
interactive=True,
|
| 2484 |
+
visible=True,
|
| 2485 |
)
|
| 2486 |
+
|
| 2487 |
+
with gr.Column():
|
| 2488 |
+
np7 = gr.Slider(
|
| 2489 |
+
minimum=1,
|
| 2490 |
+
maximum=config.n_cpu,
|
| 2491 |
+
step=1,
|
| 2492 |
+
label=i18n("Number of CPU processes:"),
|
| 2493 |
+
value=config.n_cpu,
|
| 2494 |
+
interactive=True,
|
| 2495 |
+
)
|
| 2496 |
+
spk_id5 = gr.Slider(
|
| 2497 |
+
minimum=0,
|
| 2498 |
+
maximum=4,
|
| 2499 |
+
step=1,
|
| 2500 |
+
label=i18n("Specify the model ID:"),
|
| 2501 |
+
value=0,
|
| 2502 |
+
interactive=True,
|
| 2503 |
+
)
|
| 2504 |
+
|
| 2505 |
+
with gr.Row():
|
| 2506 |
+
with gr.Column():
|
| 2507 |
+
trainset_dir4 = gr.Dropdown(
|
| 2508 |
+
choices=sorted(datasets),
|
| 2509 |
+
label=i18n("Select your dataset:"),
|
| 2510 |
+
value=get_dataset(),
|
| 2511 |
+
)
|
| 2512 |
+
|
| 2513 |
+
dataset_path = gr.Textbox(
|
| 2514 |
+
label=i18n("Or add your dataset path:"),
|
| 2515 |
+
interactive=True,
|
| 2516 |
+
)
|
| 2517 |
+
btn_update_dataset_list = gr.Button(
|
| 2518 |
+
i18n("Update list"), variant="primary"
|
| 2519 |
+
)
|
| 2520 |
+
|
| 2521 |
+
btn_update_dataset_list.click(
|
| 2522 |
+
resources.update_dataset_list, [spk_id5], trainset_dir4
|
| 2523 |
)
|
| 2524 |
+
but1 = gr.Button(i18n("Process data"), variant="primary")
|
| 2525 |
+
info1 = gr.Textbox(label=i18n("Output information:"), value="")
|
| 2526 |
+
but1.click(
|
| 2527 |
+
preprocess_dataset,
|
| 2528 |
+
[trainset_dir4, exp_dir1, sr2, np7, dataset_path],
|
| 2529 |
+
[info1],
|
| 2530 |
+
api_name="train_preprocess",
|
| 2531 |
)
|
| 2532 |
+
|
| 2533 |
+
with gr.Accordion(label=i18n("Step 2: Extracting features")):
|
| 2534 |
+
with gr.Row():
|
| 2535 |
+
with gr.Column():
|
| 2536 |
+
gpus6 = gr.Textbox(
|
| 2537 |
+
label=i18n(
|
| 2538 |
+
"Provide the GPU index(es) separated by '-', like 0-1-2 for using GPUs 0, 1, and 2:"
|
| 2539 |
+
),
|
| 2540 |
+
value=gpus,
|
| 2541 |
+
interactive=True,
|
| 2542 |
+
)
|
| 2543 |
+
gpu_info9 = gr.Textbox(
|
| 2544 |
+
label=i18n("GPU Information:"),
|
| 2545 |
+
value=gpu_info,
|
| 2546 |
+
visible=F0GPUVisible,
|
| 2547 |
+
)
|
| 2548 |
+
with gr.Column():
|
| 2549 |
+
f0method8 = gr.Radio(
|
| 2550 |
+
label=i18n("Select the pitch extraction algorithm:"),
|
| 2551 |
+
choices=[
|
| 2552 |
+
"pm",
|
| 2553 |
+
"harvest",
|
| 2554 |
+
"dio",
|
| 2555 |
+
"crepe",
|
| 2556 |
+
"mangio-crepe",
|
| 2557 |
+
"rmvpe",
|
| 2558 |
+
"rmvpe_gpu",
|
| 2559 |
+
],
|
| 2560 |
+
value="rmvpe",
|
| 2561 |
+
interactive=True,
|
| 2562 |
+
)
|
| 2563 |
+
hop_length = gr.Slider(
|
| 2564 |
+
minimum=1,
|
| 2565 |
+
maximum=512,
|
| 2566 |
+
step=1,
|
| 2567 |
+
label=i18n(
|
| 2568 |
+
"Hop Length (lower hop lengths take more time to infer but are more pitch accurate):"
|
| 2569 |
+
),
|
| 2570 |
+
value=64,
|
| 2571 |
+
interactive=True,
|
| 2572 |
+
)
|
| 2573 |
+
|
| 2574 |
+
with gr.Row():
|
| 2575 |
+
but2 = gr.Button(i18n("Feature extraction"), variant="primary")
|
| 2576 |
+
info2 = gr.Textbox(
|
| 2577 |
+
label=i18n("Output information:"),
|
| 2578 |
+
value="",
|
| 2579 |
+
max_lines=8,
|
| 2580 |
+
interactive=False,
|
| 2581 |
)
|
| 2582 |
+
|
| 2583 |
+
but2.click(
|
| 2584 |
+
extract_f0_feature,
|
| 2585 |
+
[
|
| 2586 |
+
gpus6,
|
| 2587 |
+
np7,
|
| 2588 |
+
f0method8,
|
| 2589 |
+
if_f0_3,
|
| 2590 |
+
exp_dir1,
|
| 2591 |
+
version19,
|
| 2592 |
+
hop_length,
|
| 2593 |
+
],
|
| 2594 |
+
[info2],
|
| 2595 |
+
api_name="train_extract_f0_feature",
|
| 2596 |
)
|
| 2597 |
+
|
|
|
|
| 2598 |
with gr.Row():
|
| 2599 |
+
with gr.Accordion(label=i18n("Step 3: Model training started")):
|
| 2600 |
+
with gr.Row():
|
| 2601 |
+
save_epoch10 = gr.Slider(
|
| 2602 |
+
minimum=1,
|
| 2603 |
+
maximum=100,
|
| 2604 |
+
step=1,
|
| 2605 |
+
label=i18n("Save frequency:"),
|
| 2606 |
+
value=10,
|
| 2607 |
+
interactive=True,
|
| 2608 |
+
visible=True,
|
| 2609 |
+
)
|
| 2610 |
+
total_epoch11 = gr.Slider(
|
| 2611 |
+
minimum=1,
|
| 2612 |
+
maximum=10000,
|
| 2613 |
+
step=2,
|
| 2614 |
+
label=i18n("Training epochs:"),
|
| 2615 |
+
value=750,
|
| 2616 |
+
interactive=True,
|
| 2617 |
+
)
|
| 2618 |
+
batch_size12 = gr.Slider(
|
| 2619 |
+
minimum=1,
|
| 2620 |
+
maximum=50,
|
| 2621 |
+
step=1,
|
| 2622 |
+
label=i18n("Batch size per GPU:"),
|
| 2623 |
+
value=default_batch_size,
|
| 2624 |
+
# value=20,
|
| 2625 |
+
interactive=True,
|
| 2626 |
+
)
|
| 2627 |
+
|
| 2628 |
+
with gr.Row():
|
| 2629 |
+
if_save_latest13 = gr.Checkbox(
|
| 2630 |
+
label=i18n(
|
| 2631 |
+
"Whether to save only the latest .ckpt file to save hard drive space"
|
| 2632 |
+
),
|
| 2633 |
+
value=True,
|
| 2634 |
+
interactive=True,
|
| 2635 |
+
)
|
| 2636 |
+
if_cache_gpu17 = gr.Checkbox(
|
| 2637 |
+
label=i18n(
|
| 2638 |
+
"Cache all training sets to GPU memory. Caching small datasets (less than 10 minutes) can speed up training"
|
| 2639 |
+
),
|
| 2640 |
+
value=False,
|
| 2641 |
+
interactive=True,
|
| 2642 |
+
)
|
| 2643 |
+
if_save_every_weights18 = gr.Checkbox(
|
| 2644 |
+
label=i18n(
|
| 2645 |
+
"Save a small final model to the 'weights' folder at each save point"
|
| 2646 |
+
),
|
| 2647 |
+
value=True,
|
| 2648 |
+
interactive=True,
|
| 2649 |
+
)
|
| 2650 |
+
with gr.Column():
|
| 2651 |
+
with gr.Row():
|
| 2652 |
+
pretrained_G14 = gr.Textbox(
|
| 2653 |
+
label=i18n("Load pre-trained base model G path:"),
|
| 2654 |
+
value="assets/pretrained_v2/f0G40k.pth",
|
| 2655 |
+
interactive=True,
|
| 2656 |
+
)
|
| 2657 |
+
pretrained_D15 = gr.Textbox(
|
| 2658 |
+
label=i18n("Load pre-trained base model D path:"),
|
| 2659 |
+
value="assets/pretrained_v2/f0D40k.pth",
|
| 2660 |
+
interactive=True,
|
| 2661 |
+
)
|
| 2662 |
+
with gr.Row():
|
| 2663 |
+
gpus16 = gr.Textbox(
|
| 2664 |
+
label=i18n(
|
| 2665 |
+
"Provide the GPU index(es) separated by '-', like 0-1-2 for using GPUs 0, 1, and 2:"
|
| 2666 |
+
),
|
| 2667 |
+
value=gpus,
|
| 2668 |
+
interactive=True,
|
| 2669 |
+
)
|
| 2670 |
+
sr2.change(
|
| 2671 |
+
change_sr2,
|
| 2672 |
+
[sr2, if_f0_3, version19],
|
| 2673 |
+
[pretrained_G14, pretrained_D15],
|
| 2674 |
+
)
|
| 2675 |
+
version19.change(
|
| 2676 |
+
change_version19,
|
| 2677 |
+
[sr2, if_f0_3, version19],
|
| 2678 |
+
[pretrained_G14, pretrained_D15, sr2],
|
| 2679 |
+
)
|
| 2680 |
+
if_f0_3.change(
|
| 2681 |
+
fn=change_f0,
|
| 2682 |
+
inputs=[if_f0_3, sr2, version19],
|
| 2683 |
+
outputs=[f0method8, pretrained_G14, pretrained_D15],
|
| 2684 |
+
)
|
| 2685 |
+
with gr.Row():
|
| 2686 |
+
butstop = gr.Button(
|
| 2687 |
+
i18n("Stop training"),
|
| 2688 |
+
variant="primary",
|
| 2689 |
+
visible=False,
|
| 2690 |
+
)
|
| 2691 |
+
but3 = gr.Button(
|
| 2692 |
+
i18n("Train model"), variant="primary", visible=True
|
| 2693 |
+
)
|
| 2694 |
+
but3.click(
|
| 2695 |
+
fn=stoptraining,
|
| 2696 |
+
inputs=[gr.Number(value=0, visible=False)],
|
| 2697 |
+
outputs=[but3, butstop],
|
| 2698 |
+
api_name="train_stop",
|
| 2699 |
+
)
|
| 2700 |
+
butstop.click(
|
| 2701 |
+
fn=stoptraining,
|
| 2702 |
+
inputs=[gr.Number(value=1, visible=False)],
|
| 2703 |
+
outputs=[but3, butstop],
|
| 2704 |
+
)
|
| 2705 |
+
info3 = gr.Textbox(
|
| 2706 |
+
label=i18n("Output information:"),
|
| 2707 |
+
value="",
|
| 2708 |
+
lines=4,
|
| 2709 |
+
max_lines=4,
|
| 2710 |
+
)
|
| 2711 |
+
|
| 2712 |
+
with gr.Column():
|
| 2713 |
+
save_action = gr.Dropdown(
|
| 2714 |
+
label=i18n("Save type"),
|
| 2715 |
+
choices=[
|
| 2716 |
+
i18n("Save all"),
|
| 2717 |
+
i18n("Save D and G"),
|
| 2718 |
+
i18n("Save voice"),
|
| 2719 |
+
],
|
| 2720 |
+
value=i18n("Choose the method"),
|
| 2721 |
+
interactive=True,
|
| 2722 |
+
)
|
| 2723 |
+
but4 = gr.Button(
|
| 2724 |
+
i18n("Train feature index"), variant="primary"
|
| 2725 |
+
)
|
| 2726 |
+
|
| 2727 |
+
but7 = gr.Button(i18n("Save model"), variant="primary")
|
| 2728 |
+
|
| 2729 |
+
if_save_every_weights18.change(
|
| 2730 |
+
fn=lambda if_save_every_weights: (
|
| 2731 |
+
{
|
| 2732 |
+
"visible": if_save_every_weights,
|
| 2733 |
+
"__type__": "update",
|
| 2734 |
+
}
|
| 2735 |
+
),
|
| 2736 |
+
inputs=[if_save_every_weights18],
|
| 2737 |
+
outputs=[save_epoch10],
|
| 2738 |
+
)
|
| 2739 |
+
|
| 2740 |
+
but3.click(
|
| 2741 |
+
click_train,
|
| 2742 |
+
[
|
| 2743 |
+
exp_dir1,
|
| 2744 |
+
sr2,
|
| 2745 |
+
if_f0_3,
|
| 2746 |
+
spk_id5,
|
| 2747 |
+
save_epoch10,
|
| 2748 |
+
total_epoch11,
|
| 2749 |
+
batch_size12,
|
| 2750 |
+
if_save_latest13,
|
| 2751 |
+
pretrained_G14,
|
| 2752 |
+
pretrained_D15,
|
| 2753 |
+
gpus16,
|
| 2754 |
+
if_cache_gpu17,
|
| 2755 |
+
if_save_every_weights18,
|
| 2756 |
+
version19,
|
| 2757 |
],
|
| 2758 |
+
[info3, butstop, but3],
|
| 2759 |
+
api_name="train_start",
|
| 2760 |
)
|
| 2761 |
+
|
| 2762 |
+
but4.click(train_index, [exp_dir1, version19], info3)
|
| 2763 |
+
but7.click(resources.save_model, [exp_dir1, save_action], info3)
|
| 2764 |
+
|
| 2765 |
+
with gr.TabItem(i18n("UVR5")): # UVR section
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2766 |
with gr.Row():
|
| 2767 |
+
with gr.Column():
|
| 2768 |
+
model_select = gr.Radio(
|
| 2769 |
+
label=i18n("Model Architecture:"),
|
| 2770 |
+
choices=["VR", "MDX", "Demucs (Beta)"],
|
| 2771 |
+
value="VR",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2772 |
interactive=True,
|
| 2773 |
)
|
| 2774 |
+
dir_wav_input = gr.Textbox(
|
| 2775 |
label=i18n(
|
| 2776 |
+
"Enter the path of the audio folder to be processed:"
|
| 2777 |
),
|
| 2778 |
+
value=os.path.join(now_dir, "assets", "audios"),
|
|
|
|
| 2779 |
)
|
| 2780 |
+
wav_inputs = gr.File(
|
| 2781 |
+
file_count="multiple",
|
| 2782 |
label=i18n(
|
| 2783 |
+
"You can also input audio files in batches. Choose one of the two options. Priority is given to reading from the folder."
|
| 2784 |
),
|
|
|
|
|
|
|
| 2785 |
)
|
| 2786 |
+
|
| 2787 |
with gr.Column():
|
| 2788 |
+
model_choose = gr.Dropdown(
|
| 2789 |
+
label=i18n("Model:"), choices=uvr5_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2790 |
)
|
| 2791 |
+
agg = gr.Slider(
|
| 2792 |
+
minimum=0,
|
| 2793 |
+
maximum=20,
|
| 2794 |
+
step=1,
|
| 2795 |
+
label="Vocal Extraction Aggressive",
|
| 2796 |
+
value=10,
|
| 2797 |
+
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2798 |
visible=False,
|
| 2799 |
)
|
| 2800 |
+
opt_vocal_root = gr.Textbox(
|
| 2801 |
+
label=i18n("Specify the output folder for vocals:"),
|
| 2802 |
+
value="assets/audios",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2803 |
)
|
| 2804 |
+
opt_ins_root = gr.Textbox(
|
| 2805 |
+
label=i18n("Specify the output folder for accompaniment:"),
|
| 2806 |
+
value="assets/audios/audio-others",
|
|
|
|
|
|
|
| 2807 |
)
|
| 2808 |
+
format0 = gr.Radio(
|
| 2809 |
+
label=i18n("Export file format:"),
|
| 2810 |
+
choices=["wav", "flac", "mp3", "m4a"],
|
| 2811 |
+
value="flac",
|
| 2812 |
+
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2813 |
)
|
| 2814 |
+
model_select.change(
|
| 2815 |
+
fn=update_model_choices,
|
| 2816 |
+
inputs=model_select,
|
| 2817 |
+
outputs=model_choose,
|
| 2818 |
+
)
|
| 2819 |
+
but2 = gr.Button(i18n("Convert"), variant="primary")
|
| 2820 |
+
vc_output4 = gr.Textbox(label=i18n("Output information:"))
|
| 2821 |
+
# wav_inputs.upload(fn=save_to_wav2_edited, inputs=[wav_inputs], outputs=[])
|
| 2822 |
+
but2.click(
|
| 2823 |
+
uvr,
|
| 2824 |
[
|
| 2825 |
+
model_choose,
|
| 2826 |
+
dir_wav_input,
|
| 2827 |
+
opt_vocal_root,
|
| 2828 |
+
wav_inputs,
|
| 2829 |
+
opt_ins_root,
|
| 2830 |
+
agg,
|
| 2831 |
+
format0,
|
| 2832 |
+
model_select,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2833 |
],
|
| 2834 |
+
[vc_output4],
|
| 2835 |
+
api_name="uvr_convert",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2836 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2837 |
with gr.TabItem(i18n("TTS")):
|
| 2838 |
with gr.Column():
|
| 2839 |
text_test = gr.Textbox(
|