Upload folder using huggingface_hub
Browse files- .gitattributes +3 -33
- README.md +12 -0
- chinese-hubert-base/config.json +72 -0
- chinese-hubert-base/model.safetensors +3 -0
- chinese-roberta-wwm-ext-large/config.json +34 -0
- chinese-roberta-wwm-ext-large/pytorch_model.bin +3 -0
- chinese-roberta-wwm-ext-large/tokenizer.json +0 -0
- dict/cmudict.msgpack +3 -0
- dict/ja.dict +3 -0
- dict/opencpop-strict.msgpack +3 -0
- dict/polyphonic.msgpack +3 -0
- fast_langdetect/lid.176.bin +3 -0
- g2pw_model/bopomofo_to_pinyin_wo_tune_dict.msgpack +3 -0
- g2pw_model/char2phonemes.msgpack +3 -0
- g2pw_model/char_bopomofo_dict.msgpack +3 -0
- g2pw_model/labels.msgpack +3 -0
- g2pw_model/model.onnx +3 -0
- g2pw_model/monophonic_chars_dict.msgpack +3 -0
- gsv/config.json +38 -0
- gsv/model.safetensors +3 -0
- sv/model.safetensors +3 -0
- v2pro/v2pro.json +56 -0
- v2pro/v2pro.safetensors +3 -0
- v2pro/v2proplus.json +56 -0
- v2pro/v2proplus.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,5 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
|
| 27 |
-
*.
|
| 28 |
-
*.
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
| 1 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.dict filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
language:
|
| 4 |
+
- ja
|
| 5 |
+
- zh
|
| 6 |
+
- ko
|
| 7 |
+
- en
|
| 8 |
+
base_model:
|
| 9 |
+
- lj1995/GPT-SoVITS
|
| 10 |
+
pipeline_tag: text-to-speech
|
| 11 |
+
---
|
| 12 |
+
Look at: https://github.com/neodyland/gpt-sovits-v2-pro-simple
|
chinese-hubert-base/config.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "chinese-hubert-base",
|
| 3 |
+
"activation_dropout": 0.1,
|
| 4 |
+
"apply_spec_augment": true,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"HubertModel"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.1,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"classifier_proj_size": 256,
|
| 11 |
+
"conv_bias": false,
|
| 12 |
+
"conv_dim": [
|
| 13 |
+
512,
|
| 14 |
+
512,
|
| 15 |
+
512,
|
| 16 |
+
512,
|
| 17 |
+
512,
|
| 18 |
+
512,
|
| 19 |
+
512
|
| 20 |
+
],
|
| 21 |
+
"conv_kernel": [
|
| 22 |
+
10,
|
| 23 |
+
3,
|
| 24 |
+
3,
|
| 25 |
+
3,
|
| 26 |
+
3,
|
| 27 |
+
2,
|
| 28 |
+
2
|
| 29 |
+
],
|
| 30 |
+
"conv_stride": [
|
| 31 |
+
5,
|
| 32 |
+
2,
|
| 33 |
+
2,
|
| 34 |
+
2,
|
| 35 |
+
2,
|
| 36 |
+
2,
|
| 37 |
+
2
|
| 38 |
+
],
|
| 39 |
+
"ctc_loss_reduction": "sum",
|
| 40 |
+
"ctc_zero_infinity": false,
|
| 41 |
+
"do_stable_layer_norm": false,
|
| 42 |
+
"eos_token_id": 2,
|
| 43 |
+
"feat_extract_activation": "gelu",
|
| 44 |
+
"feat_extract_norm": "group",
|
| 45 |
+
"feat_proj_dropout": 0.0,
|
| 46 |
+
"feat_proj_layer_norm": true,
|
| 47 |
+
"final_dropout": 0.1,
|
| 48 |
+
"hidden_act": "gelu",
|
| 49 |
+
"hidden_dropout": 0.1,
|
| 50 |
+
"hidden_size": 768,
|
| 51 |
+
"initializer_range": 0.02,
|
| 52 |
+
"intermediate_size": 3072,
|
| 53 |
+
"layer_norm_eps": 1e-05,
|
| 54 |
+
"layerdrop": 0.1,
|
| 55 |
+
"mask_feature_length": 10,
|
| 56 |
+
"mask_feature_min_masks": 0,
|
| 57 |
+
"mask_feature_prob": 0.0,
|
| 58 |
+
"mask_time_length": 10,
|
| 59 |
+
"mask_time_min_masks": 2,
|
| 60 |
+
"mask_time_prob": 0.05,
|
| 61 |
+
"model_type": "hubert",
|
| 62 |
+
"num_attention_heads": 12,
|
| 63 |
+
"num_conv_pos_embedding_groups": 16,
|
| 64 |
+
"num_conv_pos_embeddings": 128,
|
| 65 |
+
"num_feat_extract_layers": 7,
|
| 66 |
+
"num_hidden_layers": 12,
|
| 67 |
+
"pad_token_id": 0,
|
| 68 |
+
"torch_dtype": "float16",
|
| 69 |
+
"transformers_version": "4.30.2",
|
| 70 |
+
"use_weighted_layer_sum": false,
|
| 71 |
+
"vocab_size": 32
|
| 72 |
+
}
|
chinese-hubert-base/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5f323dd2c87570864910fe48a21222a894dfb9200a2a5e243f43dde1b1b20b8
|
| 3 |
+
size 188767008
|
chinese-roberta-wwm-ext-large/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "chinese-roberta-wwm-ext-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForMaskedLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"directionality": "bidi",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 4096,
|
| 16 |
+
"layer_norm_eps": 1e-12,
|
| 17 |
+
"max_position_embeddings": 512,
|
| 18 |
+
"model_type": "bert",
|
| 19 |
+
"num_attention_heads": 16,
|
| 20 |
+
"num_hidden_layers": 24,
|
| 21 |
+
"output_past": true,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"pooler_fc_size": 768,
|
| 24 |
+
"pooler_num_attention_heads": 12,
|
| 25 |
+
"pooler_num_fc_layers": 3,
|
| 26 |
+
"pooler_size_per_head": 128,
|
| 27 |
+
"pooler_type": "first_token_transform",
|
| 28 |
+
"position_embedding_type": "absolute",
|
| 29 |
+
"torch_dtype": "float16",
|
| 30 |
+
"transformers_version": "4.30.2",
|
| 31 |
+
"type_vocab_size": 2,
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 21128
|
| 34 |
+
}
|
chinese-roberta-wwm-ext-large/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e53a693acc59ace251d143d068096ae0d7b79e4b1b503fa84c9dcf576448c1d8
|
| 3 |
+
size 651225145
|
chinese-roberta-wwm-ext-large/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dict/cmudict.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6aa06b6e2ce28181623ac59f7be7a7b154b0bd23dd0b1e0db634e64196545d0
|
| 3 |
+
size 4157837
|
dict/ja.dict
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b44817ce96e24be7bcfdd009d834b5237fe044dc9ed5f2f9709f71da9d506fed
|
| 3 |
+
size 21321666
|
dict/opencpop-strict.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f469a624aa8726cd0e34eb5985f3c82ea7552550b3d07971602d2a975888021
|
| 3 |
+
size 4516
|
dict/polyphonic.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8e65b6be61bfa5e6a5c7435276a7057ad6d54e6c67bca0e8ecfb32b71c3c994
|
| 3 |
+
size 1203541
|
fast_langdetect/lid.176.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e69ec5451bc261cc7844e49e4792a85d7f09c06789ec800fc4a44aec362764e
|
| 3 |
+
size 131266198
|
g2pw_model/bopomofo_to_pinyin_wo_tune_dict.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5adf0b644d95f9f5335f4d2976f7f7ffd0874f4c9f3bb0eeeda2557a10c59632
|
| 3 |
+
size 5198
|
g2pw_model/char2phonemes.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34e96db6363efc97dcbf24cfaa0e301f358730b9b9dea8eb64ff4ea076ab4f88
|
| 3 |
+
size 39732
|
g2pw_model/char_bopomofo_dict.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:985a38e7a663cb854246abcd839b08a5c77453916781b0a67c881e05f7fc32dd
|
| 3 |
+
size 687053
|
g2pw_model/labels.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf3b8a266b2bc91b9a68c41b24aa77f602c204a6a7478d0ef7e0db4b444ff50a
|
| 3 |
+
size 11697
|
g2pw_model/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bee7f91aaa0dbcd0d61d0bce0ede36b68952de4c6b5d4319a8c949b6d654c70
|
| 3 |
+
size 635213222
|
g2pw_model/monophonic_chars_dict.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ea810e70723937971df2d77f92aa110111426ab69d60ff3d510e875a2cdb4aa
|
| 3 |
+
size 120747
|
gsv/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"seed": 1234,
|
| 4 |
+
"epochs": 200,
|
| 5 |
+
"batch_size": 5,
|
| 6 |
+
"save_every_n_epoch": 1,
|
| 7 |
+
"precision": "16-mixed",
|
| 8 |
+
"gradient_clip": 1.0
|
| 9 |
+
},
|
| 10 |
+
"optimizer": {
|
| 11 |
+
"lr": 0.01,
|
| 12 |
+
"lr_init": 1e-05,
|
| 13 |
+
"lr_end": 0.0001,
|
| 14 |
+
"warmup_steps": 2000,
|
| 15 |
+
"decay_steps": 40000
|
| 16 |
+
},
|
| 17 |
+
"data": {
|
| 18 |
+
"max_eval_sample": 8,
|
| 19 |
+
"max_sec": 57,
|
| 20 |
+
"num_workers": 4,
|
| 21 |
+
"pad_val": 1024
|
| 22 |
+
},
|
| 23 |
+
"model": {
|
| 24 |
+
"vocab_size": 1025,
|
| 25 |
+
"phoneme_vocab_size": 732,
|
| 26 |
+
"embedding_dim": 512,
|
| 27 |
+
"hidden_dim": 512,
|
| 28 |
+
"head": 16,
|
| 29 |
+
"linear_units": 2048,
|
| 30 |
+
"n_layer": 24,
|
| 31 |
+
"dropout": 0,
|
| 32 |
+
"EOS": 1024,
|
| 33 |
+
"random_bert": 0
|
| 34 |
+
},
|
| 35 |
+
"inference": {
|
| 36 |
+
"top_k": 5
|
| 37 |
+
}
|
| 38 |
+
}
|
gsv/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af3e6a748c2cac88e637d4059a9b1a70e3d9b5f808185fb66c30818299487082
|
| 3 |
+
size 155240396
|
sv/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8a55b64a81b035db936a7508df4b7e86133738b76fa272e86fd05711630fa0c
|
| 3 |
+
size 91546698
|
v2pro/v2pro.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"data": {
|
| 3 |
+
"sampling_rate": 32000,
|
| 4 |
+
"filter_length": 2048,
|
| 5 |
+
"hop_length": 640,
|
| 6 |
+
"win_length": 2048
|
| 7 |
+
},
|
| 8 |
+
"model": {
|
| 9 |
+
"inter_channels": 192,
|
| 10 |
+
"hidden_channels": 192,
|
| 11 |
+
"filter_channels": 768,
|
| 12 |
+
"n_heads": 2,
|
| 13 |
+
"n_layers": 6,
|
| 14 |
+
"kernel_size": 3,
|
| 15 |
+
"p_dropout": 0.0,
|
| 16 |
+
"resblock": "1",
|
| 17 |
+
"resblock_kernel_sizes": [
|
| 18 |
+
3,
|
| 19 |
+
7,
|
| 20 |
+
11
|
| 21 |
+
],
|
| 22 |
+
"resblock_dilation_sizes": [
|
| 23 |
+
[
|
| 24 |
+
1,
|
| 25 |
+
3,
|
| 26 |
+
5
|
| 27 |
+
],
|
| 28 |
+
[
|
| 29 |
+
1,
|
| 30 |
+
3,
|
| 31 |
+
5
|
| 32 |
+
],
|
| 33 |
+
[
|
| 34 |
+
1,
|
| 35 |
+
3,
|
| 36 |
+
5
|
| 37 |
+
]
|
| 38 |
+
],
|
| 39 |
+
"upsample_rates": [
|
| 40 |
+
10,
|
| 41 |
+
8,
|
| 42 |
+
2,
|
| 43 |
+
2,
|
| 44 |
+
2
|
| 45 |
+
],
|
| 46 |
+
"upsample_initial_channel": 512,
|
| 47 |
+
"upsample_kernel_sizes": [
|
| 48 |
+
16,
|
| 49 |
+
16,
|
| 50 |
+
8,
|
| 51 |
+
2,
|
| 52 |
+
2
|
| 53 |
+
],
|
| 54 |
+
"gin_channels": 1024
|
| 55 |
+
}
|
| 56 |
+
}
|
v2pro/v2pro.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4e23be437cacee6846c545e533f42ea9f3ce72baa6fafb38ee21dd81e28bdd2
|
| 3 |
+
size 133220264
|
v2pro/v2proplus.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"data": {
|
| 3 |
+
"sampling_rate": 32000,
|
| 4 |
+
"filter_length": 2048,
|
| 5 |
+
"hop_length": 640,
|
| 6 |
+
"win_length": 2048
|
| 7 |
+
},
|
| 8 |
+
"model": {
|
| 9 |
+
"inter_channels": 192,
|
| 10 |
+
"hidden_channels": 192,
|
| 11 |
+
"filter_channels": 768,
|
| 12 |
+
"n_heads": 2,
|
| 13 |
+
"n_layers": 6,
|
| 14 |
+
"kernel_size": 3,
|
| 15 |
+
"p_dropout": 0.0,
|
| 16 |
+
"resblock": "1",
|
| 17 |
+
"resblock_kernel_sizes": [
|
| 18 |
+
3,
|
| 19 |
+
7,
|
| 20 |
+
11
|
| 21 |
+
],
|
| 22 |
+
"resblock_dilation_sizes": [
|
| 23 |
+
[
|
| 24 |
+
1,
|
| 25 |
+
3,
|
| 26 |
+
5
|
| 27 |
+
],
|
| 28 |
+
[
|
| 29 |
+
1,
|
| 30 |
+
3,
|
| 31 |
+
5
|
| 32 |
+
],
|
| 33 |
+
[
|
| 34 |
+
1,
|
| 35 |
+
3,
|
| 36 |
+
5
|
| 37 |
+
]
|
| 38 |
+
],
|
| 39 |
+
"upsample_rates": [
|
| 40 |
+
10,
|
| 41 |
+
8,
|
| 42 |
+
2,
|
| 43 |
+
2,
|
| 44 |
+
2
|
| 45 |
+
],
|
| 46 |
+
"upsample_initial_channel": 768,
|
| 47 |
+
"upsample_kernel_sizes": [
|
| 48 |
+
20,
|
| 49 |
+
16,
|
| 50 |
+
8,
|
| 51 |
+
2,
|
| 52 |
+
2
|
| 53 |
+
],
|
| 54 |
+
"gin_channels": 1024
|
| 55 |
+
}
|
| 56 |
+
}
|
v2pro/v2proplus.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:229b8a25376a7bf3ce2e4fc6c49c04982b4c40bb5496999782a78e76c4577707
|
| 3 |
+
size 171041248
|