{
  "architectures": [
    "SpeechT5HifiGan"
  ],
  "initializer_range": 0.01,
  "leaky_relu_slope": 0.1,
  "model_in_dim": 64,
  "model_type": "hifigan",
  "normalize_before": false,
  "resblock_dilation_sizes": [
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ]
  ],
  "resblock_kernel_sizes": [
    3,
    7,
    11
  ],
  "sampling_rate": 16000,
  "torch_dtype": "float32",
  "transformers_version": "4.32.0.dev0",
  "upsample_initial_channel": 1024,
  "upsample_kernel_sizes": [
    16,
    16,
    8,
    4,
    4
  ],
  "upsample_rates": [
    5,
    4,
    2,
    2,
    2
  ]
}