| { | |
| "architectures": [ | |
| "ToneForCTC" | |
| ], | |
| "ctc_loss_reduction": "mean", | |
| "ctc_zero_infinity": true, | |
| "decoder_params": { | |
| "feat_in": 384, | |
| "vocabulary": [ | |
| "а", | |
| "б", | |
| "в", | |
| "г", | |
| "д", | |
| "е", | |
| "ё", | |
| "ж", | |
| "з", | |
| "и", | |
| "й", | |
| "к", | |
| "л", | |
| "м", | |
| "н", | |
| "о", | |
| "п", | |
| "р", | |
| "с", | |
| "т", | |
| "у", | |
| "ф", | |
| "х", | |
| "ц", | |
| "ч", | |
| "ш", | |
| "щ", | |
| "ъ", | |
| "ы", | |
| "ь", | |
| "э", | |
| "ю", | |
| "я", | |
| " " | |
| ] | |
| }, | |
| "encoder_params": { | |
| "chunk_size": 10, | |
| "conv_kernel_size": 31, | |
| "d_model": 384, | |
| "dropout": 0.1, | |
| "dropout_att": 0.1, | |
| "feat_in": 64, | |
| "ff_expansion_factor": 4, | |
| "mhsa_state_size": 30, | |
| "mhsa_stateless_layers": 14, | |
| "n_heads": 8, | |
| "n_layers": 16, | |
| "reduction_factor": 2, | |
| "reduction_kernel_size": 3, | |
| "reduction_position": 6, | |
| "rope_dim": 32, | |
| "should_recompute_att_scores": [ | |
| true, | |
| false, | |
| false, | |
| false, | |
| false, | |
| false, | |
| false, | |
| true, | |
| false, | |
| false, | |
| false, | |
| false, | |
| false, | |
| false, | |
| true, | |
| true | |
| ], | |
| "subsampling_conv_channels": [ | |
| 32, | |
| 64 | |
| ], | |
| "subsampling_kernel_size": [ | |
| [ | |
| 11, | |
| 21 | |
| ], | |
| [ | |
| 11, | |
| 11 | |
| ] | |
| ], | |
| "subsampling_strides": [ | |
| [ | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1 | |
| ] | |
| ], | |
| "upsample_position": 14 | |
| }, | |
| "feature_extraction_params": { | |
| "n_fft": 160, | |
| "n_mels": 64, | |
| "preemphasis_coefficient": 0.97, | |
| "sample_rate": 8000, | |
| "window_size": 0.02, | |
| "window_stride": 0.01 | |
| }, | |
| "pad_token_id": 34, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.41.2" | |
| } | |