{
  "architectures": [
    "VibeVoiceDiffusionHead"
  ],
  "ddpm_batch_mul": 4,
  "ddpm_beta_schedule": "cosine",
  "ddpm_num_inference_steps": 20,
  "ddpm_num_steps": 1000,
  "diffusion_type": "ddpm",
  "head_ffn_ratio": 3.0,
  "head_layers": 4,
  "hidden_size": 3584,
  "latent_size": 64,
  "model_type": "vibevoice_diffusion_head",
  "prediction_type": "v_prediction",
  "rms_norm_eps": 1e-05,
  "speech_vae_dim": 64,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.3"
}