{
  "crop_size": {
    "height": 384,
    "width": 384
  },
  "data_format": "channels_first",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "SigLipImageProcessor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "processor_class": "Qwen2VLProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": [
    384,
    384
  ],
  "chunk_length": 30,
  "feature_extractor_type": "WhisperFeatureExtractor",
  "feature_size": 128,
  "hop_length": 160,
  "n_fft": 400,
  "n_samples": 480000,
  "nb_max_frames": 3000,
  "padding_side": "right",
  "padding_value": 0.0,
  "return_attention_mask": false,
  "sampling_rate": 16000
}