File size: 1,422 Bytes
2bbfbb7 0393dfa 2bbfbb7 0393dfa 2bbfbb7 0393dfa 2bbfbb7 0393dfa 2bbfbb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
//! Audio processing module for IndexTTS
//!
//! Provides mel-spectrogram computation, audio I/O, and DSP operations.
mod dsp;
mod io;
pub mod mel;
mod resample;
pub use dsp::{
apply_fade, apply_preemphasis, dynamic_range_compression, dynamic_range_decompression,
normalize_audio, normalize_audio_peak,
};
pub use io::{load_audio, save_audio, AudioData};
pub use mel::{mel_spectrogram, mel_to_linear, MelFilterbank};
pub use resample::resample;
use crate::Result;
/// Settings for the audio front end: sampling rate, STFT framing, and mel bands.
///
/// Every field is public, so a value can be built with a struct literal or
/// derived from [`Default`] using struct-update syntax.
#[derive(Clone, Debug)]
pub struct AudioConfig {
    /// Sampling rate of the audio (presumably in Hz — confirm against the loader).
    pub sample_rate: u32,
    /// Size of the FFT.
    pub n_fft: usize,
    /// Hop length used by the STFT (presumably counted in samples — confirm).
    pub hop_length: usize,
    /// Length of the analysis window (presumably counted in samples — confirm).
    pub win_length: usize,
    /// How many mel bands are produced.
    pub n_mels: usize,
    /// Lowest frequency considered (presumably in Hz — confirm).
    pub fmin: f32,
    /// Highest frequency considered (presumably in Hz — confirm).
    pub fmax: f32,
}
impl Default for AudioConfig {
fn default() -> Self {
Self {
sample_rate: 22050,
n_fft: 1024,
hop_length: 256,
win_length: 1024,
n_mels: 80,
fmin: 0.0,
fmax: 8000.0,
}
}
}
/// Load the audio file at `path` and compute its mel spectrogram.
///
/// `config.sample_rate` is handed to [`load_audio`] as the requested rate
/// (presumably the file is resampled to it — confirm in the `io` module), and
/// the loaded samples are then passed to [`mel_spectrogram`] together with `config`.
///
/// # Errors
///
/// Propagates any error returned by [`load_audio`] or [`mel_spectrogram`].
pub fn compute_mel_from_file(path: &str, config: &AudioConfig) -> Result<ndarray::Array2<f32>> {
    let requested_rate = Some(config.sample_rate);
    let loaded = load_audio(path, requested_rate)?;

    mel_spectrogram(&loaded.samples, config)
}
|