# Python dependencies, in pip requirements-file format.
# One requirement per line; a line starting with '#' (or text after ' #')
# is a comment and is ignored by pip.

# Core dependencies
flask>=3.0.0
flask-cors>=4.0.0
gradio>=4.0.0
huggingface-hub==0.24.0  # Required by transformers 4.47.1
numpy>=1.24.0
scipy>=1.10.0
librosa>=0.10.0
soundfile>=0.12.0
pydantic>=2.0.0
python-dotenv>=1.0.0
pyyaml>=6.0
requests>=2.31.0

# PyTorch - CPU mode for compatibility
# Note: DiffRhythm2 requires torch>=2.4 which is incompatible with torch-directml
# Using CPU mode to avoid version conflicts. For GPU acceleration, use NVIDIA CUDA.
torch==2.4.0
torchaudio==2.4.0

# DiffRhythm 2 core dependencies
torchdiffeq>=0.2.4  # Required for CFM (flow matching)
phonemizer>=3.2.0
muq>=0.1.0  # MuQ-MuLan style encoder for music generation
jieba>=0.42.0  # Chinese text segmentation
pypinyin>=0.50.0  # Chinese to pinyin conversion
cn2an>=0.5.0  # Chinese number to text
onnxruntime>=1.15.0  # For g2p Chinese model
pykakasi>=2.3.0  # Japanese text processing
# Japanese phonetics; the environment marker limits installation to Python < 3.12
pyopenjtalk; python_version < "3.12"
unidecode>=1.3.0  # Text normalization
py3langid>=0.2.2  # Language detection
inflect>=7.0.0  # English text normalization (required by g2p)

# AI Model dependencies
transformers==4.47.1  # Pinned for DiffRhythm2 compatibility
diffusers>=0.21.0
sentencepiece>=0.1.99
protobuf>=3.20.0
accelerate==0.33.0
einops==0.8.0
omegaconf==2.3.0

# Audio processing
pedalboard==0.9.9
pydub==0.25.1
resampy==0.4.3
mutagen==1.47.0

# Audio quality enhancement
demucs==4.0.1  # Stem separation

# MSD Integration (v1.0.2)
h5py==3.10.0  # For reading MSD HDF5 files
tqdm==4.66.5  # Progress bars for data import
noisereduce==3.0.2  # Noise reduction

# LoRA Training dependencies
peft==0.11.0  # Parameter-Efficient Fine-Tuning (LoRA adapters)
datasets==2.19.0  # HuggingFace datasets for training data management
tensorboard==2.16.2  # Training monitoring and visualization
wandb==0.17.0  # Optional: Advanced experiment tracking

# Utilities
safetensors==0.4.3
gitpython==3.1.43