# Python dependencies in pip requirements format: one requirement per line.
gradio>=3.0
transformers>=4.30
torch>=1.13  # CPU-only is fine; if Spaces gives GPU, it uses it automatically
soundfile
librosa
ffmpeg-python