# MoyaDahlink's picture
# Create app.py
# 6495706 verified
# app.py
# Minimal Gradio app to classify music genre using a Hugging Face pretrained model.
# Paste into a Hugging Face Space (Gradio) or run locally.
from transformers import pipeline
import gradio as gr
import os
import math
# Load a pretrained music genre classifier from the HF Hub.
# Model used here: ccmusic-database/music_genre (public). Might change over time.
MODEL_ID = "ccmusic-database/music_genre"
# Initialize pipeline (this may take a few seconds on first load)
# NOTE(review): this runs at import time, so the app blocks on startup until
# the model weights are downloaded and loaded into memory.
classifier = pipeline("audio-classification", model=MODEL_ID)
def pretty_results(result_list, top_k=5):
    """Format the top-k classifier predictions as a numbered, human-readable string.

    Parameters
    ----------
    result_list : list[dict]
        Pipeline output, e.g. ``[{'label': 'rock', 'score': 0.72}, ...]``.
        Missing keys fall back to ``"unknown"`` / ``0.0``.
    top_k : int
        Maximum number of predictions to include (default 5).

    Returns
    -------
    str
        One line per prediction, e.g. ``"1. rock — 72.0%"``; empty string
        for an empty result list.
    """
    out_lines = []
    for i, r in enumerate(result_list[:top_k]):
        label = r.get("label", "unknown")
        score = r.get("score", 0.0)
        # Fix: the separator was mojibake ("β€”", a UTF-8 em dash decoded with
        # the wrong codec); emit a real em dash in the user-visible output.
        out_lines.append(f"{i+1}. {label} — {score*100:.1f}%")
    return "\n".join(out_lines)
def classify_audio(audio_file):
    """Classify the genre of an uploaded audio file.

    Parameters
    ----------
    audio_file : str | None
        File path handed over by the Gradio Audio component
        (``type="filepath"``), or ``None`` when nothing was uploaded.

    Returns
    -------
    tuple[str, dict | None]
        A formatted text summary and a label->score mapping for the
        ``gr.Label`` widget; the mapping is ``None`` on error or no input.
    """
    # Guard clause: nothing to classify.
    if audio_file is None:
        return "No audio provided.", None
    try:
        # The HF audio-classification pipeline accepts a file path directly.
        predictions = classifier(audio_file, top_k=5)
    except Exception as e:
        return f"Model inference failed: {e}", None
    summary = pretty_results(predictions, top_k=5)
    # Build the label->score dict consumed by the gr.Label output widget.
    score_map = {}
    for pred in predictions:
        score_map[pred.get("label", "unknown")] = float(pred.get("score", 0.0))
    return summary, score_map
# Build the Gradio UI
with gr.Blocks() as demo:
    # Fix: the headline contained mojibake ("🎧" / "β€”" — UTF-8 bytes
    # decoded with the wrong codec); restore the headphone emoji and em dash.
    gr.Markdown("# 🎧 Genre Guessr — Upload a song, get a genre")
    gr.Markdown("Free hosted: Hugging Face Spaces + pretrained model. Expect ~10 common genres (e.g. rock, pop, jazz).")
    with gr.Row():
        # type="filepath" makes Gradio pass classify_audio a path string.
        audio_in = gr.Audio(label="Upload song (MP3/WAV/OGG) or record", type="filepath")
        classify_btn = gr.Button("Classify")
    output_text = gr.Textbox(label="Top predictions", interactive=False)
    output_scores = gr.Label(num_top_classes=5, label="Probabilities")
    # Wire the button: classify_audio returns (text, scores) matching outputs.
    classify_btn.click(fn=classify_audio, inputs=audio_in, outputs=[output_text, output_scores])
    gr.Markdown(
        """
**Notes:**
- This uses a public pretrained model fine-tuned on common datasets (GTZAN-style 10-genre set). Expect mistakes on short clips, remixes or genre-blends.
- If you want more genres or better accuracy, we can swap to a bigger model or fine-tune later.
"""
    )
# Script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()