import os
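# Install this Space's own package in editable mode so the local modules
# (inferencemodel, utils) and their dependencies are importable below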
os.system("python3 -m pip install -e .")

import gradio as gr
import note_seq
from pytube import YouTube
from pydub import AudioSegment
from music21 import converter, environment
from inferencemodel import InferenceModel
from utils import upload_audio, create_image_from_note_sequence

import nest_asyncio
nest_asyncio.apply()
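
# Sample rate the model expects, and the SoundFont used for audio synthesis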
SAMPLE_RATE = 16000
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"

# Set up music21 with MuseScore
us = environment.UserSettings()
us["musescoreDirectPNGPath"] = "/usr/bin/mscore3"
os.putenv("QT_QPA_PLATFORM", "offscreen")
os.putenv("XDG_RUNTIME_DIR", str(environment.Environment().getRootTempDir()))

def load_model(model: str):
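    """Load the inference model for the selected checkpoint."""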
    checkpoint_path = f"/home/user/app/checkpoints/{model}/"
    # Start inference model
    inference_model = InferenceModel(checkpoint_path, model)
    return inference_model

# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
def get_audio(url):
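    """Download a YouTube video's audio track and convert it to WAV."""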
    yt = YouTube(url)
    video = yt.streams.filter(only_audio=True).first()
    out_file = video.download(output_path=".")
    base, ext = os.path.splitext(out_file)
    new_file = base + ".wav"
    # Decode the downloaded stream and export actual WAV data;
    # renaming the file alone would not convert the audio
    AudioSegment.from_file(out_file).export(new_file, format="wav")
    return new_file

# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
def populate_metadata(link):
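    """Fetch the video's thumbnail and title along with its downloaded audio."""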
    yt = YouTube(link)
    audio = get_audio(link)
    return yt.thumbnail_url, yt.title, audio, audio

def inference(yt_audio_path, model):
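    """Transcribe the audio to MIDI and render playback audio, a piano roll, and a score."""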
    with open(yt_audio_path, 'rb') as fd:
        contents = fd.read()

    audio = upload_audio(contents, sample_rate=SAMPLE_RATE)

    inference_model = load_model(model)
    est_ns = inference_model(audio)
    note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
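
    # Synthesize the note sequence with FluidSynth so the result can be played back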
    synth = note_seq.midi_synth.fluidsynth
    array_of_floats = synth(est_ns, sample_rate=SAMPLE_RATE, sf2_path=SF2_PATH)
    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)

    piano_roll = create_image_from_note_sequence(est_ns)
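
    # Engrave the MIDI as sheet music via music21 and MuseScore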
    parsed = converter.parse("./transcribed.mid")
    score = parsed.write("musicxml.png")

    return "./transcribed.mid", (SAMPLE_RATE, int16_data), piano_roll, score

title = "Transcribe music from YouTube videos using Transformers."
description = """
Gradio demo for Music Transcription with Transformers. Read more in the links below.
To use this demo, just paste a YouTube link to the music you want to transcribe.
"""
article = "<p style='text-align: center'><a href='https://magenta.tensorflow.org/transcription-with-transformers' target='_blank'>Blog: Music Transcription with Transformers</a> | <a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a></p>"

# Create a Blocks object
demo = gr.Blocks()

# Use the Blocks object as a context to lay out the UI
with demo:
    gr.Markdown("<h1 style='text-align: center'>"
                + title
                + "</h1>")
    gr.Markdown(description)
    with gr.Box():
        with gr.Box():
            model_label = """
            Which model do you want to use?
            The ismir2021 model transcribes piano only, with note velocities.
            The mt3 model transcribes multiple simultaneous instruments, but without velocities.
            """
            model = gr.Radio(
                ["mt3"],
                label=model_label,
                value="mt3"
            )
        with gr.Row():
            link = gr.Textbox(label="YouTube Link")
        with gr.Row():
            preview_btn = gr.Button("Preview")
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            with gr.Row():
                yt_audio = gr.Audio()
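                # Hidden field that carries the downloaded audio path to the transcribe step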
                yt_audio_path = gr.Textbox(visible=False)
            preview_btn.click(fn=populate_metadata,
                              inputs=[link],
                              outputs=[img, title, yt_audio, yt_audio_path])
        with gr.Row():
            btn = gr.Button("Transcribe music")
        with gr.Row():
            midi_file = gr.File()
            midi_audio = gr.Audio()
        with gr.Row():
            piano_roll = gr.Image()
            score = gr.Image()
        btn.click(inference,
                  inputs=[yt_audio_path, model],
                  outputs=[midi_file, midi_audio, piano_roll, score],
                  api_name="transcribe_wav_to_midi")
    gr.Markdown('''
    [![Twitter Follow](https://img.shields.io/twitter/follow/juancopi81?style=social)](https://twitter.com/juancopi81)
    ![visitors](https://visitor-badge.glitch.me/badge?page_id=Juancopi81.MT3_Gradio_Demo)
    ''')
    gr.Markdown(article)

demo.launch()