# Hugging Face Space app (page status captured at scrape time: "Runtime error")
# coding=utf-8
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import spaces
import torch
import torchaudio
from gradio.themes import Base

from sv import process_audio
def model_inference(input_wav, language):
    """Transcribe one audio clip via the ``sv.process_audio`` pipeline.

    Parameters
    ----------
    input_wav : tuple[int, np.ndarray] | Any
        Either a ``(sample_rate, samples)`` tuple as produced by
        ``gr.Audio`` (int16 PCM, possibly multi-channel), or a value
        that ``process_audio`` accepts directly.
    language : str | None
        Language code for the transcriber; falls back to ``"auto"``
        when empty or ``None``.

    Returns
    -------
    The transcription result returned by ``process_audio``.
    """
    language = language if language else "auto"

    if isinstance(input_wav, tuple):
        fs, input_wav = input_wav
        # Normalise int16 PCM to float32 in [-1.0, 1.0].
        input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
        # Mix multi-channel audio down to mono.
        if input_wav.ndim > 1:
            input_wav = input_wav.mean(-1)
        # Downstream processing expects 16 kHz audio.
        if fs != 16000:
            resampler = torchaudio.transforms.Resample(fs, 16000)
            input_wav = resampler(
                torch.from_numpy(input_wav).float()[None, :]
            )[0].numpy()

    # Write to a unique temp file instead of a fixed "temp.wav": the fixed
    # name races when the Gradio server handles concurrent requests, and the
    # original file was never removed.  Clean up in `finally`.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        sf.write(tmp.name, input_wav, 16000)
        result = process_audio(tmp.name, language=language)
    finally:
        os.unlink(tmp.name)
    return result
def launch():
    """Build and launch the Gradio UI for the Cantonese transcriber.

    Fixes two defects in the original layout code:
    - `audio_input` / `text_output` were created at top level and then
      referenced inside the columns as bare expressions, which is a no-op
      in Gradio Blocks (components render where they are *created*), so the
      intended two-column layout never applied.  They are now created
      inside the layout.
    - The unused `render_example` helper (dead code) is removed.
    """
    # Custom theme tweak: soften the default text colour.
    custom_css = """
    .gradio-container {color: rgb(70, 70, 70);}
    """
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("# Cantonese Call Transcriber")
        gr.Markdown(
            """
            This tool transcribes Cantonese audio calls into text.
            ## How to use:
            1. Upload an audio file or use the example provided at the bottom of the page.
            2. Click the 'Process Audio' button.
            3. The transcription will appear in the output box.
            """
        )

        # Main interface: input + button on the left, transcript on the right.
        with gr.Row():
            with gr.Column(scale=2):
                audio_input = gr.Audio(label="Input")
                fn_button = gr.Button("Process Audio", variant="primary")
            with gr.Column(scale=3):
                text_output = gr.Textbox(lines=10, label="Output")

        # Example clip; clicking it feeds the sample through the model
        # with Cantonese ("yue") forced as the language.
        gr.Examples(
            examples=[["example/scb.mp3"]],
            inputs=[audio_input],
            outputs=[text_output],
            fn=lambda x: model_inference(x, "yue"),
            examples_per_page=1,
        )

        # Wire the button to the inference function.
        fn_button.click(
            fn=lambda x: model_inference(x, "yue"),
            inputs=[audio_input],
            outputs=[text_output],
        )
    demo.launch()
| if __name__ == "__main__": | |
| launch() | |