Spaces:
Running
Running
| import gradio as gr | |
| import requests | |
| import soundfile as sf | |
| import tempfile | |
| import os | |
# Endpoint of the dedicated processing server that receives the recorded audio.
# The address can be overridden without editing this file by setting the
# AUDIO_SERVER_URL environment variable; when that variable is unset or empty,
# the original hard-coded endpoint below is used.
SERVER_URL = (
    os.environ.get("AUDIO_SERVER_URL")
    or "http://204.12.245.139:5000/process_audio"
)
def process_audio(audio):
    """Send recorded audio to the processing server and return its reply as text.

    The audio is written to a temporary WAV file, uploaded to ``SERVER_URL``
    as the multipart field ``file`` (30-second timeout), and the temporary
    file is removed afterwards regardless of the outcome.

    Args:
        audio: ``None`` when nothing was recorded, otherwise a
            ``(sample_rate, samples)`` pair as delivered by the
            ``gr.Audio(type="numpy")`` input.

    Returns:
        A string for the output textbox: the ``"transcription"`` value of the
        server's JSON reply (falling back to ``"response"``), or a
        human-readable message describing what went wrong. Failures while
        writing the WAV file, uploading, or decoding the reply are reported
        as text rather than raised.
    """
    if audio is None:
        return "No audio provided. Please record something."

    sample_rate, audio_data = audio

    # Only reserve a unique path here. The handle is closed before anything
    # writes to the path, because reopening a still-open NamedTemporaryFile by
    # name is not possible on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name

    try:
        # The WAV write lives inside the try block so that a failed write still
        # reaches the cleanup below instead of leaking the temporary file.
        sf.write(wav_path, audio_data, sample_rate)

        with open(wav_path, "rb") as f:
            response = requests.post(SERVER_URL, files={"file": f}, timeout=30)

        if response.status_code != 200:
            return f"Server error {response.status_code}: {response.text}"

        json_data = response.json()
        # Prefer 'transcription'; fall back to 'response' when it is missing
        # or empty.
        result = json_data.get("transcription") or json_data.get("response")
        return result or "Server processed the audio, but did not return a result."
    except Exception as e:
        # UI boundary: show the failure in the textbox instead of raising.
        return f"Exception during processing: {e}"
    finally:
        # Best-effort cleanup; a failed delete must not replace the result
        # (or the error message) that is already on its way to the caller.
        try:
            os.remove(wav_path)
        except OSError:
            pass
# Build the Gradio UI: a single audio input wired to process_audio and a
# single text output. The "source" keyword is deliberately not passed to
# gr.Audio because the installed Gradio version does not accept it.
iface = gr.Interface(
    process_audio,
    gr.Audio(label="Record Your Voice", type="numpy"),
    gr.Textbox(label="Server Response"),
    title="Live AI Call Agent – Browser Mic Frontend",
    description=(
        "Record audio using your browser microphone. The audio will be sent to our dedicated "
        "server for processing with GPU acceleration. Your server should return a transcription or "
        "an AI-generated response."
    ),
)


if __name__ == "__main__":
    # Listen on every network interface; change the port here if 7860 is taken.
    iface.launch(server_port=7860, server_name="0.0.0.0")