"""Audio tool: transcribe audio with the Groq Whisper API."""

import os
import tempfile
from typing import Optional, Tuple

try:
    import requests
except ImportError:
    # Downloading is only one of the two input paths; transcribing a local
    # file must keep working when ``requests`` is not installed.
    requests = None


# Base URL of the service that serves task attachments.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Seconds to wait for the attachment download.
DEFAULT_DOWNLOAD_TIMEOUT = 30
# Suffix used when the content type gives no usable hint.
DEFAULT_AUDIO_SUFFIX = ".mp3"

# (marker, suffix) pairs checked in order against the Content-Type header.
# "m4a" is listed before "mp4" so "audio/x-m4a" keeps its original mapping.
_AUDIO_SUFFIX_BY_MARKER = (
    ("mp3", ".mp3"),
    ("wav", ".wav"),
    ("ogg", ".ogg"),
    ("m4a", ".m4a"),
    ("flac", ".flac"),
    ("webm", ".webm"),
    ("mp4", ".m4a"),
)

# Prefixes of every failure message produced by ``transcribe_audio_groq``.
_ERROR_PREFIXES = ("Error:", "Audio transcription error:")


class _AudioSourceError(Exception):
    """Raised when no usable audio file can be located; carries the message."""


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring filesystem errors (best-effort temp cleanup)."""
    try:
        os.unlink(path)
    except OSError:
        pass


def _suffix_for_content_type(content_type: str) -> str:
    """Map a Content-Type header value to a file suffix such as ``.wav``."""
    normalized = content_type.lower()
    if "audio" in normalized:
        for marker, suffix in _AUDIO_SUFFIX_BY_MARKER:
            if marker in normalized:
                return suffix
    # Covers "audio/mpeg" as well as missing or non-audio content types.
    return DEFAULT_AUDIO_SUFFIX


def _resolve_audio_path(task_id: str, audio_path: str) -> Tuple[str, bool]:
    """Pick the audio file to use.

    Returns:
        ``(path, is_temporary)``. ``is_temporary`` is True when the file was
        downloaded for this call and the caller must delete it afterwards.

    Raises:
        _AudioSourceError: no local file exists and no download succeeded.
    """
    if audio_path and os.path.exists(audio_path):
        return audio_path, False
    if task_id:
        downloaded_path = download_audio_file(task_id)
        if not downloaded_path:
            raise _AudioSourceError("Error: Could not download audio file")
        return downloaded_path, True
    raise _AudioSourceError("Error: No audio path or task_id provided")


def download_audio_file(
    task_id: str,
    api_url: str = DEFAULT_API_URL,
    timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
) -> Optional[str]:
    """Download the audio attachment of a task into a temporary file.

    Args:
        task_id: Identifier appended to ``{api_url}/files/``.
        api_url: Base URL of the file service.
        timeout: Request timeout in seconds.

    Returns:
        Path of the temporary file (the caller is responsible for deleting
        it), or None when the request fails or does not answer with HTTP 200.
        This function never raises; errors are printed instead.
    """
    if requests is None:
        print("Error downloading audio: the 'requests' package is not installed")
        return None

    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=timeout)
        if response.status_code != 200:
            return None

        suffix = _suffix_for_content_type(response.headers.get("content-type", ""))
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp_file:
                tmp_file.write(response.content)
        except OSError:
            # Do not leave a half-written file behind.
            _remove_quietly(tmp_file.name)
            raise
        return tmp_file.name
    except Exception as e:
        # Boundary of the downloader: report and signal failure with None.
        print(f"Error downloading audio: {e}")
        return None


def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
    """Transcribe audio with the Groq Whisper API (model ``whisper-large-v3``).

    Args:
        task_id: Task whose attachment is downloaded when no local file is
            usable.
        audio_path: Path of a local audio file; preferred when it exists.
        language: Language passed to the transcription request
            (default ``"en"``).

    Returns:
        The stripped transcription text. On failure a message starting with
        ``"Error:"`` or ``"Audio transcription error:"`` is returned instead
        of raising.
    """
    target_audio_path = None
    is_temporary = False
    try:
        # Imported lazily so the module can be loaded without the Groq SDK.
        from groq import Groq

        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            return "Error: GROQ_API_KEY not found in environment variables"
        groq_client = Groq(api_key=groq_api_key)

        target_audio_path, is_temporary = _resolve_audio_path(task_id, audio_path)
        if not os.path.exists(target_audio_path):
            return "Error: Audio file not found"

        with open(target_audio_path, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                file=(os.path.basename(target_audio_path), audio_file.read()),
                model="whisper-large-v3",
                response_format="text",
                language=language,
                temperature=0.0,  # Deterministic results
            )

        # The result is either an object exposing ``.text`` or something
        # that is simply converted with ``str``.
        if hasattr(transcription, "text"):
            result = transcription.text
        else:
            result = str(transcription)
        return result.strip()
    except _AudioSourceError as e:
        return str(e)
    except Exception as e:
        return f"Audio transcription error: {e}"
    finally:
        # Only files downloaded by this call are removed; a caller-supplied
        # ``audio_path`` is never touched.
        if is_temporary and target_audio_path:
            _remove_quietly(target_audio_path)


def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
    """Transcribe audio and return the text together with basic metadata.

    Returns:
        Dict with the keys ``"transcription"``, ``"metadata"`` and
        ``"success"``. ``"success"`` is False whenever the transcription text
        is one of the failure messages of ``transcribe_audio_groq``.
    """
    try:
        text = transcribe_audio_groq(task_id, audio_path, language)

        metadata = {
            "model": "whisper-large-v3",
            "language": language,
            "provider": "groq",
        }

        # File details are only available for a caller-supplied local file.
        if audio_path and os.path.exists(audio_path):
            metadata["file_size"] = os.path.getsize(audio_path)
            metadata["file_path"] = audio_path

        return {
            "transcription": text,
            "metadata": metadata,
            # Both failure prefixes are checked: exceptions are reported as
            # "Audio transcription error: ...", not "Error: ...".
            "success": not text.startswith(_ERROR_PREFIXES),
        }
    except Exception as e:
        return {
            "transcription": f"Error: {e}",
            "metadata": {},
            "success": False,
        }


def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
    """Describe the audio file when Groq transcription is not available.

    Returns:
        A message containing the file size in bytes, or a message starting
        with ``"Error:"`` / ``"Audio processing error:"`` on failure.
    """
    target_audio_path = None
    is_temporary = False
    try:
        target_audio_path, is_temporary = _resolve_audio_path(task_id, audio_path)
        file_size = os.path.getsize(target_audio_path)
        return (
            f"Audio file detected - Size: {file_size} bytes. "
            "Groq transcription not available. Please describe the audio content."
        )
    except _AudioSourceError as e:
        return str(e)
    except Exception as e:
        return f"Audio processing error: {e}"
    finally:
        # Remove the download even when reading the size failed.
        if is_temporary and target_audio_path:
            _remove_quietly(target_audio_path)


if __name__ == "__main__":
    # Manual check with a local audio file (if present).
    test_audio = "/path/to/test/audio.mp3"
    if os.path.exists(test_audio):
        result = transcribe_audio_groq(audio_path=test_audio)
        print("Transcription Result:", result)
    else:
        print("No test audio found")

    # Manual check with a task_id (requires an API key):
    # result = transcribe_audio_groq(task_id="some_task_id")
    # print("Transcription Result:", result)