Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv, find_dotenv | |
| import pytest | |
| import jiwer | |
| from openai import OpenAI | |
| import logging | |
| import tempfile | |
| load_dotenv(find_dotenv(), override=True) | |
| logging.basicConfig( | |
| filename="../crisis_log.txt", | |
| level=logging.INFO, | |
| format="%(asctime)s - %(levelname)s - %(message)s", | |
| datefmt="%Y-%m-%d %H:%M:%S" | |
| ) | |
| AUDIO_DIR = "audio_samples" | |
| TRANSCRIPT_DIR = "transcripts" | |
| def transcribe_audio(audio_data, reference_text): | |
| """Transcribe audio using OpenAI's Whisper model with language detection based on reference.""" | |
| try: | |
| client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) | |
| if not client.api_key: | |
| raise ValueError("OPENAI_API_KEY not set") | |
| language = "ar" if any(c in "ابتثجحخدذرزسشصضطظعغفقكلمنهوي" for c in reference_text) else "en" | |
| logging.info(f"Detected language for transcription: {language}") | |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: | |
| temp_file.write(audio_data) | |
| temp_file_path = temp_file.name | |
| logging.info(f"Temporary audio file created: {temp_file_path}") | |
| with open(temp_file_path, "rb") as audio_file: | |
| transcript = client.audio.transcriptions.create( | |
| model="whisper-1", | |
| file=audio_file, | |
| language=language | |
| ) | |
| os.unlink(temp_file_path) | |
| logging.info(f"Transcription result: {transcript.text.strip()}") | |
| return transcript.text.strip() | |
| except Exception as e: | |
| logging.error(f"Transcription failed: {str(e)}") | |
| return "" | |
| def read_transcript(file_path): | |
| """Read transcript file with UTF-8 encoding.""" | |
| try: | |
| with open(file_path, "r", encoding="utf-8") as f: | |
| text = f.read().strip() | |
| logging.info(f"Reference transcript from {file_path}: {text}") | |
| return text | |
| except Exception as e: | |
| logging.error(f"Failed to read transcript {file_path}: {str(e)}") | |
| return "" | |
| def test_wer_for_audio_transcript_pairs(): | |
| """Test WER for each audio-transcript pair.""" | |
| logging.info("Starting WER test for audio-transcript pairs") | |
| audio_files = sorted([f for f in os.listdir(AUDIO_DIR) if f.endswith(".wav")]) | |
| transcript_files = sorted([f for f in os.listdir(TRANSCRIPT_DIR) if f.endswith(".txt")]) | |
| logging.info(f"Found audio files: {audio_files}") | |
| logging.info(f"Found transcript files: {transcript_files}") | |
| assert len(audio_files) == 5, f"Expected 5 audio files, found {len(audio_files)}" | |
| assert len(transcript_files) == 5, f"Expected 5 transcript files, found {len(transcript_files)}" | |
| results = [] | |
| for i, (audio_file, transcript_file) in enumerate(zip(audio_files, transcript_files), 1): | |
| logging.info(f"Processing pair {i}: {audio_file} vs {transcript_file}") | |
| audio_path = os.path.join(AUDIO_DIR, audio_file) | |
| transcript_path = os.path.join(TRANSCRIPT_DIR, transcript_file) | |
| reference = read_transcript(transcript_path) | |
| if not reference: | |
| logging.error(f"No reference transcript for {transcript_file}") | |
| pytest.fail(f"No reference transcript for {transcript_file}") | |
| try: | |
| with open(audio_path, "rb") as audio: | |
| audio_data = audio.read() | |
| if not audio_data: | |
| logging.error(f"Empty audio file: {audio_file}") | |
| pytest.fail(f"Empty audio file: {audio_file}") | |
| hypothesis = transcribe_audio(audio_data, reference) | |
| if not hypothesis: | |
| logging.error(f"Transcription failed for {audio_file}") | |
| pytest.fail(f"Transcription failed for {audio_file}") | |
| except Exception as e: | |
| logging.error(f"Transcription error for {audio_file}: {str(e)}") | |
| pytest.fail(f"Transcription error for {audio_file}: {str(e)}") | |
| # Compute WER | |
| try: | |
| wer = jiwer.wer(reference, hypothesis) | |
| logging.info(f"WER for {audio_file} vs {transcript_file}: {wer:.4f}") | |
| print(f"WER for {audio_file} vs {transcript_file}: {wer:.4f}") | |
| results.append((audio_file, transcript_file, wer)) | |
| except Exception as e: | |
| logging.error(f"WER calculation failed for {audio_file}: {str(e)}") | |
| pytest.fail(f"WER calculation failed for {audio_file}: {str(e)}") | |
| logging.info(f"All WER results: {results}") | |
| if not results: | |
| logging.warning("No WER results collected") | |
| print("No WER results collected") | |
| else: | |
| print("Final WER Results:") | |
| for audio_file, transcript_file, wer in results: | |
| print(f"WER for {audio_file} vs {transcript_file}: {wer:.4f}") | |
| for audio_file, transcript_file, wer in results: | |
| if wer > 1.0: | |
| logging.warning(f"High WER {wer:.4f} for {audio_file} vs {transcript_file}. Check audio-transcript match.") | |
| if __name__ == "__main__": | |
| pytest.main(["-v", __file__]) | |