#!/usr/bin/env python3 """Multilingual summarization & title tests (LLM heavy by default). Set VOXSUM_SKIP_LLM_TESTS=1 to skip these tests (mocked LLM in conftest). Optionally set VOXSUM_GGUF_MODEL to force a specific GGUF model. """ from __future__ import annotations import os import sys import pytest from pathlib import Path if os.getenv("VOXSUM_SKIP_LLM_TESTS") == "1": # opt-out mechanism pytest.skip("LLM tests skipped (unset VOXSUM_SKIP_LLM_TESTS to run)", allow_module_level=True) # Ensure repository root on path ROOT = Path(__file__).resolve().parent.parent if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from src.summarization import summarize_transcript, generate_title # noqa: E402 from src.utils import available_gguf_llms # noqa: E402 def _select_model(): env_choice = os.getenv("VOXSUM_GGUF_MODEL") if env_choice and env_choice in available_gguf_llms: return env_choice for cand in ["Gemma-3-270M", "Gemma-3-3N-E2B", "Gemma-3-3N-E4B", "Gemma-3-1B"]: if cand in available_gguf_llms: return cand return next(iter(available_gguf_llms)) # Test transcripts in different languages TEST_TRANSCRIPTS = { "english": """ Hello everyone, today we're going to discuss artificial intelligence and its impact on modern society. AI has become increasingly important in our daily lives, from voice assistants like Siri and Alexa, to recommendation systems on Netflix and YouTube. The technology is advancing rapidly, with machine learning algorithms becoming more sophisticated every day. However, we must also consider the ethical implications of AI development, including privacy concerns, job displacement, and the potential for bias in automated decision-making systems. It's crucial that we develop AI responsibly to ensure it benefits all of humanity rather than just a select few. """, "french": """ Bonjour à tous, aujourd'hui nous allons discuter de l'intelligence artificielle et de son impact sur la société moderne. L'IA est devenue de plus en plus importante dans notre vie quotidienne, des assistants vocaux comme Siri et Alexa, aux systèmes de recommandation sur Netflix et YouTube. La technologie progresse rapidement, avec des algorithmes d'apprentissage automatique devenant plus sophistiqués chaque jour. Cependant, nous devons également considérer les implications éthiques du développement de l'IA, y compris les préoccupations de confidentialité, le déplacement d'emplois, et le potentiel de biais dans les systèmes de prise de décision automatisée. Il est crucial que nous développions l'IA de manière responsable pour assurer qu'elle bénéficie à toute l'humanité plutôt qu'à une élite. """, } def test_multilingual_summarization(): model_name = _select_model() for language, transcript in TEST_TRANSCRIPTS.items(): parts = list(summarize_transcript(transcript, model_name, "Summarize this transcript")) summary = "".join(parts) assert summary, f"Empty summary for {language}" def test_language_consistency(): model_name = _select_model() for language, transcript in TEST_TRANSCRIPTS.items(): title = generate_title(transcript, model_name) parts = list(summarize_transcript(transcript, model_name, "Summarize this transcript")) summary = "".join(parts) assert title and summary assert len(title) < 120