|
|
|
|
|
"""Quick multilingual title smoke tests (LLM).""" |
|
|
from __future__ import annotations |
|
|
import os, sys, pytest |
|
|
from pathlib import Path |
|
|
|
|
|
# Opt-out switch: when VOXSUM_SKIP_LLM_TESTS is exactly "1", skip this whole
# module at collection time, before the project imports further down run.
if os.environ.get("VOXSUM_SKIP_LLM_TESTS") == "1":
    pytest.skip(
        "LLM tests skipped (unset VOXSUM_SKIP_LLM_TESTS to run)",
        allow_module_level=True,
    )
|
|
|
|
|
# ROOT is the directory two levels above this file. It is put at the front of
# sys.path (only if not already present) so the ``src.*`` imports that follow
# can be resolved -- presumably ``src`` lives directly under ROOT.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
from src.summarization import generate_title |
|
|
from src.utils import available_gguf_llms |
|
|
|
|
|
def _select_model():
    """Choose the GGUF model name used by the title smoke tests.

    Selection order:

    1. The value of the ``VOXSUM_GGUF_MODEL`` environment variable, when it
       is set, non-empty and present in ``available_gguf_llms``.
    2. The first name from the built-in preference list that is present in
       ``available_gguf_llms``.
    3. The first entry produced by iterating ``available_gguf_llms``.

    Returns:
        The selected model name.

    Raises:
        RuntimeError: If ``available_gguf_llms`` contains no entries at all.
    """
    env_choice = os.getenv("VOXSUM_GGUF_MODEL")
    if env_choice and env_choice in available_gguf_llms:
        return env_choice

    preferred = ("Gemma-3-270M", "Gemma-3-3N-E2B", "Gemma-3-3N-E4B", "Gemma-3-1B")
    for cand in preferred:
        if cand in available_gguf_llms:
            return cand

    # Fall back to whatever is registered first. A default is supplied so an
    # empty registry produces a readable error instead of a bare
    # StopIteration escaping from next().
    fallback = next(iter(available_gguf_llms), None)
    if fallback is None:
        raise RuntimeError(
            "No GGUF LLMs are available; cannot select a model for the title tests"
        )
    return fallback
|
|
|
|
|
# Short sample transcripts, keyed by language label. The label is only used
# in assertion messages; the transcript text is what gets fed to the model.
TEST_TRANSCRIPTS: dict[str, str] = {
    "english": "Hello everyone, today we're going to discuss artificial intelligence and its impact.",
    "french": "Bonjour à tous, aujourd'hui nous allons discuter de l'intelligence artificielle.",
}
|
|
|
|
|
def test_multilingual_titles():
    """Check that a title is produced for every sample transcript.

    One model is chosen via ``_select_model()`` and reused for all entries of
    ``TEST_TRANSCRIPTS``. For each transcript the generated title must contain
    at least one non-whitespace character and at most 15 whitespace-separated
    words.
    """
    model_name = _select_model()
    for language, transcript in TEST_TRANSCRIPTS.items():
        title = generate_title(transcript, model_name)

        # A whitespace-only string is truthy yet splits into zero words, so it
        # would satisfy both checks unless it is stripped before testing.
        assert title and title.strip(), f"Empty title for {language}"

        word_count = len(title.split())
        assert word_count <= 15, (
            f"Title for {language} has {word_count} words (max 15): {title!r}"
        )
|
|
|