"""Pytest configuration & lightweight LLM mocking.

When VOXSUM_SKIP_LLM_TESTS == '1', we *mock* heavy LLM loading by replacing
`src.summarization.get_llm` with a lightweight stub, which avoids native
`llama_cpp` model initialization (which caused segfaults in CI / constrained
environments).

Leave VOXSUM_SKIP_LLM_TESTS unset (or set it to anything other than '1') to
run the real LLM-dependent tests.
"""
from __future__ import annotations

import os
import types
import pytest
import sys
from pathlib import Path

# Directory one level above the folder containing this file. It is prepended
# to sys.path (only if absent) so that imports resolved relative to it, such
# as `src.summarization` below, work no matter where pytest is launched from.
ROOT = Path(__file__).resolve().parent.parent
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

# Only install mocks when the user explicitly asks to skip the heavy LLM tests
if os.getenv("VOXSUM_SKIP_LLM_TESTS") == "1":
    # Patch src.summarization.get_llm to return a dummy object with the needed interface
    import src.summarization as summarization  # type: ignore

    class _DummyLlama:
        """Deterministic stand-in for the object ``get_llm`` returns.

        Only ``create_chat_completion``, ``tokenize`` and ``detokenize`` are
        provided; nothing in this class loads a model.
        """

        def __init__(self):
            # One record per create_chat_completion call so tests can inspect usage.
            self._calls = []

        def create_chat_completion(self, messages, stream=False, **kwargs):  # pragma: no cover - simple mock
            """Return a canned reply built from the most recent user message.

            Args:
                messages: Sequence of chat message dicts (``role`` /
                    ``content`` keys are read).
                stream: When true, return an iterator over a single
                    ``{"choices": [{"delta": {"content": ...}}]}`` chunk
                    instead of a completion dict.
                **kwargs: Accepted and recorded, otherwise ignored.

            Returns:
                ``{"choices": [{"message": {"content": text}}]}``, or an
                iterator over one delta chunk when ``stream`` is true.
            """
            self._calls.append({"messages": messages, "stream": stream, "kwargs": kwargs})

            # Content of the last message whose role is "user" (falsy if none).
            user_content = next(
                (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
                "",
            )
            if user_content:
                # str() keeps non-string content (e.g. a list of parts) from crashing the mock.
                snippet = str(user_content)[:80].replace("\n", " ")
            else:
                snippet = "Summary"
            text = "[MOCK] " + snippet

            if stream:
                # NOTE(review): mirrors the delta-chunk shape llama_cpp streams - confirm against callers.
                return iter([{"choices": [{"delta": {"content": text}}]}])
            return {"choices": [{"message": {"content": text}}]}

        def tokenize(self, data: bytes, *args, **kwargs):  # pragma: no cover - trivial
            """Pretend to tokenize: return the first 16 byte values of *data*.

            Extra positional/keyword arguments are accepted and ignored so
            callers written against a richer tokenizer signature still work.
            """
            return list(data[:16])  # pretend small token list

        def detokenize(self, tokens, *args, **kwargs):  # pragma: no cover - trivial
            """Pack integer tokens (each in 0-255) back into ``bytes``.

            Extra positional/keyword arguments are accepted and ignored.
            """
            return bytes(tokens)

    def _mock_get_llm(selected_gguf_model: str):  # pragma: no cover - trivial
        """Replacement for ``summarization.get_llm``; the model name is ignored."""
        return _DummyLlama()

    # Install the mock only if not already swapped
    if getattr(summarization.get_llm, "__name__", "") != "_mock_get_llm":
        summarization.get_llm = _mock_get_llm  # type: ignore

@pytest.fixture
def dummy_llm():
    """Fixture exposing a dummy LLM (even when real tests run).

    When VOXSUM_SKIP_LLM_TESTS == '1' and ``summarization.get_llm`` is the
    mock installed at import time, the dummy is obtained through that mock.
    In every other case a standalone stub is yielded, so this fixture never
    calls the unpatched ``get_llm``.
    """
    if os.getenv("VOXSUM_SKIP_LLM_TESTS") == "1":
        import src.summarization as summarization  # type: ignore

        get_llm = summarization.get_llm
        # Only go through get_llm when it is the module-level mock; the
        # unpatched loader is the heavy path this file exists to avoid.
        if getattr(get_llm, "__name__", "") == "_mock_get_llm":
            model_names = list(summarization.available_gguf_llms.keys())  # type: ignore
            # The mock ignores the model name, so an empty registry is fine.
            yield get_llm(model_names[0] if model_names else "")
            return

    # Real-LLM run (or mock not installed): provide a standalone dummy
    # consistent with the mock.
    class _Faux:
        def create_chat_completion(self, messages, stream=False, **kwargs):
            return {"choices": [{"message": {"content": "[MOCK FIXTURE RESPONSE]"}}]}

    yield _Faux()