# OCR-SPRIN-SERVICE/tests/unit/test_llm_client.py

"""Unit tests for the Ollama HTTP client wrapper.

We swap ``httpx.Client`` inside ``ocr_sprint.llm.client`` for a builder that
returns a real ``httpx.Client`` wrapping a ``MockTransport``. Capturing the
original constructor *before* patching avoids infinite recursion in the
patched callable.
"""

from __future__ import annotations

from typing import Any

import httpx
import pytest
from pydantic import BaseModel

import ocr_sprint.llm.client as llm_client_module
from ocr_sprint.llm.client import LLMUnavailableError, OllamaClient


class _Schema(BaseModel):
    # Minimal response model handed to ``OllamaClient.chat_json`` by the tests
    # in this module: one string field and one integer field.
    # NOTE(review): documented with comments rather than a docstring on the
    # assumption that a model docstring could leak into any schema text the
    # client derives from this class — confirm before converting.
    foo: str
    bar: int


def _ollama_envelope(content: str) -> dict[str, object]:
    """Mimic the shape Ollama's /api/chat returns."""
    return {"message": {"role": "assistant", "content": content}, "done": True}


def _patch_transport(
    monkeypatch: pytest.MonkeyPatch,
    handler: Any,
) -> None:
    """Route every ``httpx.Client`` built by the client module through *handler*.

    ``Client`` on ``llm_client_module.httpx`` is replaced (via *monkeypatch*)
    with a factory that still builds a genuine ``httpx.Client``, but always
    backed by an ``httpx.MockTransport`` wrapping *handler*.

    Args:
        monkeypatch: pytest fixture used to install the replacement.
        handler: Callable given to ``httpx.MockTransport``; it receives each
            outgoing request and returns (or raises instead of) a response.
    """
    # Grab the genuine constructor first: once the attribute is replaced, the
    # factory must not end up calling itself.
    original_client_cls = httpx.Client
    mock_transport = httpx.MockTransport(handler)

    def _build_client(*_ignored: object, **options: object) -> httpx.Client:
        # Positional arguments are discarded, and a caller-supplied
        # ``transport`` never wins over the mock one.
        forwarded = {
            key: value for key, value in options.items() if key != "transport"
        }
        return original_client_cls(transport=mock_transport, **forwarded)

    monkeypatch.setattr(llm_client_module.httpx, "Client", _build_client)


def test_chat_json_returns_validated_model(monkeypatch: pytest.MonkeyPatch) -> None:
    """Happy path: a well-formed reply is parsed into the requested model.

    Also checks what was sent: the request goes to ``/api/chat`` under the
    configured base URL, asks for JSON output, and carries the system prompt.
    """
    import json  # local import: only this test decodes the request payload

    captured: dict[str, object] = {}

    def _handler(request: httpx.Request) -> httpx.Response:
        # Record what the client sent so it can be asserted on afterwards.
        captured["url"] = str(request.url)
        captured["body"] = request.read()
        return httpx.Response(200, json=_ollama_envelope('{"foo": "x", "bar": 7}'))

    _patch_transport(monkeypatch, _handler)

    client = OllamaClient(base_url="http://ollama:11434", model="m", timeout_s=5)
    out = client.chat_json("system msg", "user msg", _Schema)

    assert out == _Schema(foo="x", bar=7)
    assert captured["url"] == "http://ollama:11434/api/chat"
    body = captured["body"]
    assert isinstance(body, bytes)
    # Decode the payload instead of matching raw bytes: whether the serializer
    # writes '"format":"json"' or '"format": "json"' is an encoding detail
    # (it differs between httpx releases), not client behaviour.
    payload = json.loads(body)
    assert isinstance(payload, dict)
    assert payload.get("format") == "json"
    # A JSON string token contains no separator whitespace, so a byte-level
    # search is stable here.
    assert b'"system msg"' in body


def test_chat_json_raises_on_http_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 500 response must surface as ``LLMUnavailableError``.

    The error message is expected to match ``Ollama request failed``.
    """

    def _always_500(_req: httpx.Request) -> httpx.Response:
        return httpx.Response(status_code=500, text="boom")

    _patch_transport(monkeypatch, _always_500)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match="Ollama request failed"):
        ollama.chat_json("s", "u", _Schema)


def test_chat_json_raises_on_invalid_json(monkeypatch: pytest.MonkeyPatch) -> None:
    """Assistant content that is not JSON must raise ``LLMUnavailableError``.

    The HTTP exchange itself succeeds (status 200, valid envelope); only the
    message content is unusable. The error is expected to mention ``schema``.
    """

    def _non_json_reply(_req: httpx.Request) -> httpx.Response:
        envelope = _ollama_envelope("this is not json")
        return httpx.Response(status_code=200, json=envelope)

    _patch_transport(monkeypatch, _non_json_reply)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match="schema"):
        ollama.chat_json("s", "u", _Schema)


def test_chat_json_raises_on_missing_envelope(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 200 reply without the expected envelope must raise ``LLMUnavailableError``.

    The server answers with a JSON object that has no ``message`` entry; the
    error is expected to mention ``message.content``.
    """

    def _unexpected_shape(_req: httpx.Request) -> httpx.Response:
        unrelated_payload = {"oops": True}
        return httpx.Response(status_code=200, json=unrelated_payload)

    _patch_transport(monkeypatch, _unexpected_shape)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match=r"message\.content"):
        ollama.chat_json("s", "u", _Schema)


def test_chat_json_raises_on_connection_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """A transport-level ``httpx.ConnectError`` must raise ``LLMUnavailableError``.

    No particular error message is required by this test.
    """

    def _refuse_connection(outgoing: httpx.Request) -> httpx.Response:
        # Fail from inside the transport, before any response exists.
        raise httpx.ConnectError("nobody home", request=outgoing)

    _patch_transport(monkeypatch, _refuse_connection)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=1)

    with pytest.raises(LLMUnavailableError):
        ollama.chat_json("s", "u", _Schema)