Adds a small Ollama HTTP client (httpx-based, no extra runtime deps),
prompt builders, and a hybrid header extractor that runs *after* the
deterministic regex layer. The merger never overwrites a regex-filled
field — the LLM only fills gaps. If LLM_ENABLED=false (the default), or
the Ollama server is unreachable, the pipeline degrades gracefully:
- LLM_ENABLED=false -> no LLM call at all, no flag.
- LLM_ENABLED=true, header complete -> no LLM call.
- LLM_ENABLED=true, header has gaps, LLM responded ok -> merge + LLM_FALLBACK flag (review hint).
- LLM_ENABLED=true, header has gaps, LLM unavailable -> keep regex result + LLM_UNAVAILABLE flag.
Default model qwen2.5:1.5b on http://localhost:11434 — chosen for CPU
throughput (~5-15s per call) at acceptable accuracy. The LLM only fills
the *header* (nomor, tanggal, satuan, perihal, dasar). Personnel rows
stay with PP-Structure since that's more accurate and doesn't need LLM.
Tests:
- test_llm_client.py: httpx MockTransport-driven tests for the wire
format, error paths (HTTP 5xx, malformed JSON, missing envelope,
ConnectError), and request shape.
- test_llm_extractor.py: merge policy + None-on-unavailable behaviour.
- test_orchestrator_llm.py: end-to-end orchestrator wiring with stubs
for ingest/preprocess/OCR/table — verifies LLM is skipped when
disabled, skipped when header is complete, called and flagged when
gaps exist, and marked unavailable when the client returns None.
162 unit tests pass total (was 146).
Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
109 lines
3.7 KiB
Python
109 lines
3.7 KiB
Python
"""Unit tests for the Ollama HTTP client wrapper.
|
|
|
|
We swap ``httpx.Client`` inside ``ocr_sprint.llm.client`` for a builder that
|
|
returns a real ``httpx.Client`` wrapping a ``MockTransport``. Capturing the
|
|
original constructor *before* patching avoids infinite recursion in the
|
|
patched callable.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
import httpx
|
|
import pytest
|
|
from pydantic import BaseModel
|
|
|
|
import ocr_sprint.llm.client as llm_client_module
|
|
from ocr_sprint.llm.client import LLMUnavailableError, OllamaClient
|
|
|
|
|
|
class _Schema(BaseModel):
    # Minimal response model handed to ``chat_json`` as the validation target
    # in the tests of this module.

    foo: str  # string field of the expected reply
    bar: int  # integer field of the expected reply
|
|
|
|
|
|
def _ollama_envelope(content: str) -> dict[str, object]:
|
|
"""Mimic the shape Ollama's /api/chat returns."""
|
|
return {"message": {"role": "assistant", "content": content}, "done": True}
|
|
|
|
|
|
def _patch_transport(
    monkeypatch: pytest.MonkeyPatch,
    handler: Any,
) -> None:
    """Replace the client module's ``httpx.Client`` with a mock-backed builder.

    ``handler`` is wrapped in an ``httpx.MockTransport``. The replacement
    installed through ``monkeypatch`` builds real ``httpx.Client`` instances
    that always use that transport.
    """
    # Capture the genuine constructor before patching so the replacement
    # below does not end up calling itself.
    original_ctor = httpx.Client
    mock_transport = httpx.MockTransport(handler)

    def _build_client(*_ignored: object, **options: object) -> httpx.Client:
        # Discard any transport the caller asked for; the mock is always used.
        # Positional arguments are dropped as well.
        options.pop("transport", None)
        return original_ctor(transport=mock_transport, **options)

    monkeypatch.setattr(llm_client_module.httpx, "Client", _build_client)
|
|
|
|
|
|
def test_chat_json_returns_validated_model(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 200 reply with valid JSON content comes back as a validated model.

    Also checks the request shape: it targets ``/api/chat`` under the base
    URL, asks for JSON-formatted output, and carries the system message.
    """
    import json  # local import: only used to normalise the captured body

    captured: dict[str, object] = {}

    def _handler(request: httpx.Request) -> httpx.Response:
        # Record what the client sent so it can be inspected after the call.
        captured["url"] = str(request.url)
        captured["body"] = request.read()
        return httpx.Response(200, json=_ollama_envelope('{"foo": "x", "bar": 7}'))

    _patch_transport(monkeypatch, _handler)

    client = OllamaClient(base_url="http://ollama:11434", model="m", timeout_s=5)
    out = client.chat_json("system msg", "user msg", _Schema)

    assert out == _Schema(foo="x", bar=7)
    assert captured["url"] == "http://ollama:11434/api/chat"
    body = captured["body"]
    assert isinstance(body, bytes)
    # Re-serialise with compact separators before the substring check, so the
    # assertion does not depend on how the request body's JSON happened to be
    # whitespace-formatted (``"format": "json"`` vs ``"format":"json"``).
    compact = json.dumps(json.loads(body), separators=(",", ":"))
    assert '"format":"json"' in compact
    assert b'"system msg"' in body
|
|
|
|
|
|
def test_chat_json_raises_on_http_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """An HTTP 500 reply surfaces as ``LLMUnavailableError``."""

    def _always_500(_request: httpx.Request) -> httpx.Response:
        # Every request gets a server error with a plain-text body.
        return httpx.Response(500, text="boom")

    _patch_transport(monkeypatch, _always_500)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match="Ollama request failed"):
        ollama.chat_json("s", "u", _Schema)
|
|
|
|
|
|
def test_chat_json_raises_on_invalid_json(monkeypatch: pytest.MonkeyPatch) -> None:
    """Message content that is not JSON surfaces as ``LLMUnavailableError``."""
    # The envelope itself is well-formed; only the assistant content is bad.
    bad_payload = _ollama_envelope("this is not json")

    def _reply_with_garbage(_request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=bad_payload)

    _patch_transport(monkeypatch, _reply_with_garbage)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match="schema"):
        ollama.chat_json("s", "u", _Schema)
|
|
|
|
|
|
def test_chat_json_raises_on_missing_envelope(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 200 reply without the message envelope surfaces as ``LLMUnavailableError``."""

    def _reply_without_message(_request: httpx.Request) -> httpx.Response:
        # Valid JSON, but none of the keys the chat envelope carries.
        return httpx.Response(200, json={"oops": True})

    _patch_transport(monkeypatch, _reply_without_message)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=5)

    with pytest.raises(LLMUnavailableError, match=r"message\.content"):
        ollama.chat_json("s", "u", _Schema)
|
|
|
|
|
|
def test_chat_json_raises_on_connection_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """A transport-level ``httpx.ConnectError`` surfaces as ``LLMUnavailableError``."""

    def _refuse_connection(request: httpx.Request) -> httpx.Response:
        # Fail before any response exists, as an unreachable server would.
        raise httpx.ConnectError("nobody home", request=request)

    _patch_transport(monkeypatch, _refuse_connection)
    ollama = OllamaClient(base_url="http://x", model="m", timeout_s=1)

    with pytest.raises(LLMUnavailableError):
        ollama.chat_json("s", "u", _Schema)
|