"""API tests with the OCR engine mocked.

These tests do NOT load PaddleOCR — instead they monkeypatch the orchestrator
so we can exercise the FastAPI surface without the heavy ML init cost.
"""

from __future__ import annotations

from datetime import date

import pytest
from fastapi.testclient import TestClient

from ocr_sprint.main import create_app
from ocr_sprint.pipeline import orchestrator as orch_module
from ocr_sprint.pipeline.orchestrator import PipelineOutput
from ocr_sprint.schemas.document import DocumentStatus, SourceKind
from ocr_sprint.schemas.extraction import ExtractionResult, HeaderFields


@pytest.fixture
def client() -> TestClient:
    """Return a ``TestClient`` wrapping a fresh app built by ``create_app()``.

    NOTE(review): the client is returned directly rather than entered as a
    context manager, so startup/lifespan handlers are presumably not run by
    these tests — confirm that is intended.
    """
    return TestClient(create_app())


def test_health_endpoint(client: TestClient) -> None:
    """The health endpoint answers 200 with ``{"status": "ok"}``."""
    response = client.get("/api/v1/health")
    assert response.status_code == 200
    assert response.json()["status"] == "ok"


def test_documents_rejects_empty_upload(client: TestClient) -> None:
    """Uploading a zero-byte file is rejected with HTTP 400."""
    response = client.post(
        "/api/v1/documents",
        files={"file": ("empty.pdf", b"", "application/pdf")},
    )
    assert response.status_code == 400


def test_documents_rejects_unknown_format(client: TestClient) -> None:
    """Uploading arbitrary bytes as ``application/octet-stream`` yields HTTP 400.

    Nothing is patched here, so the test does not request the ``monkeypatch``
    fixture.
    """
    response = client.post(
        "/api/v1/documents",
        files={"file": ("x.bin", b"random garbage bytes here", "application/octet-stream")},
    )
    assert response.status_code == 400


def test_documents_returns_pipeline_output(
    client: TestClient,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A successful upload returns the (mocked) pipeline output as JSON.

    ``run_pipeline`` is replaced with a stub returning a fixed
    ``PipelineOutput``; the test then checks that the response body carries
    the stub's status, confidence and header field.
    """
    fake_result = ExtractionResult(
        header=HeaderFields(
            nomor_sprint="Sprin/1/I/2025",
            tanggal=date(2025, 1, 1),
            satuan_penerbit="POLRES TEST",
        ),
        confidence=0.97,
    )
    fake_output = PipelineOutput(
        source_kind=SourceKind.PDF,
        status=DocumentStatus.COMPLETED,
        confidence=0.97,
        result=fake_result,
    )

    def _fake_run(_content: bytes) -> PipelineOutput:
        # Ignore the uploaded bytes and always hand back the canned output.
        return fake_output

    # Replace the function on the orchestrator module itself ...
    monkeypatch.setattr(orch_module, "run_pipeline", _fake_run)

    # ... and also patch the symbol *imported into* the routes module: a name
    # bound there at import time would not see the orchestrator-level patch.
    from ocr_sprint.api.routes import documents as docs_module

    monkeypatch.setattr(docs_module, "run_pipeline", _fake_run)

    response = client.post(
        "/api/v1/documents",
        files={"file": ("x.pdf", b"%PDF-1.4\n%fake", "application/pdf")},
    )
    assert response.status_code == 200

    body = response.json()
    assert body["status"] == "completed"
    assert body["confidence"] == 0.97
    assert body["data"]["header"]["nomor_sprint"] == "Sprin/1/I/2025"