Phase 6: HITL review endpoints + audit trail

- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH  /documents/{id}         apply field-level corrections
- GET    /documents/{id}/history return chronological audit trail
- POST   /documents/{id}/approve lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
This commit is contained in:
Devin AI
2026-04-25 20:12:04 +00:00
parent 45fbfdabb7
commit 66247e39a5
9 changed files with 1058 additions and 22 deletions

248
tests/unit/test_api_hitl.py Normal file
View File

@@ -0,0 +1,248 @@
"""End-to-end HTTP tests for the HITL endpoints.
We re-use the ``fake_pipeline`` style from ``test_api.py`` so we don't pay
the PaddleOCR init cost; the orchestrator is monkey-patched to return a
synthetic ``ExtractionResult``.
"""
from __future__ import annotations
from datetime import date
import pytest
from fastapi.testclient import TestClient
from ocr_sprint.main import create_app
from ocr_sprint.pipeline import orchestrator as orch_module
from ocr_sprint.pipeline.orchestrator import PipelineOutput
from ocr_sprint.schemas.document import DocumentStatus, SourceKind
from ocr_sprint.schemas.extraction import (
ExtractionResult,
HeaderFields,
PersonnelEntry,
ReviewFlag,
)
@pytest.fixture
def client() -> TestClient:
return TestClient(create_app())
@pytest.fixture
def fake_pipeline(monkeypatch: pytest.MonkeyPatch) -> PipelineOutput:
result = ExtractionResult(
header=HeaderFields(
nomor_sprint="Sprin/1/I/2025",
tanggal=date(2025, 1, 1),
satuan_penerbit="POLRES TEST",
perihal=None, # intentional gap so a PATCH can fill it
),
personel=[
PersonnelEntry(pangkat="AIPDA", nrp="77060000", nama="BUDI", jabatan="ANGGOTA"),
],
review_flags=[ReviewFlag.MISSING_FIELD],
confidence=0.7,
)
output = PipelineOutput(
source_kind=SourceKind.PDF,
status=DocumentStatus.NEEDS_REVIEW,
confidence=0.7,
result=result,
)
def _fake_run(_content: bytes) -> PipelineOutput:
return output
monkeypatch.setattr(orch_module, "run_pipeline", _fake_run)
from ocr_sprint.api.routes import documents as docs_module
monkeypatch.setattr(docs_module, "run_pipeline", _fake_run)
from ocr_sprint.worker import tasks as tasks_module
monkeypatch.setattr(tasks_module, "run_pipeline", _fake_run)
return output
def _create_job(client: TestClient) -> str:
post = client.post(
"/api/v1/documents?sync=true",
files={"file": ("x.pdf", b"%PDF-1.4\n%fake", "application/pdf")},
)
assert post.status_code == 200, post.text
body = post.json()
assert body["status"] == "needs_review"
return str(body["job_id"])
def test_patch_applies_correction_and_clears_missing_field(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
patched = client.patch(
f"/api/v1/documents/{job_id}",
json={
"corrections": [
{
"path": "header.perihal",
"value": "Penyelidikan kasus X",
"reason": "LLM missed it",
}
]
},
headers={"X-User-Id": "reviewer-a"},
)
assert patched.status_code == 200, patched.text
body = patched.json()
assert body["data"]["header"]["perihal"] == "Penyelidikan kasus X"
# The fake pipeline has both required header fields filled, so the
# ``missing_field`` flag is auto-cleared as soon as any correction
# lands (the policy re-evaluates required-field coverage on every
# edit).
assert "missing_field" not in body["review_flags"]
def test_patch_returns_400_for_unknown_path(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
resp = client.patch(
f"/api/v1/documents/{job_id}",
json={"corrections": [{"path": "bogus.field", "value": "x"}]},
)
assert resp.status_code == 400
def test_patch_is_atomic_on_partial_failure(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
resp = client.patch(
f"/api/v1/documents/{job_id}",
json={
"corrections": [
{"path": "header.perihal", "value": "OK"},
{"path": "bogus.root", "value": "X"},
]
},
)
assert resp.status_code == 400
# The first correction must not have persisted.
got = client.get(f"/api/v1/documents/{job_id}")
assert got.json()["data"]["header"]["perihal"] is None
def test_history_returns_corrections_in_order(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
client.patch(
f"/api/v1/documents/{job_id}",
json={"corrections": [{"path": "header.perihal", "value": "first"}]},
headers={"X-User-Id": "reviewer-a"},
)
client.patch(
f"/api/v1/documents/{job_id}",
json={"corrections": [{"path": "header.perihal", "value": "second"}]},
headers={"X-User-Id": "reviewer-b"},
)
history = client.get(f"/api/v1/documents/{job_id}/history")
assert history.status_code == 200
events = history.json()
assert [e["new_value"] for e in events] == ["first", "second"]
assert [e["corrected_by"] for e in events] == ["reviewer-a", "reviewer-b"]
# old_value of the second event should reflect the first edit.
assert events[1]["old_value"] == "first"
def test_history_returns_empty_list_for_untouched_job(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
history = client.get(f"/api/v1/documents/{job_id}/history")
assert history.status_code == 200
assert history.json() == []
def test_history_returns_404_for_unknown_job(client: TestClient) -> None:
resp = client.get("/api/v1/documents/00000000-0000-0000-0000-000000000000/history")
assert resp.status_code == 404
def test_approve_locks_subsequent_patches(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
approved = client.post(
f"/api/v1/documents/{job_id}/approve",
headers={"X-User-Id": "reviewer-a"},
)
assert approved.status_code == 200, approved.text
body = approved.json()
assert body["approved"] is True
assert body["reviewed_by"] == "reviewer-a"
assert body["reviewed_at"] # non-empty timestamp
# GET reflects the approval state.
got = client.get(f"/api/v1/documents/{job_id}").json()
assert got["approved"] is True
# PATCH after approve must be rejected with 409.
patched = client.patch(
f"/api/v1/documents/{job_id}",
json={"corrections": [{"path": "header.perihal", "value": "X"}]},
)
assert patched.status_code == 409
def test_approve_is_idempotent(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
first = client.post(
f"/api/v1/documents/{job_id}/approve",
headers={"X-User-Id": "reviewer-a"},
)
second = client.post(
f"/api/v1/documents/{job_id}/approve",
headers={"X-User-Id": "reviewer-b"},
)
assert first.status_code == 200
assert second.status_code == 200
# Second approve must NOT change the attribution. (SQLite drops tzinfo
# on roundtrip, which changes Pydantic's serialization between the two
# calls; compare the naive components.)
assert second.json()["reviewed_by"] == "reviewer-a"
assert (
second.json()["reviewed_at"].rstrip("Z").split("+")[0]
== (first.json()["reviewed_at"].rstrip("Z").split("+")[0])
)
def test_patch_requires_at_least_one_correction(
client: TestClient,
fake_pipeline: PipelineOutput,
) -> None:
job_id = _create_job(client)
resp = client.patch(
f"/api/v1/documents/{job_id}",
json={"corrections": []},
)
assert resp.status_code == 422 # Pydantic min_length=1 violation
def test_patch_missing_job_returns_404(client: TestClient) -> None:
resp = client.patch(
"/api/v1/documents/00000000-0000-0000-0000-000000000000",
json={"corrections": [{"path": "header.perihal", "value": "X"}]},
)
assert resp.status_code == 404

238
tests/unit/test_db_hitl.py Normal file
View File

@@ -0,0 +1,238 @@
"""Repository tests for Phase 6 HITL helpers."""
from __future__ import annotations
from uuid import uuid4
import pytest
from ocr_sprint.db.base import Base, get_engine, session_scope
from ocr_sprint.db.repositories import (
InvalidFieldPathError,
JobAlreadyApprovedError,
JobNotCompletedError,
JobNotFoundError,
JobRepository,
)
from ocr_sprint.schemas.document import DocumentStatus, SourceKind
@pytest.fixture
def db_ready() -> None:
Base.metadata.create_all(bind=get_engine())
def _seed_completed_job(
*,
result: dict[str, object] | None = None,
flags: list[str] | None = None,
) -> uuid4: # type: ignore[type-arg]
jid = uuid4()
with session_scope() as session:
repo = JobRepository(session)
repo.create(
job_id=jid,
filename="x.pdf",
source_kind=SourceKind.PDF,
blob_key="k",
)
with session_scope() as session:
JobRepository(session).mark_completed(
jid,
status=DocumentStatus.NEEDS_REVIEW,
confidence=0.7,
result=result
or {
"header": {
"nomor_sprint": "Sprin/1/I/2025",
"satuan_penerbit": "POLRES X",
"perihal": None,
},
"personel": [
{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"},
],
"untuk": ["Melaksanakan tugas"],
},
review_flags=flags or [],
)
return jid
def test_apply_corrections_updates_nested_header_field(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
repo = JobRepository(session)
repo.apply_corrections(
jid,
corrections=[("header.perihal", "Penyelidikan kasus X", "regex miss")],
corrected_by="reviewer-a",
)
row = repo.get_or_raise(jid)
assert row.result is not None
assert row.result["header"]["perihal"] == "Penyelidikan kasus X"
def test_apply_corrections_writes_audit_row(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
JobRepository(session).apply_corrections(
jid,
corrections=[("header.perihal", "Penyelidikan", None)],
corrected_by="reviewer-a",
)
with session_scope() as session:
events = JobRepository(session).list_corrections(jid)
assert len(events) == 1
assert events[0].field_path == "header.perihal"
assert events[0].old_value is None
assert events[0].new_value == "Penyelidikan"
assert events[0].corrected_by == "reviewer-a"
def test_apply_corrections_supports_list_index(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
JobRepository(session).apply_corrections(
jid,
corrections=[("personel[0].nrp", "77060001", None)],
corrected_by=None,
)
row = JobRepository(session).get_or_raise(jid)
assert row.result is not None
assert row.result["personel"][0]["nrp"] == "77060001"
def test_apply_corrections_is_atomic_on_invalid_path(db_ready: None) -> None:
"""A second-correction failure must roll back the first one."""
jid = _seed_completed_job()
with session_scope() as session, pytest.raises(InvalidFieldPathError):
JobRepository(session).apply_corrections(
jid,
corrections=[
("header.perihal", "OK", None),
("bogus.root", "X", None),
],
corrected_by=None,
)
# The first correction must not have persisted.
with session_scope() as session:
row = JobRepository(session).get_or_raise(jid)
assert row.result is not None
assert row.result["header"].get("perihal") is None
def test_apply_corrections_rejects_out_of_range_index(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session, pytest.raises(InvalidFieldPathError):
JobRepository(session).apply_corrections(
jid,
corrections=[("personel[99].nrp", "77060001", None)],
corrected_by=None,
)
def test_apply_corrections_rejects_after_approve(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
JobRepository(session).approve(jid, reviewed_by="reviewer-a")
with session_scope() as session, pytest.raises(JobAlreadyApprovedError):
JobRepository(session).apply_corrections(
jid,
corrections=[("header.perihal", "X", None)],
corrected_by="reviewer-a",
)
def test_apply_corrections_rejects_missing_job(db_ready: None) -> None:
with session_scope() as session, pytest.raises(JobNotFoundError):
JobRepository(session).apply_corrections(
uuid4(),
corrections=[("header.perihal", "X", None)],
corrected_by=None,
)
def test_apply_corrections_rejects_pending_job(db_ready: None) -> None:
jid = uuid4()
with session_scope() as session:
JobRepository(session).create(
job_id=jid, filename="x", source_kind=SourceKind.PDF, blob_key="k"
)
with session_scope() as session, pytest.raises(JobNotCompletedError):
JobRepository(session).apply_corrections(
jid,
corrections=[("header.perihal", "X", None)],
corrected_by=None,
)
def test_missing_field_flag_cleared_when_header_gap_filled(db_ready: None) -> None:
jid = _seed_completed_job(
result={
"header": {
"nomor_sprint": None,
"satuan_penerbit": "POLRES X",
}
},
flags=["missing_field", "low_ocr_confidence"],
)
with session_scope() as session:
JobRepository(session).apply_corrections(
jid,
corrections=[("header.nomor_sprint", "Sprin/2/I/2025", None)],
corrected_by="reviewer-a",
)
row = JobRepository(session).get_or_raise(jid)
# ``low_ocr_confidence`` stays (correction doesn't resolve that signal),
# but ``missing_field`` is gone because every required header key is
# now non-empty.
assert list(row.review_flags) == ["low_ocr_confidence"]
def test_approve_sets_timestamps_and_is_idempotent(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
row = JobRepository(session).approve(jid, reviewed_by="reviewer-a")
first_at = row.reviewed_at
assert first_at is not None
with session_scope() as session:
row = JobRepository(session).approve(jid, reviewed_by="reviewer-b")
# Second call must NOT overwrite reviewed_by or reviewed_at.
# SQLite drops tzinfo on roundtrip, so compare the naive components.
assert row.approved is True
assert row.reviewed_by == "reviewer-a"
assert row.reviewed_at is not None
assert row.reviewed_at.replace(tzinfo=None) == first_at.replace(tzinfo=None)
def test_approve_rejects_pending_job(db_ready: None) -> None:
jid = uuid4()
with session_scope() as session:
JobRepository(session).create(
job_id=jid, filename="x", source_kind=SourceKind.PDF, blob_key="k"
)
with session_scope() as session, pytest.raises(JobNotCompletedError):
JobRepository(session).approve(jid, reviewed_by="rev")
def test_history_returns_events_in_order(db_ready: None) -> None:
jid = _seed_completed_job()
with session_scope() as session:
JobRepository(session).apply_corrections(
jid,
corrections=[("header.perihal", "one", None)],
corrected_by="r1",
)
with session_scope() as session:
JobRepository(session).apply_corrections(
jid,
corrections=[
("header.perihal", "two", None),
("personel[0].nama", "ANDI", None),
],
corrected_by="r2",
)
with session_scope() as session:
events = JobRepository(session).list_corrections(jid)
assert [e.new_value for e in events] == ["one", "two", "ANDI"]
assert [e.corrected_by for e in events] == ["r1", "r2", "r2"]