Files
OCR-SPRIN-SERVICE/tests/unit/test_db_hitl.py
Devin AI 66247e39a5 Phase 6: HITL review endpoints + audit trail
- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH  /documents/{id}         apply field-level corrections
- GET    /documents/{id}/history return chronological audit trail
- POST   /documents/{id}/approve lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
2026-04-25 20:12:04 +00:00

239 lines
8.1 KiB
Python

"""Repository tests for Phase 6 HITL helpers."""
from __future__ import annotations

from uuid import UUID, uuid4

import pytest

from ocr_sprint.db.base import Base, get_engine, session_scope
from ocr_sprint.db.repositories import (
    InvalidFieldPathError,
    JobAlreadyApprovedError,
    JobNotCompletedError,
    JobNotFoundError,
    JobRepository,
)
from ocr_sprint.schemas.document import DocumentStatus, SourceKind
@pytest.fixture
def db_ready() -> None:
    """Create every table registered on ``Base.metadata`` before the test body runs."""
    engine = get_engine()
    Base.metadata.create_all(bind=engine)
def _seed_completed_job(
    *,
    result: dict[str, object] | None = None,
    flags: list[str] | None = None,
) -> UUID:
    """Create a job and mark it completed with ``NEEDS_REVIEW`` status.

    The job is created and then completed in two separate
    ``session_scope()`` blocks.

    Args:
        result: Payload stored as the job result. When ``None``, a small
            default payload with ``header``, ``personel`` and ``untuk``
            keys is used. An explicitly passed empty dict is stored as-is.
        flags: Review flags stored on the job. ``None`` means no flags.

    Returns:
        The id of the newly seeded job.
    """
    # Only ``None`` selects the default; a caller-supplied empty dict is a
    # legitimate payload and must not be silently replaced.
    if result is None:
        result = {
            "header": {
                "nomor_sprint": "Sprin/1/I/2025",
                "satuan_penerbit": "POLRES X",
                "perihal": None,
            },
            "personel": [
                {"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"},
            ],
            "untuk": ["Melaksanakan tugas"],
        }

    jid = uuid4()
    with session_scope() as session:
        JobRepository(session).create(
            job_id=jid,
            filename="x.pdf",
            source_kind=SourceKind.PDF,
            blob_key="k",
        )
    with session_scope() as session:
        JobRepository(session).mark_completed(
            jid,
            status=DocumentStatus.NEEDS_REVIEW,
            confidence=0.7,
            result=result,
            review_flags=flags if flags is not None else [],
        )
    return jid
def test_apply_corrections_updates_nested_header_field(db_ready: None) -> None:
    """A dotted path into ``header`` overwrites the nested value."""
    job_id = _seed_completed_job()
    patch = [("header.perihal", "Penyelidikan kasus X", "regex miss")]
    with session_scope() as db:
        jobs = JobRepository(db)
        jobs.apply_corrections(
            job_id,
            corrections=patch,
            corrected_by="reviewer-a",
        )
        stored = jobs.get_or_raise(job_id)
        # Inspect the row while the session is still open.
        assert stored.result is not None
        header = stored.result["header"]
        assert header["perihal"] == "Penyelidikan kasus X"
def test_apply_corrections_writes_audit_row(db_ready: None) -> None:
    """One applied correction is listed as exactly one audit event."""
    job_id = _seed_completed_job()
    patch = [("header.perihal", "Penyelidikan", None)]
    with session_scope() as db:
        JobRepository(db).apply_corrections(
            job_id,
            corrections=patch,
            corrected_by="reviewer-a",
        )
    # Read the audit trail back through a fresh session.
    with session_scope() as db:
        audit = JobRepository(db).list_corrections(job_id)
        assert len(audit) == 1
        event = audit[0]
        assert event.field_path == "header.perihal"
        assert event.old_value is None
        assert event.new_value == "Penyelidikan"
        assert event.corrected_by == "reviewer-a"
def test_apply_corrections_supports_list_index(db_ready: None) -> None:
    """A ``name[index].key`` path updates the addressed list element."""
    job_id = _seed_completed_job()
    patch = [("personel[0].nrp", "77060001", None)]
    with session_scope() as db:
        JobRepository(db).apply_corrections(
            job_id,
            corrections=patch,
            corrected_by=None,
        )
        stored = JobRepository(db).get_or_raise(job_id)
        assert stored.result is not None
        first_person = stored.result["personel"][0]
        assert first_person["nrp"] == "77060001"
def test_apply_corrections_is_atomic_on_invalid_path(db_ready: None) -> None:
    """A second-correction failure must roll back the first one."""
    job_id = _seed_completed_job()
    batch = [
        ("header.perihal", "OK", None),
        ("bogus.root", "X", None),
    ]
    with session_scope() as db:
        # NOTE(review): ``pytest.raises`` is the inner context manager, so the
        # error is consumed before ``session_scope`` exits and the session
        # sees a clean exit — confirm that is the scenario intended here.
        with pytest.raises(InvalidFieldPathError):
            JobRepository(db).apply_corrections(
                job_id,
                corrections=batch,
                corrected_by=None,
            )
    # The first correction must not have persisted.
    with session_scope() as db:
        stored = JobRepository(db).get_or_raise(job_id)
        assert stored.result is not None
        header = stored.result["header"]
        assert header.get("perihal") is None
def test_apply_corrections_rejects_out_of_range_index(db_ready: None) -> None:
    """A list index past the end of ``personel`` raises ``InvalidFieldPathError``."""
    job_id = _seed_completed_job()
    patch = [("personel[99].nrp", "77060001", None)]
    with session_scope() as db:
        with pytest.raises(InvalidFieldPathError):
            JobRepository(db).apply_corrections(
                job_id,
                corrections=patch,
                corrected_by=None,
            )
def test_apply_corrections_rejects_after_approve(db_ready: None) -> None:
    """Correcting an already approved job raises ``JobAlreadyApprovedError``."""
    job_id = _seed_completed_job()
    with session_scope() as db:
        JobRepository(db).approve(job_id, reviewed_by="reviewer-a")
    patch = [("header.perihal", "X", None)]
    with session_scope() as db:
        with pytest.raises(JobAlreadyApprovedError):
            JobRepository(db).apply_corrections(
                job_id,
                corrections=patch,
                corrected_by="reviewer-a",
            )
def test_apply_corrections_rejects_missing_job(db_ready: None) -> None:
    """Correcting a job id that was never created raises ``JobNotFoundError``."""
    patch = [("header.perihal", "X", None)]
    with session_scope() as db:
        with pytest.raises(JobNotFoundError):
            JobRepository(db).apply_corrections(
                uuid4(),
                corrections=patch,
                corrected_by=None,
            )
def test_apply_corrections_rejects_pending_job(db_ready: None) -> None:
    """Correcting a created-but-not-completed job raises ``JobNotCompletedError``."""
    job_id = uuid4()
    with session_scope() as db:
        # Only ``create`` is called; the job is never marked completed.
        JobRepository(db).create(
            job_id=job_id,
            filename="x",
            source_kind=SourceKind.PDF,
            blob_key="k",
        )
    patch = [("header.perihal", "X", None)]
    with session_scope() as db:
        with pytest.raises(JobNotCompletedError):
            JobRepository(db).apply_corrections(
                job_id,
                corrections=patch,
                corrected_by=None,
            )
def test_missing_field_flag_cleared_when_header_gap_filled(db_ready: None) -> None:
    """Filling the empty ``nomor_sprint`` drops only the ``missing_field`` flag."""
    incomplete_result = {
        "header": {
            "nomor_sprint": None,
            "satuan_penerbit": "POLRES X",
        }
    }
    job_id = _seed_completed_job(
        result=incomplete_result,
        flags=["missing_field", "low_ocr_confidence"],
    )
    patch = [("header.nomor_sprint", "Sprin/2/I/2025", None)]
    with session_scope() as db:
        JobRepository(db).apply_corrections(
            job_id,
            corrections=patch,
            corrected_by="reviewer-a",
        )
        stored = JobRepository(db).get_or_raise(job_id)
        # ``low_ocr_confidence`` stays (correction doesn't resolve that signal),
        # but ``missing_field`` is gone because every required header key is
        # now non-empty.
        remaining_flags = list(stored.review_flags)
        assert remaining_flags == ["low_ocr_confidence"]
def test_approve_sets_timestamps_and_is_idempotent(db_ready: None) -> None:
    """A second ``approve`` keeps the first reviewer and review timestamp."""
    job_id = _seed_completed_job()
    with session_scope() as db:
        first = JobRepository(db).approve(job_id, reviewed_by="reviewer-a")
        first_at = first.reviewed_at
        assert first_at is not None
    with session_scope() as db:
        second = JobRepository(db).approve(job_id, reviewed_by="reviewer-b")
        # Second call must NOT overwrite reviewed_by or reviewed_at.
        # SQLite drops tzinfo on roundtrip, so compare the naive components.
        assert second.approved is True
        assert second.reviewed_by == "reviewer-a"
        assert second.reviewed_at is not None
        second_naive = second.reviewed_at.replace(tzinfo=None)
        assert second_naive == first_at.replace(tzinfo=None)
def test_approve_rejects_pending_job(db_ready: None) -> None:
    """Approving a created-but-not-completed job raises ``JobNotCompletedError``."""
    job_id = uuid4()
    with session_scope() as db:
        # Only ``create`` is called; the job is never marked completed.
        JobRepository(db).create(
            job_id=job_id,
            filename="x",
            source_kind=SourceKind.PDF,
            blob_key="k",
        )
    with session_scope() as db:
        with pytest.raises(JobNotCompletedError):
            JobRepository(db).approve(job_id, reviewed_by="rev")
def test_history_returns_events_in_order(db_ready: None) -> None:
    """Audit events are listed in the order the corrections were applied."""
    job_id = _seed_completed_job()
    first_batch = [("header.perihal", "one", None)]
    second_batch = [
        ("header.perihal", "two", None),
        ("personel[0].nama", "ANDI", None),
    ]
    # Each batch is applied in its own session, by a different reviewer.
    with session_scope() as db:
        JobRepository(db).apply_corrections(
            job_id,
            corrections=first_batch,
            corrected_by="r1",
        )
    with session_scope() as db:
        JobRepository(db).apply_corrections(
            job_id,
            corrections=second_batch,
            corrected_by="r2",
        )
    with session_scope() as db:
        history = JobRepository(db).list_corrections(job_id)
        new_values = [event.new_value for event in history]
        reviewers = [event.corrected_by for event in history]
        assert new_values == ["one", "two", "ANDI"]
        assert reviewers == ["r1", "r2", "r2"]