"""Repository tests for Phase 6 HITL helpers.""" from __future__ import annotations from uuid import uuid4 import pytest from ocr_sprint.db.base import Base, get_engine, session_scope from ocr_sprint.db.repositories import ( InvalidFieldPathError, JobAlreadyApprovedError, JobNotCompletedError, JobNotFoundError, JobRepository, ) from ocr_sprint.schemas.document import DocumentStatus, SourceKind @pytest.fixture def db_ready() -> None: Base.metadata.create_all(bind=get_engine()) def _seed_completed_job( *, result: dict[str, object] | None = None, flags: list[str] | None = None, ) -> uuid4: # type: ignore[type-arg] jid = uuid4() with session_scope() as session: repo = JobRepository(session) repo.create( job_id=jid, filename="x.pdf", source_kind=SourceKind.PDF, blob_key="k", ) with session_scope() as session: JobRepository(session).mark_completed( jid, status=DocumentStatus.NEEDS_REVIEW, confidence=0.7, result=result or { "header": { "nomor_sprint": "Sprin/1/I/2025", "satuan_penerbit": "POLRES X", "perihal": None, }, "personel": [ {"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}, ], "untuk": ["Melaksanakan tugas"], }, review_flags=flags or [], ) return jid def test_apply_corrections_updates_nested_header_field(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: repo = JobRepository(session) repo.apply_corrections( jid, corrections=[("header.perihal", "Penyelidikan kasus X", "regex miss")], corrected_by="reviewer-a", ) row = repo.get_or_raise(jid) assert row.result is not None assert row.result["header"]["perihal"] == "Penyelidikan kasus X" def test_apply_corrections_writes_audit_row(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: JobRepository(session).apply_corrections( jid, corrections=[("header.perihal", "Penyelidikan", None)], corrected_by="reviewer-a", ) with session_scope() as session: events = JobRepository(session).list_corrections(jid) assert len(events) == 1 assert events[0].field_path == "header.perihal" assert events[0].old_value is None assert events[0].new_value == "Penyelidikan" assert events[0].corrected_by == "reviewer-a" def test_apply_corrections_supports_list_index(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: JobRepository(session).apply_corrections( jid, corrections=[("personel[0].nrp", "77060001", None)], corrected_by=None, ) row = JobRepository(session).get_or_raise(jid) assert row.result is not None assert row.result["personel"][0]["nrp"] == "77060001" def test_apply_corrections_is_atomic_on_invalid_path(db_ready: None) -> None: """A second-correction failure must roll back the first one.""" jid = _seed_completed_job() with session_scope() as session, pytest.raises(InvalidFieldPathError): JobRepository(session).apply_corrections( jid, corrections=[ ("header.perihal", "OK", None), ("bogus.root", "X", None), ], corrected_by=None, ) # The first correction must not have persisted. with session_scope() as session: row = JobRepository(session).get_or_raise(jid) assert row.result is not None assert row.result["header"].get("perihal") is None def test_apply_corrections_rejects_out_of_range_index(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session, pytest.raises(InvalidFieldPathError): JobRepository(session).apply_corrections( jid, corrections=[("personel[99].nrp", "77060001", None)], corrected_by=None, ) def test_apply_corrections_rejects_after_approve(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: JobRepository(session).approve(jid, reviewed_by="reviewer-a") with session_scope() as session, pytest.raises(JobAlreadyApprovedError): JobRepository(session).apply_corrections( jid, corrections=[("header.perihal", "X", None)], corrected_by="reviewer-a", ) def test_apply_corrections_rejects_missing_job(db_ready: None) -> None: with session_scope() as session, pytest.raises(JobNotFoundError): JobRepository(session).apply_corrections( uuid4(), corrections=[("header.perihal", "X", None)], corrected_by=None, ) def test_apply_corrections_rejects_pending_job(db_ready: None) -> None: jid = uuid4() with session_scope() as session: JobRepository(session).create( job_id=jid, filename="x", source_kind=SourceKind.PDF, blob_key="k" ) with session_scope() as session, pytest.raises(JobNotCompletedError): JobRepository(session).apply_corrections( jid, corrections=[("header.perihal", "X", None)], corrected_by=None, ) def test_missing_field_flag_cleared_when_header_gap_filled(db_ready: None) -> None: jid = _seed_completed_job( result={ "header": { "nomor_sprint": None, "satuan_penerbit": "POLRES X", } }, flags=["missing_field", "low_ocr_confidence"], ) with session_scope() as session: JobRepository(session).apply_corrections( jid, corrections=[("header.nomor_sprint", "Sprin/2/I/2025", None)], corrected_by="reviewer-a", ) row = JobRepository(session).get_or_raise(jid) # ``low_ocr_confidence`` stays (correction doesn't resolve that signal), # but ``missing_field`` is gone because every required header key is # now non-empty. assert list(row.review_flags) == ["low_ocr_confidence"] def test_approve_sets_timestamps_and_is_idempotent(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: row = JobRepository(session).approve(jid, reviewed_by="reviewer-a") first_at = row.reviewed_at assert first_at is not None with session_scope() as session: row = JobRepository(session).approve(jid, reviewed_by="reviewer-b") # Second call must NOT overwrite reviewed_by or reviewed_at. # SQLite drops tzinfo on roundtrip, so compare the naive components. assert row.approved is True assert row.reviewed_by == "reviewer-a" assert row.reviewed_at is not None assert row.reviewed_at.replace(tzinfo=None) == first_at.replace(tzinfo=None) def test_approve_rejects_pending_job(db_ready: None) -> None: jid = uuid4() with session_scope() as session: JobRepository(session).create( job_id=jid, filename="x", source_kind=SourceKind.PDF, blob_key="k" ) with session_scope() as session, pytest.raises(JobNotCompletedError): JobRepository(session).approve(jid, reviewed_by="rev") def test_history_returns_events_in_order(db_ready: None) -> None: jid = _seed_completed_job() with session_scope() as session: JobRepository(session).apply_corrections( jid, corrections=[("header.perihal", "one", None)], corrected_by="r1", ) with session_scope() as session: JobRepository(session).apply_corrections( jid, corrections=[ ("header.perihal", "two", None), ("personel[0].nama", "ANDI", None), ], corrected_by="r2", ) with session_scope() as session: events = JobRepository(session).list_corrections(jid) assert [e.new_value for e in events] == ["one", "two", "ANDI"] assert [e.corrected_by for e in events] == ["r1", "r2", "r2"]