Phase 6: HITL review endpoints + audit trail

- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH  /documents/{id}         apply field-level corrections
- GET    /documents/{id}/history return chronological audit trail
- POST   /documents/{id}/approve lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
This commit is contained in:
Devin AI
2026-04-25 20:12:04 +00:00
parent 45fbfdabb7
commit 66247e39a5
9 changed files with 1058 additions and 22 deletions

View File

@@ -22,7 +22,17 @@ from __future__ import annotations
from typing import Annotated
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, File, HTTPException, Query, Response, UploadFile, status
from fastapi import (
APIRouter,
Depends,
File,
Header,
HTTPException,
Query,
Response,
UploadFile,
status,
)
from sqlalchemy.orm import Session
from ocr_sprint.api.deps.auth import require_api_key
@@ -31,11 +41,22 @@ from ocr_sprint.api.errors import UnsupportedDocumentError
from ocr_sprint.api.metrics import JOB_PROCESSING_SECONDS
from ocr_sprint.config import get_settings
from ocr_sprint.db.base import session_scope
from ocr_sprint.db.repositories import JobNotFoundError, JobRepository
from ocr_sprint.db.repositories import (
InvalidFieldPathError,
JobAlreadyApprovedError,
JobNotCompletedError,
JobNotFoundError,
JobRepository,
)
from ocr_sprint.pipeline.ingest import detect_source_kind
from ocr_sprint.pipeline.orchestrator import run_pipeline
from ocr_sprint.schemas.document import DocumentResponse, DocumentStatus
from ocr_sprint.schemas.extraction import ExtractionResult
from ocr_sprint.schemas.review import (
ApprovalResponse,
CorrectionEventResponse,
CorrectionRequest,
)
from ocr_sprint.storage.blob import get_blob_storage
from ocr_sprint.utils.logging import get_logger
@@ -75,6 +96,9 @@ def _row_to_response(row: object) -> DocumentResponse:
data=result_obj,
review_flags=list(row.review_flags or []),
error=row.error,
approved=bool(row.approved),
reviewed_by=row.reviewed_by,
reviewed_at=row.reviewed_at,
)
@@ -192,3 +216,116 @@ async def get_document(
except JobNotFoundError as exc:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
return _row_to_response(row)
# ---------- Phase 6 — HITL ----------
def _correction_row_to_response(row: object) -> CorrectionEventResponse:
# Local import to avoid a cyclic import at module load time.
from ocr_sprint.db.models import JobCorrectionRow
assert isinstance(row, JobCorrectionRow)
return CorrectionEventResponse(
id=row.id,
job_id=row.job_id,
field_path=row.field_path,
old_value=row.old_value,
new_value=row.new_value,
corrected_by=row.corrected_by,
reason=row.reason,
corrected_at=row.corrected_at,
)
@router.patch(
"/{job_id}",
response_model=DocumentResponse,
)
async def patch_document(
job_id: UUID,
body: CorrectionRequest,
session: Annotated[Session, Depends(get_session)],
x_user_id: Annotated[
str | None,
Header(description="Free-form reviewer identifier recorded on the audit row."),
] = None,
) -> DocumentResponse:
"""Apply one or more field-level corrections and record an audit trail.
The whole batch is applied atomically — if any path is invalid the
request fails with 400 and no side effects are written. Returns the
updated document so the client doesn't need a follow-up GET.
"""
repo = JobRepository(session)
try:
repo.apply_corrections(
job_id,
corrections=[(c.path, c.value, c.reason) for c in body.corrections],
corrected_by=x_user_id,
)
row = repo.get_or_raise(job_id)
except JobNotFoundError as exc:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
except InvalidFieldPathError as exc:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
except JobAlreadyApprovedError as exc:
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc
except JobNotCompletedError as exc:
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc
_logger.info(
"documents.patched",
job_id=str(job_id),
count=len(body.corrections),
corrected_by=x_user_id or "",
)
return _row_to_response(row)
@router.get(
"/{job_id}/history",
response_model=list[CorrectionEventResponse],
)
async def get_history(
job_id: UUID,
session: Annotated[Session, Depends(get_session)],
) -> list[CorrectionEventResponse]:
repo = JobRepository(session)
try:
rows = repo.list_corrections(job_id)
except JobNotFoundError as exc:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
return [_correction_row_to_response(r) for r in rows]
@router.post(
"/{job_id}/approve",
response_model=ApprovalResponse,
)
async def approve_document(
job_id: UUID,
session: Annotated[Session, Depends(get_session)],
x_user_id: Annotated[
str | None,
Header(description="Free-form reviewer identifier recorded on the job."),
] = None,
) -> ApprovalResponse:
"""Lock a job's final version. Idempotent: re-approving returns the
existing row without overwriting ``reviewed_at``.
"""
repo = JobRepository(session)
try:
row = repo.approve(job_id, reviewed_by=x_user_id)
except JobNotFoundError as exc:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
except JobNotCompletedError as exc:
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc
_logger.info("documents.approved", job_id=str(job_id), reviewed_by=row.reviewed_by or "")
return ApprovalResponse(
job_id=row.job_id,
approved=bool(row.approved),
reviewed_by=row.reviewed_by,
reviewed_at=row.reviewed_at,
)

View File

@@ -16,7 +16,7 @@ from datetime import datetime, timezone
from typing import Any
from uuid import UUID, uuid4
from sqlalchemy import JSON, DateTime, Float, String, Uuid
from sqlalchemy import JSON, Boolean, DateTime, Float, ForeignKey, Integer, String, Uuid
from sqlalchemy.orm import Mapped, mapped_column
from ocr_sprint.db.base import Base
@@ -42,6 +42,15 @@ class JobRow(Base):
result: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)
error: Mapped[str | None] = mapped_column(String(2048), nullable=True)
# Phase 6 — HITL review state.
# Once ``approved=True`` the row is immutable except to admin users;
# corrections after that point are rejected by the route. ``reviewed_by``
# stores the free-form user identifier the reviewer sent via the
# ``X-User-Id`` header (best-effort attribution — no full RBAC yet).
approved: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
reviewed_by: Mapped[str | None] = mapped_column(String(128), nullable=True)
reviewed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, default=_utcnow
)
@@ -51,3 +60,37 @@ class JobRow(Base):
def __repr__(self) -> str:
return f"JobRow(job_id={self.job_id!s}, status={self.status!r})"
class JobCorrectionRow(Base):
"""One correction event on a job's ``result``.
Each PATCH call writes one row per changed field path so we have a
full audit trail. Rows are append-only — never updated, never
deleted — so the history is reproducible and usable as ground-truth
data for future fine-tuning.
"""
__tablename__ = "job_corrections"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
job_id: Mapped[UUID] = mapped_column(
Uuid, ForeignKey("jobs.job_id", ondelete="CASCADE"), nullable=False, index=True
)
# Dotted JSON path into ExtractionResult, e.g. "header.nomor_sprint" or
# "personel[3].nrp". Kept as a plain string for simplicity — we don't
# parse it server-side beyond the allow-list check in the repository.
field_path: Mapped[str] = mapped_column(String(256), nullable=False)
old_value: Mapped[Any | None] = mapped_column(JSON, nullable=True)
new_value: Mapped[Any | None] = mapped_column(JSON, nullable=True)
corrected_by: Mapped[str | None] = mapped_column(String(128), nullable=True)
reason: Mapped[str | None] = mapped_column(String(512), nullable=True)
corrected_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, default=_utcnow
)
def __repr__(self) -> str:
return (
f"JobCorrectionRow(job_id={self.job_id!s}, "
f"field={self.field_path!r}, by={self.corrected_by!r})"
)

View File

@@ -6,6 +6,9 @@ know about sessions, transactions, or the row → schema mapping.
from __future__ import annotations
import copy
import re
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
from uuid import UUID
@@ -13,7 +16,7 @@ from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import Session
from ocr_sprint.db.models import JobRow
from ocr_sprint.db.models import JobCorrectionRow, JobRow
from ocr_sprint.schemas.document import DocumentStatus, SourceKind
@@ -25,6 +28,110 @@ class JobNotFoundError(LookupError):
"""Raised by API code when GET /documents/{id} hits a missing row."""
class InvalidFieldPathError(ValueError):
"""Raised when a PATCH request references an unsupported field path."""
class JobAlreadyApprovedError(RuntimeError):
"""Raised when a PATCH is attempted against an already-approved job."""
class JobNotCompletedError(RuntimeError):
"""Raised when a PATCH/approve is attempted against a job that hasn't
produced a ``result`` payload yet (e.g. still pending or failed).
"""
@dataclass(frozen=True)
class AppliedCorrection:
"""Internal record of a correction that successfully applied to the
in-memory ``result`` dict. The repository turns this into a persisted
``JobCorrectionRow`` after the whole batch is validated.
"""
field_path: str
old_value: Any
new_value: Any
reason: str | None
# Allow-list of top-level keys we let reviewers edit. Keeps the attack
# surface small: they can't inject arbitrary fields into the JSON blob.
_ALLOWED_ROOTS: frozenset[str] = frozenset({"header", "ttd", "personel", "untuk"})
# Matches a single path segment like ``personel[3]`` — supports at most one
# index per segment, enough for the list fields we care about.
_SEGMENT_RE = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)(?:\[(\d+)\])?$")
def _split_path(path: str) -> list[tuple[str, int | None]]:
"""Parse ``header.nomor_sprint`` or ``personel[2].nrp`` into segments.
Returns list of ``(name, index_or_none)`` tuples. Raises
``InvalidFieldPathError`` on malformed input so the caller can surface
a 400 to the client.
"""
if not path or path.startswith(".") or path.endswith("."):
raise InvalidFieldPathError(f"Invalid field path: {path!r}")
parts = path.split(".")
out: list[tuple[str, int | None]] = []
for part in parts:
match = _SEGMENT_RE.match(part)
if match is None:
raise InvalidFieldPathError(f"Invalid segment in path: {part!r}")
name = match.group(1)
idx_raw = match.group(2)
idx = int(idx_raw) if idx_raw is not None else None
out.append((name, idx))
if out[0][0] not in _ALLOWED_ROOTS:
raise InvalidFieldPathError(
f"Field path root {out[0][0]!r} not in allowed roots {sorted(_ALLOWED_ROOTS)!r}"
)
return out
def _apply_path(data: dict[str, Any], path: str, new_value: Any) -> Any:
"""Apply a single correction to ``data`` in place. Returns the old
value so the caller can record it in the audit row.
Does NOT validate that the new value matches the field's expected
type — that's the reviewer's responsibility; the whole point of HITL
is to let humans override the model's typing.
"""
segments = _split_path(path)
cursor: Any = data
for name, idx in segments[:-1]:
if not isinstance(cursor, dict) or name not in cursor:
raise InvalidFieldPathError(f"Cannot traverse to {path!r}: missing {name!r}")
cursor = cursor[name]
if idx is not None:
if not isinstance(cursor, list) or idx >= len(cursor):
raise InvalidFieldPathError(
f"Cannot traverse to {path!r}: index [{idx}] out of range"
)
cursor = cursor[idx]
name, idx = segments[-1]
if idx is not None:
# Terminal segment is a list-element, e.g. ``untuk[2]``.
if not isinstance(cursor, dict) or name not in cursor:
raise InvalidFieldPathError(f"Cannot apply to {path!r}: missing container {name!r}")
container = cursor[name]
if not isinstance(container, list) or idx >= len(container):
raise InvalidFieldPathError(f"Cannot apply to {path!r}: index [{idx}] out of range")
old = container[idx]
container[idx] = new_value
return old
if not isinstance(cursor, dict):
raise InvalidFieldPathError(f"Cannot apply to {path!r}: parent is not an object")
old = cursor.get(name)
cursor[name] = new_value
return old
class JobRepository:
"""SQL-backed repository for `jobs` rows."""
@@ -94,3 +201,139 @@ class JobRepository:
if row is None:
raise JobNotFoundError(f"Job not found: {job_id}")
return row
# ---------- Phase 6 — HITL ----------
def apply_corrections(
self,
job_id: UUID,
*,
corrections: list[tuple[str, Any, str | None]],
corrected_by: str | None,
) -> list[JobCorrectionRow]:
"""Apply a batch of field corrections atomically.
``corrections`` is a list of ``(path, new_value, reason)`` tuples.
Returns the persisted audit rows so the caller can surface them in
the response.
Raises
------
JobNotFoundError
If the row doesn't exist.
JobNotCompletedError
If the job hasn't produced a result yet (status pending /
processing / failed).
JobAlreadyApprovedError
If the job has been approved — edits are locked.
InvalidFieldPathError
If any path is malformed or references a disallowed root.
"""
row = self._get_or_raise(job_id)
if row.result is None:
raise JobNotCompletedError(
f"Job {job_id} has no result to correct (status={row.status})"
)
if row.approved:
raise JobAlreadyApprovedError(f"Job {job_id} is already approved; edits are locked")
# Deep-copy so we can roll back in memory if any correction fails.
# The underlying JSON column will only be re-assigned once every
# path applied cleanly.
working = copy.deepcopy(row.result)
applied: list[AppliedCorrection] = []
for path, new_value, reason in corrections:
old_value = _apply_path(working, path, new_value)
applied.append(
AppliedCorrection(
field_path=path, old_value=old_value, new_value=new_value, reason=reason
)
)
# Persist audit rows first; if they fail the session rollback also
# undoes the result-column update we're about to do.
persisted: list[JobCorrectionRow] = []
for event in applied:
row_event = JobCorrectionRow(
job_id=job_id,
field_path=event.field_path,
old_value=event.old_value,
new_value=event.new_value,
corrected_by=corrected_by,
reason=event.reason,
)
self.session.add(row_event)
persisted.append(row_event)
# Clear review flags that the correction has resolved. Right now we
# only auto-clear MISSING_FIELD when any corrected field previously
# held a null/empty value — the reviewer explicitly filled a gap.
row.result = working
row.review_flags = _recompute_flags(
original_flags=list(row.review_flags or []),
applied=applied,
working_result=working,
)
row.updated_at = _utcnow()
self.session.flush()
return persisted
def list_corrections(self, job_id: UUID) -> list[JobCorrectionRow]:
"""Return the full audit trail for ``job_id`` in chronological order."""
# ``get_or_raise`` so callers get a 404 instead of an empty list
# when the job itself doesn't exist.
self._get_or_raise(job_id)
stmt = (
select(JobCorrectionRow)
.where(JobCorrectionRow.job_id == job_id)
.order_by(JobCorrectionRow.corrected_at, JobCorrectionRow.id)
)
return list(self.session.scalars(stmt))
def approve(self, job_id: UUID, *, reviewed_by: str | None) -> JobRow:
"""Mark a job as approved. Idempotent — re-approving is a no-op
that keeps the original ``reviewed_at`` (so the audit trail stays
intact).
"""
row = self._get_or_raise(job_id)
if row.result is None:
raise JobNotCompletedError(
f"Job {job_id} has no result to approve (status={row.status})"
)
if row.approved:
return row
row.approved = True
row.reviewed_by = reviewed_by
row.reviewed_at = _utcnow()
row.updated_at = row.reviewed_at
return row
def _recompute_flags(
*,
original_flags: list[str],
applied: list[AppliedCorrection],
working_result: dict[str, Any],
) -> list[str]:
"""Update review flags in light of the corrections just applied.
Keeps the policy simple on purpose:
* ``missing_field`` is removed if after the edit every required
header field is non-empty.
* Other flags stay untouched — the reviewer should either correct the
underlying issue (which this helper can detect) or explicitly
approve the result as-is (which bypasses the flag list).
"""
flags = list(original_flags)
if "missing_field" in flags:
header = working_result.get("header") or {}
filled = all(bool(header.get(key)) for key in ("nomor_sprint", "satuan_penerbit"))
if filled:
flags = [f for f in flags if f != "missing_field"]
# ``applied`` isn't used directly in this MVP rule, but we keep the
# parameter so future policies can inspect exactly what changed
# without re-diffing the blob.
_ = applied
return flags

View File

@@ -55,3 +55,7 @@ class DocumentResponse(BaseModel):
data: ExtractionResult | None = None
review_flags: list[str] = Field(default_factory=list)
error: str | None = None
# Phase 6 — HITL review state.
approved: bool = False
reviewed_by: str | None = None
reviewed_at: datetime | None = None

View File

@@ -0,0 +1,62 @@
"""Request / response schemas for the HITL review endpoints (Phase 6).
The API surface is deliberately small:
* ``CorrectionRequest`` — body of ``PATCH /documents/{id}``. A list of
``FieldCorrection`` entries; each one is applied atomically (all-or-
nothing) and recorded in the audit trail.
* ``CorrectionEventResponse`` — single row in ``GET /documents/{id}/history``.
* ``ApprovalResponse`` — echo back after ``POST /documents/{id}/approve``.
"""
from __future__ import annotations
from datetime import datetime
from typing import Any
from uuid import UUID
from pydantic import BaseModel, Field
class FieldCorrection(BaseModel):
"""One field-level correction.
``path`` is a dotted JSON path into ``ExtractionResult``. Supported
roots: ``header``, ``ttd``, ``personel[n]`` (n is a 0-based index),
``untuk``. The path is validated by the repository before being
applied; unknown roots return 400.
"""
path: str = Field(..., description="Dotted JSON path, e.g. 'header.nomor_sprint'.")
value: Any = Field(..., description="New value (any JSON-serialisable payload).")
reason: str | None = Field(
None, max_length=512, description="Optional free-form reason for the correction."
)
class CorrectionRequest(BaseModel):
"""PATCH body — one or more field corrections, applied atomically."""
corrections: list[FieldCorrection] = Field(..., min_length=1)
class CorrectionEventResponse(BaseModel):
"""One row of the audit log surfaced by GET /history."""
id: int
job_id: UUID
field_path: str
old_value: Any | None = None
new_value: Any | None = None
corrected_by: str | None = None
reason: str | None = None
corrected_at: datetime
class ApprovalResponse(BaseModel):
"""Echo returned after a job is approved."""
job_id: UUID
approved: bool
reviewed_by: str | None = None
reviewed_at: datetime | None = None