Phase 6: HITL review endpoints + audit trail
- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH /documents/{id}: apply field-level corrections
- GET /documents/{id}/history: return chronological audit trail
- POST /documents/{id}/approve: lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing
Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
This commit is contained in:
@@ -22,7 +22,17 @@ from __future__ import annotations
|
||||
from typing import Annotated
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, Query, Response, UploadFile, status
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
File,
|
||||
Header,
|
||||
HTTPException,
|
||||
Query,
|
||||
Response,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ocr_sprint.api.deps.auth import require_api_key
|
||||
@@ -31,11 +41,22 @@ from ocr_sprint.api.errors import UnsupportedDocumentError
|
||||
from ocr_sprint.api.metrics import JOB_PROCESSING_SECONDS
|
||||
from ocr_sprint.config import get_settings
|
||||
from ocr_sprint.db.base import session_scope
|
||||
from ocr_sprint.db.repositories import JobNotFoundError, JobRepository
|
||||
from ocr_sprint.db.repositories import (
|
||||
InvalidFieldPathError,
|
||||
JobAlreadyApprovedError,
|
||||
JobNotCompletedError,
|
||||
JobNotFoundError,
|
||||
JobRepository,
|
||||
)
|
||||
from ocr_sprint.pipeline.ingest import detect_source_kind
|
||||
from ocr_sprint.pipeline.orchestrator import run_pipeline
|
||||
from ocr_sprint.schemas.document import DocumentResponse, DocumentStatus
|
||||
from ocr_sprint.schemas.extraction import ExtractionResult
|
||||
from ocr_sprint.schemas.review import (
|
||||
ApprovalResponse,
|
||||
CorrectionEventResponse,
|
||||
CorrectionRequest,
|
||||
)
|
||||
from ocr_sprint.storage.blob import get_blob_storage
|
||||
from ocr_sprint.utils.logging import get_logger
|
||||
|
||||
@@ -75,6 +96,9 @@ def _row_to_response(row: object) -> DocumentResponse:
|
||||
data=result_obj,
|
||||
review_flags=list(row.review_flags or []),
|
||||
error=row.error,
|
||||
approved=bool(row.approved),
|
||||
reviewed_by=row.reviewed_by,
|
||||
reviewed_at=row.reviewed_at,
|
||||
)
|
||||
|
||||
|
||||
@@ -192,3 +216,116 @@ async def get_document(
|
||||
except JobNotFoundError as exc:
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
|
||||
return _row_to_response(row)
|
||||
|
||||
|
||||
# ---------- Phase 6 — HITL ----------
|
||||
|
||||
|
||||
def _correction_row_to_response(row: object) -> CorrectionEventResponse:
    """Convert a ``JobCorrectionRow`` into a ``CorrectionEventResponse``.

    Each audit attribute is copied across unchanged. The parameter is typed
    as ``object``; the ``isinstance`` assertion narrows it to the ORM row
    type before any attribute is read.
    """
    # Deferred import: importing the models module at load time would
    # create an import cycle.
    from ocr_sprint.db.models import JobCorrectionRow

    assert isinstance(row, JobCorrectionRow)

    # Attribute names shared one-to-one by the ORM row and the response schema.
    copied_fields = (
        "id",
        "job_id",
        "field_path",
        "old_value",
        "new_value",
        "corrected_by",
        "reason",
        "corrected_at",
    )
    return CorrectionEventResponse(**{name: getattr(row, name) for name in copied_fields})
|
||||
|
||||
|
||||
@router.patch("/{job_id}", response_model=DocumentResponse)
async def patch_document(
    job_id: UUID,
    body: CorrectionRequest,
    session: Annotated[Session, Depends(get_session)],
    x_user_id: Annotated[
        str | None,
        Header(description="Free-form reviewer identifier recorded on the audit row."),
    ] = None,
) -> DocumentResponse:
    """Apply a batch of field-level corrections and record them in the audit trail.

    The batch is atomic: one invalid path fails the whole request with 400
    and nothing is written. On success the updated document is returned, so
    the client does not have to issue a follow-up GET.

    Error mapping:
        - ``JobNotFoundError`` -> 404
        - ``InvalidFieldPathError`` -> 400
        - ``JobAlreadyApprovedError`` / ``JobNotCompletedError`` -> 409
    """
    jobs = JobRepository(session)
    # Flatten each correction into the (path, value, reason) tuple that is
    # handed to the repository.
    edits = [(item.path, item.value, item.reason) for item in body.corrections]

    try:
        jobs.apply_corrections(job_id, corrections=edits, corrected_by=x_user_id)
        # Re-read the job so the response reflects the corrected state.
        updated = jobs.get_or_raise(job_id)
    except JobNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
    except InvalidFieldPathError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
    except (JobAlreadyApprovedError, JobNotCompletedError) as exc:
        # Both are state conflicts and are reported identically.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc

    _logger.info(
        "documents.patched",
        job_id=str(job_id),
        count=len(body.corrections),
        corrected_by=x_user_id or "",
    )
    return _row_to_response(updated)
|
||||
|
||||
|
||||
@router.get("/{job_id}/history", response_model=list[CorrectionEventResponse])
async def get_history(
    job_id: UUID,
    session: Annotated[Session, Depends(get_session)],
) -> list[CorrectionEventResponse]:
    """Return the correction events recorded for a job.

    Events are returned in the order the repository yields them. Responds
    with 404 when the repository raises ``JobNotFoundError``.
    """
    jobs = JobRepository(session)
    try:
        events = jobs.list_corrections(job_id)
    except JobNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
    return list(map(_correction_row_to_response, events))
|
||||
|
||||
|
||||
@router.post("/{job_id}/approve", response_model=ApprovalResponse)
async def approve_document(
    job_id: UUID,
    session: Annotated[Session, Depends(get_session)],
    x_user_id: Annotated[
        str | None,
        Header(description="Free-form reviewer identifier recorded on the job."),
    ] = None,
) -> ApprovalResponse:
    """Lock a job's final version.

    Idempotent: re-approving returns the existing row without overwriting
    ``reviewed_at``.

    Error mapping:
        - ``JobNotFoundError`` -> 404
        - ``JobNotCompletedError`` -> 409
    """
    jobs = JobRepository(session)
    try:
        approved_row = jobs.approve(job_id, reviewed_by=x_user_id)
    except JobNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc
    except JobNotCompletedError as exc:
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc

    # Log the reviewer stored on the row rather than the request header.
    _logger.info(
        "documents.approved",
        job_id=str(job_id),
        reviewed_by=approved_row.reviewed_by or "",
    )
    return ApprovalResponse(
        job_id=approved_row.job_id,
        approved=bool(approved_row.approved),
        reviewed_by=approved_row.reviewed_by,
        reviewed_at=approved_row.reviewed_at,
    )
|
||||
|
||||
Reference in New Issue
Block a user