Files
OCR-SPRIN-SERVICE/src/ocr_sprint/schemas/document.py
Devin AI 66247e39a5 Phase 6: HITL review endpoints + audit trail
- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH  /documents/{id}         apply field-level corrections
- GET    /documents/{id}/history return chronological audit trail
- POST   /documents/{id}/approve lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
2026-04-25 20:12:04 +00:00

62 lines
1.7 KiB
Python

"""Job-level schemas (request, response, status)."""
from __future__ import annotations

from datetime import datetime, timezone
from enum import Enum
from typing import Any
from uuid import UUID, uuid4

from pydantic import BaseModel, ConfigDict, Field

from ocr_sprint.schemas.extraction import ExtractionResult
class SourceKind(str, Enum):
    """High-level type of the uploaded document.

    The ``str`` mixin makes every member a real string instance, so a
    member compares equal to its raw value (``SourceKind.PDF == "pdf"``)
    and can be looked up from it (``SourceKind("pdf")``).
    """

    PDF = "pdf"
    IMAGE = "image"
    # Fallback member; ``DocumentJob.source_kind`` defaults to it.
    UNKNOWN = "unknown"
class DocumentStatus(str, Enum):
    """Lifecycle status of an OCR job.

    The ``str`` mixin makes every member a real string instance, so a
    member compares equal to its raw value
    (``DocumentStatus.FAILED == "failed"``).
    """

    # Initial state; ``DocumentJob.status`` defaults to it.
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    NEEDS_REVIEW = "needs_review"
    FAILED = "failed"
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    ``datetime.utcnow()`` is deprecated since Python 3.12. This produces
    the same kind of value (UTC wall-clock, ``tzinfo=None``) so existing
    comparisons and serialised timestamps keep their shape.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class DocumentJob(BaseModel):
    """Internal representation of a job (Phase 1 holds it in-memory).

    Only ``filename`` is required; every other field has a default.
    """

    # Explicitly keep enum members (not their raw values) on the model.
    model_config = ConfigDict(use_enum_values=False)

    # A fresh random UUID is generated for each new job.
    job_id: UUID = Field(default_factory=uuid4)
    source_kind: SourceKind = SourceKind.UNKNOWN
    filename: str
    status: DocumentStatus = DocumentStatus.PENDING

    # Naive UTC timestamps. The two factories run independently, so the
    # values may differ by a few microseconds on a newly created job.
    # NOTE(review): consider timezone-aware datetimes once all consumers
    # of these fields are confirmed to handle them.
    created_at: datetime = Field(default_factory=_utcnow_naive)
    updated_at: datetime = Field(default_factory=_utcnow_naive)

    error: str | None = None
    result: ExtractionResult | None = None
    # Free-form mapping; a new empty dict is created per instance.
    debug: dict[str, Any] = Field(default_factory=dict)
class DocumentResponse(BaseModel):
    """Public response payload returned by the documents API.

    ``job_id`` and ``status`` are required; all other fields are optional
    and default to ``None``, an empty list, or ``False``.
    """

    job_id: UUID
    status: DocumentStatus
    # NOTE(review): no range is enforced here; presumably a 0..1 score,
    # confirm against the producer of this value.
    confidence: float | None = None
    data: ExtractionResult | None = None
    # A new empty list is created per instance.
    review_flags: list[str] = Field(default_factory=list)
    error: str | None = None

    # Phase 6 — HITL review state.
    approved: bool = False
    reviewed_by: str | None = None
    reviewed_at: datetime | None = None