- New job_corrections table (append-only audit log) + migration
- Add approved / reviewed_by / reviewed_at columns to jobs
- PATCH /documents/{id}: apply field-level corrections
- GET /documents/{id}/history: return chronological audit trail
- POST /documents/{id}/approve: lock final version (idempotent)
- Dotted field-path applier with root allow-list + list-index support
- Auto-clear `missing_field` review flag when required header keys filled
- Atomic batch apply: malformed path in batch rolls back all changes
- 22 new tests (11 repository-level, 11 API-level); 184 total passing
Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
62 lines
1.7 KiB
Python
62 lines
1.7 KiB
Python
"""Job-level schemas (request, response, status)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any
|
|
from uuid import UUID, uuid4
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
from ocr_sprint.schemas.extraction import ExtractionResult
|
|
|
|
|
|
class SourceKind(str, Enum):
    """High-level type of the uploaded document.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value (for example ``SourceKind.PDF == "pdf"``).
    """

    PDF = "pdf"
    IMAGE = "image"
    # Fallback member; ``DocumentJob.source_kind`` defaults to it.
    UNKNOWN = "unknown"
|
|
|
|
|
|
class DocumentStatus(str, Enum):
    """Lifecycle status of an OCR job.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value (for example ``DocumentStatus.FAILED == "failed"``).
    """

    # Initial state; ``DocumentJob.status`` defaults to it.
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    NEEDS_REVIEW = "needs_review"
    FAILED = "failed"
|
|
|
|
|
|
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value ``datetime.utcnow()`` does (UTC wall-clock
    time with ``tzinfo=None``) without calling that method, which is
    deprecated since Python 3.12.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class DocumentJob(BaseModel):
    """Internal representation of a job (Phase 1 holds it in-memory)."""

    # Keep enum members (not their raw values) on model instances; this is
    # pydantic's default, stated explicitly here.
    model_config = ConfigDict(use_enum_values=False)

    # Generated with uuid4 when the caller does not supply one.
    job_id: UUID = Field(default_factory=uuid4)
    source_kind: SourceKind = SourceKind.UNKNOWN
    # The only field without a default, so it is required.
    filename: str
    status: DocumentStatus = DocumentStatus.PENDING
    # Naive UTC timestamps. Each default is computed by its own factory call,
    # so the two values can differ by a few microseconds on a new instance.
    created_at: datetime = Field(default_factory=_utc_now_naive)
    updated_at: datetime = Field(default_factory=_utc_now_naive)
    # Optional error text; None by default.
    error: str | None = None
    # Optional extraction output; None by default.
    result: ExtractionResult | None = None
    # Free-form debug mapping; a fresh empty dict per instance.
    debug: dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
class DocumentResponse(BaseModel):
    """Public response payload returned by the documents API.

    ``job_id`` and ``status`` are required; every other field has a default.
    """

    job_id: UUID
    status: DocumentStatus
    # Optional confidence value; None by default.
    # NOTE(review): the valid range is not constrained here; confirm with the producer.
    confidence: float | None = None
    # Optional extraction payload; None by default.
    data: ExtractionResult | None = None
    # Review flag names; a fresh empty list per instance.
    review_flags: list[str] = Field(default_factory=list)
    # Optional error text; None by default.
    error: str | None = None
    # Phase 6 — HITL review state.
    approved: bool = False
    reviewed_by: str | None = None
    reviewed_at: datetime | None = None
|