"""Job-level schemas (request, response, status).""" from __future__ import annotations from datetime import datetime from enum import Enum from typing import Any from uuid import UUID, uuid4 from pydantic import BaseModel, ConfigDict, Field from ocr_sprint.schemas.extraction import ExtractionResult class SourceKind(str, Enum): """High-level type of the uploaded document.""" PDF = "pdf" IMAGE = "image" UNKNOWN = "unknown" class DocumentStatus(str, Enum): """Lifecycle status of an OCR job.""" PENDING = "pending" PROCESSING = "processing" COMPLETED = "completed" NEEDS_REVIEW = "needs_review" FAILED = "failed" class DocumentJob(BaseModel): """Internal representation of a job (Phase 1 holds it in-memory).""" model_config = ConfigDict(use_enum_values=False) job_id: UUID = Field(default_factory=uuid4) source_kind: SourceKind = SourceKind.UNKNOWN filename: str status: DocumentStatus = DocumentStatus.PENDING created_at: datetime = Field(default_factory=lambda: datetime.utcnow()) updated_at: datetime = Field(default_factory=lambda: datetime.utcnow()) error: str | None = None result: ExtractionResult | None = None debug: dict[str, Any] = Field(default_factory=dict) class DocumentResponse(BaseModel): """Public response payload returned by the documents API.""" job_id: UUID status: DocumentStatus confidence: float | None = None data: ExtractionResult | None = None review_flags: list[str] = Field(default_factory=list) error: str | None = None # Phase 6 — HITL review state. approved: bool = False reviewed_by: str | None = None reviewed_at: datetime | None = None