"""Schemas for the Phase 7 ground-truth export.

Each ``GroundTruthSample`` represents one training-ready example:

* ``initial_*`` snapshots the pipeline's original (pre-HITL) output,
  reconstructed by replaying the audit trail in reverse.
* ``final_*`` is the current ``result`` on the ``jobs`` row — the
  reviewer-approved answer.
* ``corrections`` is the raw audit trail so downstream fine-tuning can
  see *what* was changed, *why* (free-text reason), and by whom.

JSONL is emitted — one sample per line — so the file can be mmapped,
streamed, or piped straight into an HF
``datasets.load_dataset("json", ...)`` call.
"""

from __future__ import annotations

from datetime import datetime
from typing import Any
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field


class GroundTruthCorrection(BaseModel):
    """One row of the ``job_corrections`` audit trail, as exported."""

    # Only ``field_path`` and ``corrected_at`` are required; every other
    # field falls back to ``None`` when absent.
    field_path: str
    # Untyped on purpose: a corrected value may be any JSON-compatible
    # shape, so no structure is imposed here.
    old_value: Any | None = None
    new_value: Any | None = None
    # Presumably the reviewer's identifier — its format is not defined in
    # this module; confirm against the code that writes the audit trail.
    corrected_by: str | None = None
    # Free-text explanation of why the correction was made.
    reason: str | None = None
    corrected_at: datetime


class GroundTruthSample(BaseModel):
    """One training sample written as a single JSONL line."""

    # ``populate_by_name`` lets a field be populated by its attribute name
    # even when it has an alias. No aliases are declared in this module.
    model_config = ConfigDict(populate_by_name=True)

    # Field declaration order is also the key order of each exported line,
    # so reordering these changes the output format.
    job_id: UUID
    filename: str
    source_kind: str
    # A sample counts as unapproved unless the exporter says otherwise.
    approved: bool = False
    reviewed_by: str | None = None
    reviewed_at: datetime | None = None
    created_at: datetime

    # ``initial_*`` is the pipeline's pre-HITL answer, reconstructed from
    # the audit trail. ``final_*`` is the reviewer-approved version.
    initial_result: dict[str, Any] | None = None
    final_result: dict[str, Any] | None = None

    # ``default_factory=list`` gives every sample its own empty list
    # rather than a shared one.
    corrections: list[GroundTruthCorrection] = Field(default_factory=list)
    review_flags: list[str] = Field(default_factory=list)
    # NOTE(review): no range constraint is declared; whether this is a
    # 0-1 score is not established here — confirm with the producer.
    confidence: float | None = None


# Element type of ``GroundTruthStats.top_corrected_fields``: one field path
# together with a count for it.
class FieldCorrectionCount(BaseModel):
    field_path: str
    count: int


class GroundTruthStats(BaseModel):
    """High-level dataset health report surfaced by ``GET /ground-truth/stats``."""

    # All counters are required — there are no defaults, so the producer
    # must supply every one of them.
    total_jobs: int
    completed_jobs: int
    needs_review_jobs: int
    failed_jobs: int
    approved_jobs: int
    total_corrections: int
    jobs_with_corrections: int

    # Most-corrected field paths (descending). Operators use this to
    # prioritise which fields to target with prompt tweaks or fine-tune
    # data collection first.
    top_corrected_fields: list[FieldCorrectionCount] = Field(default_factory=list)