Files
OCR-SPRIN-SERVICE/src/ocr_sprint/main.py
Devin AI 6003d96a94 Phase 7: ground-truth export (JSONL + stats) + CLI tool
- GET /api/v1/ground-truth/export  streaming JSONL (approved_only,
  since, until, has_corrections, limit)
- GET /api/v1/ground-truth/stats   total / approved / corrections
  counts + top-N most-corrected field paths
- python -m ocr_sprint.tools.export_ground_truth  operator CLI with
  the same filters + optional --print-stats
- Ground-truth sample reconstructs the pipeline's original output by
  replaying job_corrections in reverse
- docs/ground-truth-format.md    schema + fine-tuning guidance
- 17 new tests (service replay, endpoint filters, CLI)
- 201 total tests passing, ruff / mypy --strict clean

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
2026-04-25 20:24:40 +00:00

60 lines
1.9 KiB
Python

"""FastAPI entrypoint."""
from __future__ import annotations
from fastapi import FastAPI
from ocr_sprint import __version__
from ocr_sprint.api.errors import register_error_handlers
from ocr_sprint.api.metrics import MetricsMiddleware, metrics_endpoint
from ocr_sprint.api.routes import documents, ground_truth, health
from ocr_sprint.config import get_settings
from ocr_sprint.db import models as _models # noqa: F401 (register ORM tables)
from ocr_sprint.db.base import Base, get_engine
from ocr_sprint.utils.logging import configure_logging
def _ensure_schema() -> None:
    """Create any ORM tables that are missing from the database.

    This is a shortcut for local development and tests, so the API can start
    without someone first running `alembic upgrade head` by hand. Production
    deployments are expected to apply Alembic migrations explicitly instead.
    """
    engine = get_engine()
    Base.metadata.create_all(bind=engine)
def create_app() -> FastAPI:
    """Build and return a fully wired FastAPI application.

    Using a factory (instead of assembling the app at module top level) keeps
    global state minimal and makes the app easy to construct in tests.

    Steps, in order: load settings, configure logging from
    ``settings.app_log_level``, make sure the schema exists, create the
    FastAPI instance, register error handlers, attach the metrics middleware,
    mount the versioned routers and expose the ``/metrics`` route.
    """
    api_prefix = "/api/v1"

    cfg = get_settings()
    configure_logging(cfg.app_log_level)
    _ensure_schema()

    application = FastAPI(
        title="OCR Sprint Service",
        version=__version__,
        description="OCR + structured extraction for Indonesian police 'surat sprint' documents.",
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )

    register_error_handlers(application)
    application.add_middleware(MetricsMiddleware)

    # Every versioned router shares the same prefix; mount order is preserved.
    for route_module in (health, documents, ground_truth):
        application.include_router(route_module.router, prefix=api_prefix)

    # The metrics route sits outside the versioned API and is kept out of the
    # generated OpenAPI schema.
    application.add_api_route(
        "/metrics",
        metrics_endpoint,
        methods=["GET"],
        include_in_schema=False,
    )
    return application
# Module-level application instance, built once when this module is imported.
# `run()` passes uvicorn the import string "ocr_sprint.main:app", which refers
# to this name.
app = create_app()
def run() -> None:
    """Start the API under uvicorn; console-script entrypoint (`ocr-sprint-api`).

    The bind host and port are read from the application settings, and
    auto-reload is disabled.
    """
    # Imported here rather than at module top level, so uvicorn is only loaded
    # when the server is actually launched through this entrypoint.
    import uvicorn

    settings = get_settings()
    uvicorn.run(
        "ocr_sprint.main:app",
        host=settings.app_host,
        port=settings.app_port,
        reload=False,
    )