- GET /api/v1/ground-truth/export: streaming JSONL (approved_only, since, until, has_corrections, limit)
- GET /api/v1/ground-truth/stats: total / approved / corrections counts + top-N most-corrected field paths
- python -m ocr_sprint.tools.export_ground_truth: operator CLI with the same filters + optional --print-stats
- Ground-truth sample reconstructs the pipeline's original output by replaying job_corrections in reverse
- docs/ground-truth-format.md: schema + fine-tuning guidance
- 17 new tests (service replay, endpoint filters, CLI)
- 201 total tests passing, ruff / mypy --strict clean

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
60 lines
1.9 KiB
Python
60 lines
1.9 KiB
Python
"""FastAPI entrypoint."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from fastapi import FastAPI
|
|
|
|
from ocr_sprint import __version__
|
|
from ocr_sprint.api.errors import register_error_handlers
|
|
from ocr_sprint.api.metrics import MetricsMiddleware, metrics_endpoint
|
|
from ocr_sprint.api.routes import documents, ground_truth, health
|
|
from ocr_sprint.config import get_settings
|
|
from ocr_sprint.db import models as _models # noqa: F401 (register ORM tables)
|
|
from ocr_sprint.db.base import Base, get_engine
|
|
from ocr_sprint.utils.logging import configure_logging
|
|
|
|
|
|
def _ensure_schema() -> None:
    """Create any ORM tables that do not exist yet.

    This is a shortcut for local development and tests, so the API can start
    without someone first running `alembic upgrade head` by hand. Production
    deployments are expected to apply Alembic migrations explicitly instead.
    """
    create_all = Base.metadata.create_all
    create_all(bind=get_engine())
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build and return the fully wired FastAPI application.

    Construction lives in a factory so that top-level state stays easy to
    test. The steps run in this order: load settings, configure logging,
    ensure the schema exists, create the app, then attach error handlers,
    metrics middleware, the versioned routers and the `/metrics` route.
    """
    cfg = get_settings()
    configure_logging(cfg.app_log_level)
    _ensure_schema()

    api_prefix = "/api/v1"
    options = {
        "title": "OCR Sprint Service",
        "version": __version__,
        "description": "OCR + structured extraction for Indonesian police 'surat sprint' documents.",
        "docs_url": "/docs",
        "redoc_url": "/redoc",
        "openapi_url": "/openapi.json",
    }
    application = FastAPI(**options)

    register_error_handlers(application)
    application.add_middleware(MetricsMiddleware)

    # Routers are mounted in the same order as before: health, documents,
    # ground_truth — all under the shared versioned prefix.
    for route_module in (health, documents, ground_truth):
        application.include_router(route_module.router, prefix=api_prefix)

    # The metrics route is deliberately kept out of the OpenAPI schema.
    application.add_api_route(
        "/metrics",
        metrics_endpoint,
        methods=["GET"],
        include_in_schema=False,
    )
    return application
|
|
|
|
|
|
# Module-level application instance, built once when this module is imported.
# run() passes uvicorn the import string "ocr_sprint.main:app", which
# presumably resolves to this object — confirm against the package layout.
app: FastAPI = create_app()
|
|
|
|
|
|
def run() -> None:
    """Console-script entrypoint (`ocr-sprint-api`).

    Reads host and port from the application settings and starts uvicorn
    against the `ocr_sprint.main:app` import string with reload disabled.
    """
    # Imported inside the function, as in the original, so uvicorn is only
    # loaded when this entrypoint is actually invoked.
    import uvicorn

    settings = get_settings()
    uvicorn.run(
        "ocr_sprint.main:app",
        host=settings.app_host,
        port=settings.app_port,
        reload=False,
    )
|