Phase 1 MVP: synchronous OCR + regex header extraction
Implements the foundation of the OCR Sprint service:
- FastAPI app with /api/v1/health and /api/v1/documents (sync upload)
- Pydantic v2 schemas for documents, extraction result, personnel
- Pipeline: PDF/image ingest (PyMuPDF), preprocessing (resize, deskew, denoise, optional adaptive threshold), PaddleOCR wrapper, regex-based header extraction (nomor sprint, tanggal, satuan, perihal, dasar), signatory NRP, master-pangkat validation, confidence scoring + routing.
- Tests: 61 unit tests covering regex rules, validators, preprocess, ingest, confidence, and API contract (PaddleOCR mocked).
- Tooling: pyproject (setuptools), ruff, mypy strict, pytest, pre-commit, Dockerfile, docker-compose, Makefile.
- Docs: README + docs/architecture.md (full hybrid stack rationale and 6-phase roadmap).

Co-authored-by: adrian kuman firmansah <adriancuman@gmail.com>
This commit is contained in:
42
src/ocr_sprint/main.py
Normal file
42
src/ocr_sprint/main.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""FastAPI entrypoint."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from ocr_sprint import __version__
|
||||
from ocr_sprint.api.errors import register_error_handlers
|
||||
from ocr_sprint.api.routes import documents, health
|
||||
from ocr_sprint.config import get_settings
|
||||
from ocr_sprint.utils.logging import configure_logging
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
    """Build and return a fully wired FastAPI application.

    Logging is configured from the settings' ``app_log_level`` first; then the
    app is instantiated, error handlers are registered, and the health and
    documents routers are mounted under ``/api/v1``. Using a factory keeps
    top-level state easy to test.
    """
    configure_logging(get_settings().app_log_level)

    application = FastAPI(
        title="OCR Sprint Service",
        version=__version__,
        description="OCR + structured extraction for Indonesian police 'surat sprint' documents.",
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )
    register_error_handlers(application)

    # Every versioned router shares one prefix; health is mounted before documents.
    api_prefix = "/api/v1"
    for router in (health.router, documents.router):
        application.include_router(router, prefix=api_prefix)

    return application
|
||||
|
||||
|
||||
# Module-level application instance, built once at import time. `run()` hands
# uvicorn the import string "ocr_sprint.main:app", which refers to this name.
app = create_app()
|
||||
|
||||
|
||||
def run() -> None:
    """Serve the API with uvicorn (console script `ocr-sprint-api`).

    Host and port are read from the application settings; auto-reload is
    always disabled.
    """
    # Imported here rather than at module top, so importing this module
    # does not itself import uvicorn.
    import uvicorn

    settings = get_settings()
    uvicorn.run(
        "ocr_sprint.main:app",
        host=settings.app_host,
        port=settings.app_port,
        reload=False,
    )
|
||||
Reference in New Issue
Block a user