Files
OCR-SPRIN-SERVICE/src/ocr_sprint/config.py
Devin AI ca0c0a0428 Phase 1 MVP: synchronous OCR + regex header extraction
Implements the foundation of the OCR Sprint service:
- FastAPI app with /api/v1/health and /api/v1/documents (sync upload)
- Pydantic v2 schemas for documents, extraction result, personnel
- Pipeline: PDF/image ingest (PyMuPDF), preprocessing (resize, deskew,
  denoise, optional adaptive threshold), PaddleOCR wrapper, regex-based
  header extraction (nomor sprint, tanggal, satuan, perihal, dasar),
  signatory NRP, master-pangkat validation, confidence scoring + routing.
- Tests: 61 unit tests covering regex rules, validators, preprocess,
  ingest, confidence, and API contract (PaddleOCR mocked).
- Tooling: pyproject (setuptools), ruff, mypy strict, pytest, pre-commit,
  Dockerfile, docker-compose, Makefile.
- Docs: README + docs/architecture.md (full hybrid stack rationale and
  6-phase roadmap).

Co-authored-by: adrian kuman firmansah <adriancuman@gmail.com>
2026-04-25 14:58:50 +00:00

73 lines
2.1 KiB
Python

"""Application settings loaded from environment / .env file."""
from __future__ import annotations

from functools import lru_cache
from pathlib import Path

from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Runtime configuration. Override via environment variables or a .env file.

    Values are read from the process environment and from ``.env`` (UTF-8).
    Variable names are matched case-insensitively, and variables that do not
    correspond to a field are ignored.

    Raises:
        pydantic.ValidationError: On construction, when a value fails its type
            or range constraint, or when ``confidence_needs_review`` is
            greater than ``confidence_auto_approve``.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # App
    app_env: str = "local"
    app_host: str = "0.0.0.0"
    # 0..65535 is the valid TCP port range; 0 stays allowed for callers that
    # want an OS-assigned port.
    app_port: int = Field(8000, ge=0, le=65535)
    app_log_level: str = "INFO"

    # Storage (Phase 1: local fs)
    storage_local_dir: Path = Path("./storage")

    # OCR
    ocr_lang: str = "latin"
    ocr_use_gpu: bool = False
    ocr_det_model_dir: str | None = None
    ocr_rec_model_dir: str | None = None
    ocr_cls_model_dir: str | None = None
    # A size limit must be positive to describe an image dimension.
    ocr_max_image_side: int = Field(2200, gt=0)

    # Preprocessing
    preprocess_target_dpi: int = Field(300, gt=0)
    preprocess_denoise: bool = True
    preprocess_deskew: bool = True
    preprocess_adaptive_threshold: bool = False

    # Confidence thresholds (Phase 5 routing)
    confidence_auto_approve: float = Field(0.95, ge=0.0, le=1.0)
    confidence_needs_review: float = Field(0.85, ge=0.0, le=1.0)

    # LLM (Phase 5)
    llm_enabled: bool = False
    llm_provider: str = "ollama"
    llm_model: str = "qwen2.5:1.5b"
    llm_base_url: str = "http://localhost:11434"
    llm_timeout_s: int = Field(60, gt=0)

    # Async pipeline (Phase 4)
    queue_enabled: bool = False
    redis_url: str = "redis://localhost:6379/0"
    database_url: str = "postgresql+psycopg://ocr:ocr@localhost:5432/ocr_sprint"
    minio_endpoint: str = "localhost:9000"
    minio_access_key: str = "minioadmin"
    minio_secret_key: str = "minioadmin"
    minio_bucket: str = "ocr-sprint"
    minio_secure: bool = False

    @model_validator(mode="after")
    def _check_threshold_order(self) -> Settings:
        """Reject configurations whose review threshold exceeds auto-approve.

        Each threshold is already range-checked individually; this guards the
        relationship between the two, which per-field constraints cannot see.
        """
        if self.confidence_needs_review > self.confidence_auto_approve:
            raise ValueError(
                "confidence_needs_review "
                f"({self.confidence_needs_review}) must not exceed "
                f"confidence_auto_approve ({self.confidence_auto_approve})"
            )
        return self
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the cached ``Settings`` instance, building it on first use.

    The first call constructs ``Settings`` and makes sure its
    ``storage_local_dir`` exists, creating missing parent directories.
    Later calls are served from the single-entry cache.
    """
    loaded = Settings()
    storage_root = loaded.storage_local_dir
    # exist_ok=True: an already-present directory is not an error.
    storage_root.mkdir(parents=True, exist_ok=True)
    return loaded