# OCR-SPRIN-SERVICE/src/ocr_sprint/config.py

"""Application settings loaded from environment / .env file."""

from __future__ import annotations

from functools import lru_cache
from pathlib import Path

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Runtime configuration for the service.

    Every attribute below is a setting with a built-in default. Values can be
    overridden through environment variables or a ``.env`` file (read as
    UTF-8); variable names are matched case-insensitively and unknown keys are
    ignored rather than rejected.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # App
    app_env: str = "local"
    app_host: str = "0.0.0.0"
    # Restricted to the valid port range so a mistyped value is rejected when
    # the settings are loaded instead of surfacing later at bind time.
    # 0 stays allowed (conventionally "let the OS pick a port").
    app_port: int = Field(8000, ge=0, le=65535)
    app_log_level: str = "INFO"

    # Storage (Phase 1: local fs)
    storage_local_dir: Path = Path("./storage")

    # OCR
    ocr_lang: str = "latin"
    ocr_use_gpu: bool = False
    # Optional model directory overrides; None leaves the choice to the
    # consumer of these settings.
    ocr_det_model_dir: str | None = None
    ocr_rec_model_dir: str | None = None
    ocr_cls_model_dir: str | None = None
    ocr_max_image_side: int = 2200

    # Preprocessing
    preprocess_target_dpi: int = 300
    preprocess_denoise: bool = True
    preprocess_deskew: bool = True
    preprocess_adaptive_threshold: bool = False

    # Confidence thresholds (Phase 5 routing)
    # Each value is validated to lie within [0.0, 1.0].
    # NOTE(review): nothing enforces confidence_needs_review <=
    # confidence_auto_approve -- confirm whether routing relies on that order.
    confidence_auto_approve: float = Field(0.95, ge=0.0, le=1.0)
    confidence_needs_review: float = Field(0.85, ge=0.0, le=1.0)

    # LLM (Phase 5)
    llm_enabled: bool = False
    llm_provider: str = "ollama"
    llm_model: str = "qwen2.5:1.5b"
    llm_base_url: str = "http://localhost:11434"
    llm_timeout_s: int = 60

    # Async pipeline (Phase 4)
    # NOTE(review): the URL and credential defaults below look like
    # local-development values -- confirm they are overridden when deployed.
    queue_enabled: bool = False
    redis_url: str = "redis://localhost:6379/0"
    database_url: str = "postgresql+psycopg://ocr:ocr@localhost:5432/ocr_sprint"
    minio_endpoint: str = "localhost:9000"
    minio_access_key: str = "minioadmin"
    minio_secret_key: str = "minioadmin"
    minio_bucket: str = "ocr-sprint"
    minio_secure: bool = False


@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, building it on first use.

    The first call constructs ``Settings`` and makes sure the configured
    ``storage_local_dir`` exists (missing parent directories are created, an
    existing directory is left alone). The result is memoized, so later calls
    hand back the same object without reloading or touching the filesystem.
    """
    loaded = Settings()
    storage_root = loaded.storage_local_dir
    # Create the storage directory up front so callers can rely on it existing.
    storage_root.mkdir(parents=True, exist_ok=True)
    return loaded