# OCR-SPRIN-SERVICE/tests/unit/test_preprocess.py

"""Smoke tests for the preprocessing pipeline."""

from __future__ import annotations

import numpy as np

from ocr_sprint.pipeline.preprocess import PreprocessConfig, preprocess


def test_preprocess_returns_bgr_uint8(blank_bgr_image: np.ndarray) -> None:
    """Check that default preprocessing returns a 3-channel uint8 array.

    ``blank_bgr_image`` is not defined in this file; it is expected to be
    injected by pytest as a fixture.
    """
    result = preprocess(blank_bgr_image)

    # Element type first, then rank, then the size of the trailing axis.
    assert result.dtype == np.uint8
    assert result.ndim == 3
    channel_count = result.shape[-1]
    assert channel_count == 3


def test_preprocess_resizes_to_max_side() -> None:
    """Check that an oversized image is shrunk so its longest side is ``max_side``."""
    side_limit = 1000
    oversized = np.full((4000, 3000, 3), 255, dtype=np.uint8)
    # Denoise and deskew are switched off for this case.
    config = PreprocessConfig(max_side=side_limit, denoise=False, deskew=False)

    result = preprocess(oversized, config)

    longest_side = max(result.shape[:2])
    assert longest_side == side_limit


def test_preprocess_does_not_upscale_small_images() -> None:
    """Check that an image already within ``max_side`` keeps its height and width."""
    expected_hw = (400, 300)
    tiny = np.full((*expected_hw, 3), 255, dtype=np.uint8)
    # The limit is far above both sides of the input.
    config = PreprocessConfig(max_side=2200, denoise=False, deskew=False)

    result = preprocess(tiny, config)

    actual_hw = result.shape[:2]
    assert actual_hw == expected_hw


def test_adaptive_threshold_produces_binary_image() -> None:
    """Check that ``adaptive_threshold=True`` yields only the values 0 and 255.

    The noise image is drawn from a fixed-seed generator so that every run
    feeds the pipeline exactly the same pixels; a failure is therefore
    reproducible instead of depending on the global NumPy random state.
    """
    rng = np.random.default_rng(seed=0)
    # ``high`` is exclusive, so this covers the full uint8 range 0..255.
    img = rng.integers(0, 256, size=(200, 200, 3), dtype=np.uint8)
    cfg = PreprocessConfig(denoise=False, deskew=False, adaptive_threshold=True)
    out = preprocess(img, cfg)
    # adaptive threshold should leave only 0s and 255s
    unexpected = set(np.unique(out).tolist()) - {0, 255}
    assert not unexpected, f"non-binary pixel values in output: {sorted(unexpected)}"