"""Tests for confidence scoring + routing.""" from __future__ import annotations from ocr_sprint.pipeline.confidence import compute_confidence, route from ocr_sprint.schemas.document import DocumentStatus from ocr_sprint.schemas.extraction import ReviewFlag def test_no_flags_returns_blend_of_ocr_only() -> None: score = compute_confidence(0.9, []) # OCR weight 0.6 * 0.9 + validation 0.4 * 1.0 = 0.94 assert abs(score - 0.94) < 1e-6 def test_flags_reduce_score() -> None: base = compute_confidence(0.9, []) with_flags = compute_confidence(0.9, [ReviewFlag.MISSING_FIELD]) assert with_flags < base def test_score_is_clamped() -> None: catastrophic = compute_confidence( 0.0, [ ReviewFlag.MISSING_FIELD, ReviewFlag.LOW_OCR_CONFIDENCE, ReviewFlag.PERSONNEL_COUNT_MISMATCH, ReviewFlag.INVALID_NRP, ReviewFlag.UNKNOWN_PANGKAT, ReviewFlag.DATE_PARSE_FAILED, ], ) assert 0.0 <= catastrophic <= 1.0 def test_route_high_confidence() -> None: assert route(0.97) == DocumentStatus.COMPLETED def test_route_mid_goes_to_review() -> None: assert route(0.88) == DocumentStatus.NEEDS_REVIEW def test_route_low_goes_to_review() -> None: assert route(0.40) == DocumentStatus.NEEDS_REVIEW