Patch summary: treat an empty-dict job result ({}) as a real snapshot rather than as missing.

  * src/ocr_sprint/ground_truth/service.py: iter_ground_truth_samples now deep-copies
    job_row.result into final_result whenever it is not None, so {} is no longer turned into None.
  * tests/unit/test_ground_truth_service.py: _seed_approved_job_with_corrections falls back to
    its default result only when final_result is None, and a new test,
    test_empty_dict_result_stays_consistent, expects {} for both initial_result and final_result.

NOTE(review): this patch was recovered from a whitespace-collapsed copy; the leading indentation
inside the hunks is reconstructed and must be checked against the target files before applying.

diff --git a/src/ocr_sprint/ground_truth/service.py b/src/ocr_sprint/ground_truth/service.py
index 5d33eaf..765409c 100644
--- a/src/ocr_sprint/ground_truth/service.py
+++ b/src/ocr_sprint/ground_truth/service.py
@@ -171,7 +171,10 @@ def iter_ground_truth_samples(
             reviewed_at=job_row.reviewed_at,
             created_at=job_row.created_at,
             initial_result=initial,
-            final_result=copy.deepcopy(job_row.result) if job_row.result else None,
+            # Use an ``is None`` check to stay consistent with
+            # ``build_initial_result``; otherwise an empty-dict result
+            # would produce ``initial_result={}`` but ``final_result=None``.
+            final_result=(copy.deepcopy(job_row.result) if job_row.result is not None else None),
             corrections=[
                 GroundTruthCorrection(
                     field_path=c.field_path,
diff --git a/tests/unit/test_ground_truth_service.py b/tests/unit/test_ground_truth_service.py
index 694390f..78938f8 100644
--- a/tests/unit/test_ground_truth_service.py
+++ b/tests/unit/test_ground_truth_service.py
@@ -40,11 +40,19 @@ def _seed_approved_job_with_corrections(
         jid,
         status=DocumentStatus.NEEDS_REVIEW,
         confidence=0.8,
-        result=final_result
-        or {
-            "header": {"nomor_sprint": "SPR/1/2025", "satuan_penerbit": "POLRES X"},
-            "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
-        },
+        # ``is None`` (not truthiness) so callers can pass ``{}`` to
+        # exercise the empty-dict edge case.
+        result=(
+            final_result
+            if final_result is not None
+            else {
+                "header": {
+                    "nomor_sprint": "SPR/1/2025",
+                    "satuan_penerbit": "POLRES X",
+                },
+                "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
+            }
+        ),
         review_flags=[],
     )
     if corrections:
@@ -197,6 +205,19 @@ def test_stats_counts_rollup_and_top_fields(db_ready: None) -> None:
     }
 
 
+def test_empty_dict_result_stays_consistent(db_ready: None) -> None:
+    """An empty-dict result (``{}``) is logically a valid snapshot — it
+    must round-trip as ``{}`` on *both* ``initial_result`` and
+    ``final_result``, not ``{}`` on one and ``None`` on the other.
+    """
+    _seed_approved_job_with_corrections(final_result={})
+    with session_scope() as session:
+        samples = list(iter_ground_truth_samples(session, GroundTruthFilters()))
+    assert len(samples) == 1
+    assert samples[0].initial_result == {}
+    assert samples[0].final_result == {}
+
+
 def test_serialize_is_valid_jsonl(db_ready: None) -> None:
     _seed_approved_job_with_corrections(corrections=[("header.perihal", "X", None)])
     with session_scope() as session: