Fix empty-dict consistency in ground-truth export

Devin Review (post-merge on PR #6) flagged that the `final_result` assignment used a truthiness check (`if job_row.result`) while `build_initial_result` used an identity check (`is None`). For a job whose result is an empty dict (`{}`), the emitted `GroundTruthSample` therefore ended up with `initial_result={}` but `final_result=None`, which is logically inconsistent.

Switch the `final_result` assignment to an explicit `is not None` check, matching the identity check in `build_initial_result`, so both fields agree. Add `test_empty_dict_result_stays_consistent` to lock the invariant in, and fix the test helper so callers can pass `{}` without the helper's `or` fallback replacing it.

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
2026-04-25 20:33:26 +00:00
parent 5ea45de5ea
commit 0755fbebda
2 changed files with 30 additions and 6 deletions
--- a/src/ocr_sprint/ground_truth/service.py
+++ b/src/ocr_sprint/ground_truth/service.py
@@ -171,7 +171,10 @@ def iter_ground_truth_samples(
            reviewed_at=job_row.reviewed_at,
            created_at=job_row.created_at,
            initial_result=initial,
-            final_result=copy.deepcopy(job_row.result) if job_row.result else None,
+            # Use an ``is None`` check to stay consistent with
+            # ``build_initial_result``; otherwise an empty-dict result
+            # would produce ``initial_result={}`` but ``final_result=None``.
+            final_result=(copy.deepcopy(job_row.result) if job_row.result is not None else None),
            corrections=[
                GroundTruthCorrection(
                    field_path=c.field_path,
--- a/tests/unit/test_ground_truth_service.py
+++ b/tests/unit/test_ground_truth_service.py
@@ -40,11 +40,19 @@ def _seed_approved_job_with_corrections(
            jid,
            status=DocumentStatus.NEEDS_REVIEW,
            confidence=0.8,
-            result=final_result
-            or {
-                "header": {"nomor_sprint": "SPR/1/2025", "satuan_penerbit": "POLRES X"},
-                "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
-            },
+            # ``is None`` (not truthiness) so callers can pass ``{}`` to
+            # exercise the empty-dict edge case.
+            result=(
+                final_result
+                if final_result is not None
+                else {
+                    "header": {
+                        "nomor_sprint": "SPR/1/2025",
+                        "satuan_penerbit": "POLRES X",
+                    },
+                    "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
+                }
+            ),
            review_flags=[],
        )
    if corrections:
@@ -197,6 +205,19 @@ def test_stats_counts_rollup_and_top_fields(db_ready: None) -> None:
    }


+def test_empty_dict_result_stays_consistent(db_ready: None) -> None:
+    """An empty-dict result (``{}``) is logically a valid snapshot — it
+    must round-trip as ``{}`` on *both* ``initial_result`` and
+    ``final_result``, not ``{}`` on one and ``None`` on the other.
+    """
+    _seed_approved_job_with_corrections(final_result={})
+    with session_scope() as session:
+        samples = list(iter_ground_truth_samples(session, GroundTruthFilters()))
+    assert len(samples) == 1
+    assert samples[0].initial_result == {}
+    assert samples[0].final_result == {}
+
+
 def test_serialize_is_valid_jsonl(db_ready: None) -> None:
    _seed_approved_job_with_corrections(corrections=[("header.perihal", "X", None)])
    with session_scope() as session: