From 0755fbebda5cf02f8e5c67818afc4c4d48dba2de Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 25 Apr 2026 20:33:26 +0000
Subject: [PATCH] Fix empty-dict consistency in ground-truth export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Devin Review (post-merge on PR #6) flagged that the `final_result`
assignment used a truthiness check (`if job_row.result`) while
`build_initial_result` used an identity check (`is None`). For a
job whose result is an empty dict (`{}`), the emitted
`GroundTruthSample` ended up with `initial_result={}` but
`final_result=None` — logically inconsistent.

Switch the final-result assignment to the same identity check
(`is not None`) so both fields agree. Add
`test_empty_dict_result_stays_consistent` to lock the invariant in,
and fix the test helper so callers can pass `{}` without the helper's
`or` fallback replacing it.

Co-Authored-By: adrian kuman firmansah <adriancuman@gmail.com>
---
 src/ocr_sprint/ground_truth/service.py  |  5 +++-
 tests/unit/test_ground_truth_service.py | 31 +++++++++++++++++++++----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/ocr_sprint/ground_truth/service.py b/src/ocr_sprint/ground_truth/service.py
index 5d33eaf..765409c 100644
--- a/src/ocr_sprint/ground_truth/service.py
+++ b/src/ocr_sprint/ground_truth/service.py
@@ -171,7 +171,10 @@ def iter_ground_truth_samples(
             reviewed_at=job_row.reviewed_at,
             created_at=job_row.created_at,
             initial_result=initial,
-            final_result=copy.deepcopy(job_row.result) if job_row.result else None,
+            # Use an ``is None`` check to stay consistent with
+            # ``build_initial_result``; otherwise an empty-dict result
+            # would produce ``initial_result={}`` but ``final_result=None``.
+            final_result=(copy.deepcopy(job_row.result) if job_row.result is not None else None),
             corrections=[
                 GroundTruthCorrection(
                     field_path=c.field_path,
diff --git a/tests/unit/test_ground_truth_service.py b/tests/unit/test_ground_truth_service.py
index 694390f..78938f8 100644
--- a/tests/unit/test_ground_truth_service.py
+++ b/tests/unit/test_ground_truth_service.py
@@ -40,11 +40,19 @@ def _seed_approved_job_with_corrections(
             jid,
             status=DocumentStatus.NEEDS_REVIEW,
             confidence=0.8,
-            result=final_result
-            or {
-                "header": {"nomor_sprint": "SPR/1/2025", "satuan_penerbit": "POLRES X"},
-                "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
-            },
+            # ``is None`` (not truthiness) so callers can pass ``{}`` to
+            # exercise the empty-dict edge case.
+            result=(
+                final_result
+                if final_result is not None
+                else {
+                    "header": {
+                        "nomor_sprint": "SPR/1/2025",
+                        "satuan_penerbit": "POLRES X",
+                    },
+                    "personel": [{"pangkat": "AIPDA", "nrp": "77060000", "nama": "BUDI"}],
+                }
+            ),
             review_flags=[],
         )
     if corrections:
@@ -197,6 +205,19 @@ def test_stats_counts_rollup_and_top_fields(db_ready: None) -> None:
     }
 
 
+def test_empty_dict_result_stays_consistent(db_ready: None) -> None:
+    """An empty-dict result (``{}``) is logically a valid snapshot — it
+    must round-trip as ``{}`` on *both* ``initial_result`` and
+    ``final_result``, not ``{}`` on one and ``None`` on the other.
+    """
+    _seed_approved_job_with_corrections(final_result={})
+    with session_scope() as session:
+        samples = list(iter_ground_truth_samples(session, GroundTruthFilters()))
+    assert len(samples) == 1
+    assert samples[0].initial_result == {}
+    assert samples[0].final_result == {}
+
+
 def test_serialize_is_valid_jsonl(db_ready: None) -> None:
     _seed_approved_job_with_corrections(corrections=[("header.perihal", "X", None)])
     with session_scope() as session: