feat: implement PP-Structure table extraction pipeline with GPU runtime configuration support

2026-04-27 00:51:23 +07:00
parent 9d969e61fd
commit 6d793758ff
12 changed files with 896 additions and 31 deletions
--- a/.env.example
+++ b/.env.example
@@ -10,7 +10,8 @@ STORAGE_LOCAL_DIR=./storage
 # ==== OCR ====
 OCR_LANG=latin                # PaddleOCR lang code; "latin" works well for Bahasa Indonesia
 OCR_USE_GPU=false             # set true if running on a GPU host
-OCR_DET_MODEL_DIR=             # leave empty to use PaddleOCR defaults
+# Leave empty to use PaddleOCR defaults.
+OCR_DET_MODEL_DIR=
 OCR_REC_MODEL_DIR=
 OCR_CLS_MODEL_DIR=
 OCR_MAX_IMAGE_SIDE=2200       # downscale longest side before OCR
--- a/docs/FRONTEND-INTEGRATION.md
+++ b/docs/FRONTEND-INTEGRATION.md
@@ -0,0 +1,537 @@
+# Frontend Integration Guide
+
+Dokumen ini menjelaskan kontrak API yang perlu dipakai frontend untuk upload dokumen sprint, menampilkan hasil OCR, menjalankan review manual, dan approve hasil final.
+
+## Base URL
+
+Default local API:
+
+```text
+http://localhost:8000/api/v1
+```
+
+Untuk frontend, simpan URL di environment variable:
+
+```env
+VITE_OCR_API_BASE_URL=http://localhost:8000/api/v1
+```
+
+Jika `API_KEYS` di backend diisi, semua endpoint protected membutuhkan header:
+
+```http
+X-API-Key: <api-key>
+```
+
+Catatan: jangan expose API key production di frontend publik. Untuk deployment internal, gunakan reverse proxy atau session backend-for-frontend jika aksesnya tidak sepenuhnya trusted.
+
+## Health Check
+
+```http
+GET /health
+GET /health/ready
+```
+
+Contoh response `/health`:
+
+```json
+{
+  "status": "ok",
+  "version": "0.1.0"
+}
+```
+
+Contoh response `/health/ready`:
+
+```json
+{
+  "status": "ready",
+  "version": "0.1.0",
+  "models": {
+    "paddleocr": "ready",
+    "pp_structure": "disabled"
+  }
+}
+```
+
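+Gunakan `/health/ready` untuk disable upload button sampai model OCR siap. Selama model masih dimuat, endpoint ini mengembalikan HTTP `503` dengan `"status": "warming_up"`, jadi perlakukan `503` dari endpoint ini sebagai "belum siap", bukan sebagai error.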
+
+## Upload Dokumen
+
+Endpoint:
+
+```http
+POST /documents
+POST /documents?sync=true
+```
+
+Body harus `multipart/form-data` dengan field `file`.
+
+Backend menerima PDF dan format image umum. Default max upload mengikuti backend config `BLOB_MAX_UPLOAD_MB`, saat ini 25 MB.
+
+### Recommended Flow
+
+Untuk frontend production, gunakan async flow:
+
+1. `POST /documents`
+2. Jika status HTTP `202`, ambil `job_id`
+3. Poll `GET /documents/{job_id}` setiap 1-3 detik
+4. Stop polling saat status `completed`, `needs_review`, atau `failed`
+
+Untuk local dev sederhana, `POST /documents?sync=true` boleh dipakai, tetapi request bisa lama karena OCR berjalan inline.
+
+### Upload Example
+
+```ts
+const API_BASE = import.meta.env.VITE_OCR_API_BASE_URL;
+const API_KEY = import.meta.env.VITE_OCR_API_KEY;
+
+async function uploadDocument(file: File) {
+  const form = new FormData();
+  form.append("file", file);
+
+  const res = await fetch(`${API_BASE}/documents`, {
+    method: "POST",
+    headers: API_KEY ? { "X-API-Key": API_KEY } : undefined,
+    body: form,
+  });
+
+  if (!res.ok) {
+    throw await readApiError(res);
+  }
+
+  return (await res.json()) as DocumentResponse;
+}
+```
+
+## Polling Job
+
+Endpoint:
+
+```http
+GET /documents/{job_id}
+```
+
+```ts
+const TERMINAL_STATUSES = new Set(["completed", "needs_review", "failed"]);
+
+async function getDocument(jobId: string) {
+  const res = await fetch(`${API_BASE}/documents/${jobId}`, {
+    headers: API_KEY ? { "X-API-Key": API_KEY } : undefined,
+  });
+
+  if (!res.ok) {
+    throw await readApiError(res);
+  }
+
+  return (await res.json()) as DocumentResponse;
+}
+
+async function pollDocument(jobId: string, onUpdate: (doc: DocumentResponse) => void) {
+  while (true) {
+    const doc = await getDocument(jobId);
+    onUpdate(doc);
+
+    if (TERMINAL_STATUSES.has(doc.status)) {
+      return doc;
+    }
+
+    await new Promise((resolve) => setTimeout(resolve, 2000));
+  }
+}
+```
+
+## Response Schema
+
+### DocumentResponse
+
+```ts
+type DocumentStatus =
+  | "pending"
+  | "processing"
+  | "completed"
+  | "needs_review"
+  | "failed";
+
+type DocumentResponse = {
+  job_id: string;
+  status: DocumentStatus;
+  confidence: number | null;
+  data: ExtractionResult | null;
+  review_flags: ReviewFlag[];
+  error: string | null;
+  approved: boolean;
+  reviewed_by: string | null;
+  reviewed_at: string | null;
+};
+```
+
+### ExtractionResult
+
+```ts
+type ExtractionResult = {
+  header: HeaderFields;
+  personel: PersonnelEntry[];
+  untuk: string[];
+  ttd: Signatory;
+  raw_text: string;
+  confidence: number;
+  review_flags: ReviewFlag[];
+};
+
+type HeaderFields = {
+  nomor_sprint: string | null;
+  tanggal: string | null; // YYYY-MM-DD
+  satuan_penerbit: string | null;
+  perihal: string | null;
+  dasar: string[];
+};
+
+type PersonnelEntry = {
+  no: number | null;
+  pangkat: string | null;
+  nrp: string | null;
+  nama: string | null;
+  jabatan_dinas: string | null;
+  jabatan_sprint: string | null;
+  keterangan: string | null;
+  confidence: number;
+};
+
+type Signatory = {
+  nama: string | null;
+  pangkat: string | null;
+  nrp: string | null;
+  jabatan: string | null;
+};
+```
+
+### Review Flags
+
+```ts
+type ReviewFlag =
+  | "low_ocr_confidence"
+  | "missing_field"
+  | "invalid_nrp"
+  | "unknown_pangkat"
+  | "personnel_count_mismatch"
+  | "date_parse_failed"
+  | "llm_fallback"
+  | "llm_unavailable"
+  | "personnel_text_fallback"
+  | "personnel_text_fallback_no_nrp"
+  | "incomplete_personnel_row";
+```
+
+Recommended UI labels:
+
+| Flag | Label |
+|---|---|
+| `low_ocr_confidence` | Confidence OCR rendah |
+| `missing_field` | Field wajib belum lengkap |
+| `invalid_nrp` | NRP tidak valid |
+| `unknown_pangkat` | Pangkat tidak dikenali |
+| `personnel_count_mismatch` | Jumlah personel perlu dicek |
+| `date_parse_failed` | Tanggal gagal dibaca |
+| `llm_fallback` | Sebagian field diisi fallback LLM |
+| `llm_unavailable` | LLM tidak tersedia |
+| `personnel_text_fallback` | Personel dibaca dari fallback teks |
+| `personnel_text_fallback_no_nrp` | Personel dibaca tanpa NRP |
+| `incomplete_personnel_row` | Baris personel belum lengkap |
+
+## Example Final Response
+
+```json
+{
+  "job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
+  "status": "needs_review",
+  "confidence": 0.82,
+  "data": {
+    "header": {
+      "nomor_sprint": "Sprin/123/IV/2026",
+      "tanggal": "2026-04-21",
+      "satuan_penerbit": "POLRES BANJAR",
+      "perihal": "Instruktur Ops Pekat I Lodaya 2026",
+      "dasar": []
+    },
+    "personel": [
+      {
+        "no": 1,
+        "pangkat": "IPDA",
+        "nrp": "12345678",
+        "nama": "BUDI SANTOSO",
+        "jabatan_dinas": "KANIT",
+        "jabatan_sprint": "INSTRUKTUR",
+        "keterangan": null,
+        "confidence": 0.91
+      }
+    ],
+    "untuk": ["Melaksanakan kegiatan sesuai surat perintah."],
+    "ttd": {
+      "nama": "AGUS",
+      "pangkat": "AKBP",
+      "nrp": "87654321",
+      "jabatan": "KAPOLRES"
+    },
+    "raw_text": "full OCR text...",
+    "confidence": 0.82,
+    "review_flags": ["low_ocr_confidence"]
+  },
+  "review_flags": ["low_ocr_confidence"],
+  "error": null,
+  "approved": false,
+  "reviewed_by": null,
+  "reviewed_at": null
+}
+```
+
+`raw_text` bisa panjang. Tampilkan di collapsible/debug panel, bukan di layar utama.
+
+## Review dan Koreksi HITL
+
+Frontend review screen sebaiknya mengizinkan editor untuk:
+
+- Header: nomor sprint, tanggal, satuan penerbit, perihal, dasar
+- Personel: pangkat, NRP, nama, jabatan dinas, jabatan sprint, keterangan
+- Untuk: daftar tugas
+- TTD: nama, pangkat, NRP, jabatan
+
+### Patch Corrections
+
+Endpoint:
+
+```http
+PATCH /documents/{job_id}
+```
+
+Body:
+
+```json
+{
+  "corrections": [
+    {
+      "path": "header.perihal",
+      "value": "Pelaksanaan Operasi Pekat I Lodaya 2026",
+      "reason": "OCR membaca perihal tidak lengkap"
+    },
+    {
+      "path": "personel[0].nama",
+      "value": "BUDI SANTOSO",
+      "reason": "Perbaikan nama"
+    }
+  ]
+}
+```
+
+Header opsional untuk audit trail:
+
+```http
+X-User-Id: reviewer-a
+```
+
+Path yang umum dipakai:
+
+```text
+header.nomor_sprint
+header.tanggal
+header.satuan_penerbit
+header.perihal
+header.dasar
+ttd.nama
+ttd.pangkat
+ttd.nrp
+ttd.jabatan
+personel[0].pangkat
+personel[0].nrp
+personel[0].nama
+personel[0].jabatan_dinas
+personel[0].jabatan_sprint
+personel[0].keterangan
+untuk
+```
+
+Semua correction dalam satu request bersifat atomic. Jika satu path invalid, seluruh batch ditolak dan tidak ada perubahan disimpan.
+
+### Patch Example
+
+```ts
+async function patchDocument(jobId: string, corrections: FieldCorrection[], userId?: string) {
+  const headers: Record<string, string> = { "Content-Type": "application/json" };
+  if (API_KEY) headers["X-API-Key"] = API_KEY;
+  if (userId) headers["X-User-Id"] = userId;
+
+  const res = await fetch(`${API_BASE}/documents/${jobId}`, {
+    method: "PATCH",
+    headers,
+    body: JSON.stringify({ corrections }),
+  });
+
+  if (!res.ok) {
+    throw await readApiError(res);
+  }
+
+  return (await res.json()) as DocumentResponse;
+}
+
+type FieldCorrection = {
+  path: string;
+  value: unknown;
+  reason?: string | null;
+};
+```
+
+## Correction History
+
+Endpoint:
+
+```http
+GET /documents/{job_id}/history
+```
+
+Response:
+
+```ts
+type CorrectionEventResponse = {
+  id: number;
+  job_id: string;
+  field_path: string;
+  old_value: unknown | null;
+  new_value: unknown | null;
+  corrected_by: string | null;
+  reason: string | null;
+  corrected_at: string;
+};
+```
+
+Gunakan endpoint ini untuk audit panel di halaman review.
+
+## Approve Final Result
+
+Endpoint:
+
+```http
+POST /documents/{job_id}/approve
+```
+
+Header opsional:
+
+```http
+X-User-Id: reviewer-a
+```
+
+Response:
+
+```json
+{
+  "job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
+  "approved": true,
+  "reviewed_by": "reviewer-a",
+  "reviewed_at": "2026-04-26T16:30:00"
+}
+```
+
+Setelah approved, `PATCH /documents/{job_id}` akan ditolak dengan `409`.
+
+## Error Handling
+
+Application errors:
+
+```json
+{
+  "error": "UnsupportedDocumentError",
+  "message": "Uploaded file is empty."
+}
+```
+
+FastAPI validation errors memakai shape standar:
+
+```json
+{
+  "detail": [
+    {
+      "type": "missing",
+      "loc": ["body", "file"],
+      "msg": "Field required"
+    }
+  ]
+}
+```
+
+Helper error:
+
+```ts
+/**
+ * Converts a failed HTTP response into a plain `{ status, payload }` object.
+ *
+ * `payload` is the parsed JSON body when the body is valid JSON, otherwise
+ * the raw body text (an empty string for an empty body).
+ */
+async function readApiError(res: Response) {
+  // A Response body can only be consumed once. Read it as text a single time
+  // and parse that text; calling res.text() after a failed res.json() would
+  // reject because the body stream has already been used.
+  const text = await res.text();
+  let payload: unknown = text;
+  try {
+    payload = JSON.parse(text);
+  } catch {
+    // Not JSON: keep the raw text as the payload.
+  }
+
+  return {
+    status: res.status,
+    payload,
+  };
+}
+```
+
+Recommended UI handling:
+
+| HTTP Status | UI Handling |
+|---|---|
+| `400` | Tampilkan pesan validasi/upload |
+| `401` | Session/API key tidak valid |
+| `404` | Job tidak ditemukan |
+| `409` | Job belum selesai atau sudah approved |
+| `422` | Form correction tidak valid |
+| `500` | Tampilkan error umum dan minta operator cek log backend |
+
+## Ground Truth Admin
+
+Endpoint ini opsional untuk dashboard admin/training data.
+
+```http
+GET /ground-truth/stats?top_n=10
+GET /ground-truth/export?approved_only=true&has_corrections=true&limit=1000
+```
+
+`/ground-truth/export` mengembalikan `application/x-ndjson`, satu JSON per baris. Frontend biasanya cukup menyediakan tombol download, bukan parse seluruh stream di browser.
+
+## Recommended Screens
+
+1. Upload screen
+   - Dropzone file PDF/image
+   - Health readiness badge
+   - Upload progress
+   - Processing state setelah `job_id` diterima
+
+2. Result screen
+   - Status badge
+   - Confidence score
+   - Review flags
+   - Header summary
+   - Personnel table
+   - Untuk list
+   - TTD section
+   - Raw OCR collapsible
+
+3. Review screen
+   - Editable fields
+   - Dirty-state tracking
+   - Correction reason input
+   - Save corrections via `PATCH`
+   - History panel
+   - Approve button
+
+4. Admin screen
+   - Health/ready status
+   - Ground-truth stats
+   - Export approved samples
+
+## UX Rules
+
+- Jangan tunggu `POST /documents?sync=true` untuk production UI; gunakan async + polling.
+- Disable approve kalau status masih `pending` atau `processing`.
+- Tampilkan `needs_review` sebagai hasil yang berhasil diproses tetapi perlu validasi manusia.
+- Jangan render `raw_text` sebagai konten utama.
+- Pada `failed`, tampilkan `error` dari response jika ada.
+- Pada confidence rendah, arahkan user ke review fields yang punya flag terkait.
--- a/docs/OCR-RUNTIME-MODES.md
+++ b/docs/OCR-RUNTIME-MODES.md
@@ -0,0 +1,49 @@
+# OCR Runtime Modes
+
+Backend OCR bisa dijalankan dalam mode CPU atau GPU lewat konfigurasi `OCR_USE_GPU`.
+
+## Cara Pakai
+
+Mode CPU:
+
+```powershell
+.\update.ps1 -OcrMode cpu
+```
+
+Mode GPU:
+
+```powershell
+.\update.ps1 -OcrMode gpu
+```
+
+Jika parameter tidak diberikan, `update.ps1` memakai nilai yang sudah ada di `.env`.
+
+```env
+OCR_USE_GPU=false
+```
+
+atau:
+
+```env
+OCR_USE_GPU=true
+```
+
+## Perilaku Script
+
+- `-OcrMode cpu` menyimpan `OCR_USE_GPU=false` ke `.env`.
+- `-OcrMode gpu` menyimpan `OCR_USE_GPU=true` ke `.env`.
+- Script tidak menghapus package Paddle/CUDA yang sudah terpasang.
+- Dalam mode GPU, script akan memasang `paddlepaddle-gpu` dan runtime cuDNN/cuBLAS jika belum ada.
+- Dalam mode CPU, script hanya memasang `paddlepaddle` CPU jika belum ada runtime Paddle sama sekali.
+
+## Catatan
+
+Mode CPU tidak membutuhkan CUDA, cuDNN, atau driver NVIDIA.
+
+Mode GPU membutuhkan NVIDIA driver dan runtime CUDA/cuDNN yang cocok. Pada Windows, backend juga menambahkan folder DLL NVIDIA dari `.venv` secara otomatis sebelum PaddleOCR diinisialisasi.
+
+`TABLES_ENABLED` adalah konfigurasi terpisah dari mode CPU/GPU. Jika PP-Structure belum stabil di environment lokal, biarkan:
+
+```env
+TABLES_ENABLED=false
+```
--- a/src/ocr_sprint/api/routes/documents.py
+++ b/src/ocr_sprint/api/routes/documents.py
@@ -28,13 +28,6 @@ from functools import partial
 from typing import Annotated
 from uuid import UUID, uuid4

-# Thread pool dedicated to blocking OCR work. Using a *separate* pool
-# (rather than the default loop executor) lets us cap the number of
-# concurrent heavy OCR jobs independently of other thread-pool users.
-# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
-# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
-_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
-
 from fastapi import (
    APIRouter,
    Depends,
@@ -73,6 +66,13 @@ from ocr_sprint.schemas.review import (
 from ocr_sprint.storage.blob import get_blob_storage
 from ocr_sprint.utils.logging import get_logger

+# Thread pool dedicated to blocking OCR work. Using a *separate* pool
+# (rather than the default loop executor) lets us cap the number of
+# concurrent heavy OCR jobs independently of other thread-pool users.
+# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
+# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
+_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
+
 router = APIRouter(
    prefix="/documents",
    tags=["documents"],
@@ -99,18 +99,17 @@ def _row_to_response(row: object) -> DocumentResponse:

    assert isinstance(row, JobRow)
    status_enum = DocumentStatus(row.status)
-    personel_list = None
+    result_obj = None
    if row.result is not None:
        result_obj = ExtractionResult.model_validate(row.result)
        # Auto-number personnel entries sequentially (1, 2, 3, ...)
        for idx, entry in enumerate(result_obj.personel, start=1):
            entry.no = idx
-        personel_list = result_obj.personel
    return DocumentResponse(
        job_id=row.job_id,
        status=status_enum,
        confidence=row.confidence,
-        data=personel_list,
+        data=result_obj,
        review_flags=list(row.review_flags or []),
        error=row.error,
        approved=bool(row.approved),
--- a/src/ocr_sprint/api/routes/health.py
+++ b/src/ocr_sprint/api/routes/health.py
@@ -6,6 +6,7 @@ from fastapi import APIRouter
 from fastapi.responses import JSONResponse

 from ocr_sprint import __version__
+from ocr_sprint.config import get_settings
 from ocr_sprint.pipeline import ocr as _ocr
 from ocr_sprint.pipeline import table as _table

@@ -21,15 +22,18 @@ async def health() -> dict[str, str]:
@router.get("/health/ready")
 async def readiness() -> JSONResponse:
    """Readiness check — returns 200 when OCR models are loaded, 503 if still warming up."""
+    settings = get_settings()
    ocr_ready = _ocr._instance is not None
-    table_ready = _table._instance is not None
+    table_ready = (not settings.tables_enabled) or _table._instance is not None
    ready = ocr_ready and table_ready
    payload = {
        "status": "ready" if ready else "warming_up",
        "version": __version__,
        "models": {
            "paddleocr": "ready" if ocr_ready else "loading",
-            "pp_structure": "ready" if table_ready else "loading",
+            "pp_structure": (
+                "disabled" if not settings.tables_enabled else "ready" if table_ready else "loading"
+            ),
        },
    }
    return JSONResponse(content=payload, status_code=200 if ready else 503)
--- a/src/ocr_sprint/main.py
+++ b/src/ocr_sprint/main.py
@@ -88,6 +88,17 @@ def create_app() -> FastAPI:
    )

    register_error_handlers(app)
+
+    # CORS — allow frontend dev servers and production origins
+    from fastapi.middleware.cors import CORSMiddleware
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
    app.add_middleware(MetricsMiddleware)
    app.include_router(health.router, prefix="/api/v1")
    app.include_router(documents.router, prefix="/api/v1")
--- a/src/ocr_sprint/pipeline/ocr.py
+++ b/src/ocr_sprint/pipeline/ocr.py
@@ -122,9 +122,14 @@ class OCRPage:


 def _build_paddleocr() -> PaddleOCR:
+    s = get_settings()
+    if s.ocr_use_gpu:
+        from ocr_sprint.utils.gpu import configure_nvidia_dll_path
+
+        configure_nvidia_dll_path()
+
    from paddleocr import PaddleOCR

-    s = get_settings()
    kwargs: dict[str, object] = {
        "lang": s.ocr_lang,
        "use_angle_cls": True,
--- a/src/ocr_sprint/pipeline/table.py
+++ b/src/ocr_sprint/pipeline/table.py
@@ -67,24 +67,41 @@ class DetectedTable:
 # ---------- PP-Structure singleton ----------


+def _create_pp_structure(
+    pp_structure_cls: type[PPStructure], pp_lang: str, use_gpu: bool
+) -> PPStructure:
+    # layout=True so that PP-Structure also returns figure/text regions; we
+    # filter to tables only afterwards. show_log=False to keep stdout clean.
+    return pp_structure_cls(
+        lang=pp_lang,
+        use_gpu=use_gpu,
+        layout=True,
+        show_log=False,
+    )
+
+
 def _build_pp_structure() -> PPStructure:
+    s = get_settings()
+    if s.ocr_use_gpu:
+        from ocr_sprint.utils.gpu import configure_nvidia_dll_path
+
+        configure_nvidia_dll_path()
+
    from paddleocr import PPStructure

-    s = get_settings()
    # PPStructure layout models only support 'en' and 'ch', not 'latin'.
    # Use 'en' for layout/table detection — it's language-agnostic (detects
    # table structure, not text language). OCR within cells still works for
    # Indonesian text because the recognition model handles Latin scripts.
    pp_lang = "en" if s.ocr_lang not in ("en", "ch") else s.ocr_lang
    _logger.info("pp_structure.init", lang=pp_lang, use_gpu=s.ocr_use_gpu)
-    # layout=True so that PP-Structure also returns figure/text regions; we
-    # filter to tables only afterwards. show_log=False to keep stdout clean.
-    return PPStructure(
-        lang=pp_lang,
-        use_gpu=s.ocr_use_gpu,
-        layout=True,
-        show_log=False,
-    )
+    try:
+        return _create_pp_structure(PPStructure, pp_lang, s.ocr_use_gpu)
+    except Exception as exc:
+        if not s.ocr_use_gpu:
+            raise
+        _logger.warning("pp_structure.gpu_init_failed_falling_back_cpu", error=str(exc))
+        return _create_pp_structure(PPStructure, pp_lang, False)


 def get_pp_structure() -> PPStructure:
--- a/src/ocr_sprint/schemas/document.py
+++ b/src/ocr_sprint/schemas/document.py
@@ -10,7 +10,6 @@ from uuid import UUID, uuid4
 from pydantic import BaseModel, ConfigDict, Field

 from ocr_sprint.schemas.extraction import ExtractionResult
-from ocr_sprint.schemas.personnel import PersonnelEntry


 class SourceKind(str, Enum):
@@ -53,7 +52,7 @@ class DocumentResponse(BaseModel):
    job_id: UUID
    status: DocumentStatus
    confidence: float | None = None
-    data: list[PersonnelEntry] | None = None
+    data: ExtractionResult | None = None
    review_flags: list[str] = Field(default_factory=list)
    error: str | None = None
    # Phase 6 — HITL review state.
--- a/src/ocr_sprint/utils/gpu.py
+++ b/src/ocr_sprint/utils/gpu.py
@@ -0,0 +1,57 @@
+"""GPU runtime helpers."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+_DLL_HANDLES: list[object] = []
+_CONFIGURED = False
+
+
+def configure_nvidia_dll_path() -> None:
+    """Expose NVIDIA wheel DLL directories to the Windows dynamic loader.
+
+    Paddle's Windows GPU wheels dynamically load CUDA/cuDNN DLLs by name. When
+    those DLLs come from Python packages such as ``nvidia-cudnn-cu11`` instead
+    of a system-wide CUDA Toolkit install, their ``bin`` folders are not on
+    ``PATH`` by default.
+    """
+    global _CONFIGURED
+    if _CONFIGURED or os.name != "nt":
+        return
+
+    package_names = ("nvidia.cudnn", "nvidia.cublas", "nvidia.cuda_nvrtc")
+    dll_dirs: list[Path] = []
+    for package_name in package_names:
+        try:
+            module = __import__(package_name, fromlist=["__file__"])
+        except Exception:
+            continue
+        module_file = getattr(module, "__file__", None)
+        if not module_file:
+            continue
+        dll_dir = Path(module_file).resolve().parent / "bin"
+        if dll_dir.is_dir():
+            dll_dirs.append(dll_dir)
+
+    if not dll_dirs:
+        _CONFIGURED = True
+        return
+
+    current_path_parts = os.environ.get("PATH", "").split(os.pathsep)
+    current_path_norm = {part.casefold() for part in current_path_parts if part}
+
+    prepend: list[str] = []
+    for dll_dir in dll_dirs:
+        dll_dir_str = str(dll_dir)
+        if dll_dir_str.casefold() not in current_path_norm:
+            prepend.append(dll_dir_str)
+        add_dll_directory = getattr(os, "add_dll_directory", None)
+        if add_dll_directory is not None:
+            _DLL_HANDLES.append(add_dll_directory(dll_dir_str))
+
+    if prepend:
+        os.environ["PATH"] = os.pathsep.join([*prepend, os.environ.get("PATH", "")])
+
+    _CONFIGURED = True
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -2,8 +2,12 @@

 from __future__ import annotations

+import sys
+from types import ModuleType, SimpleNamespace
+
 import pytest

+from ocr_sprint.pipeline import table as table_module
 from ocr_sprint.pipeline.table import (
    DetectedTable,
    extract_tables_from_pp_result,
@@ -82,6 +86,34 @@ class TestDetectedTable:
        assert table.n_cols == 0


+class TestPpStructureInit:
+    def test_gpu_init_falls_back_to_cpu(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        calls: list[dict[str, object]] = []
+
+        class FakePPStructure:
+            def __init__(self, **kwargs: object) -> None:
+                calls.append(kwargs)
+                if kwargs["use_gpu"]:
+                    raise RuntimeError("gpu init failed")
+
+        fake_paddleocr = ModuleType("paddleocr")
+        fake_paddleocr.PPStructure = FakePPStructure
+        monkeypatch.setitem(sys.modules, "paddleocr", fake_paddleocr)
+        monkeypatch.setattr(
+            table_module,
+            "get_settings",
+            lambda: SimpleNamespace(ocr_lang="latin", ocr_use_gpu=True),
+        )
+
+        engine = table_module._build_pp_structure()
+
+        assert isinstance(engine, FakePPStructure)
+        assert calls == [
+            {"lang": "en", "use_gpu": True, "layout": True, "show_log": False},
+            {"lang": "en", "use_gpu": False, "layout": True, "show_log": False},
+        ]
+
+
@pytest.fixture
 def sample_personnel_table() -> DetectedTable:
    """Header + three personnel rows in a typical Polres-level format."""
--- a/update.ps1
+++ b/update.ps1
@@ -1,24 +1,178 @@
 #!/usr/bin/env pwsh
 # update.ps1 - One-command update & restart for ocr-sprint-service (local dev)

+param(
+    [ValidateSet("cpu", "gpu")]
+    [string] $OcrMode
+)
+
+$ErrorActionPreference = "Stop"
+
 $Port = 8000
+$ProjectRoot = $PSScriptRoot
+$VenvDir = Join-Path $ProjectRoot ".venv"
+$Python = Join-Path $VenvDir "Scripts\python.exe"
+
+function Invoke-Step {
+    <#
+    .SYNOPSIS
+    Runs a script block and exits the script when $LASTEXITCODE is non-zero afterwards.
+    .PARAMETER Command
+    Script block to invoke.
+    .PARAMETER FailureMessage
+    Text printed in red before exiting with the failing exit code.
+    #>
+    param(
+        [Parameter(Mandatory = $true)] [scriptblock] $Command,
+        [Parameter(Mandatory = $true)] [string] $FailureMessage
+    )
+
+    & $Command
+    $exitCode = $LASTEXITCODE
+    if ($exitCode -eq 0) { return }
+
+    Write-Host "  $FailureMessage" -ForegroundColor Red
+    exit $exitCode
+}
+
+function Get-DotEnvValue {
+    <#
+    .SYNOPSIS
+    Reads a setting from the project's .env file, falling back to the process environment.
+    .DESCRIPTION
+    Uses the last line in .env that assigns $Name. Everything after the first
+    "=" is taken as the value, a trailing " # comment" is cut off, and the
+    result is trimmed. When .env is missing or has no such line, the
+    environment variable with the same name is returned instead.
+    #>
+    param(
+        [Parameter(Mandatory = $true)] [string] $Name
+    )
+
+    $dotEnvPath = Join-Path $ProjectRoot ".env"
+    if (Test-Path $dotEnvPath) {
+        $assignments = @(Get-Content $dotEnvPath | Where-Object { $_ -match "^\s*$Name\s*=" })
+        if ($assignments.Count -gt 0) {
+            $afterEquals = ($assignments[-1] -split "=", 2)[1]
+            $withoutComment = ($afterEquals -split "\s+#", 2)[0]
+            return $withoutComment.Trim()
+        }
+    }
+    return [Environment]::GetEnvironmentVariable($Name)
+}
+
+function Set-DotEnvValue {
+    <#
+    .SYNOPSIS
+    Writes "$Name=$Value" into the project's .env file, creating the file if needed.
+    .DESCRIPTION
+    Every existing line that assigns $Name is rewritten, keeping a trailing
+    " # comment" if the line had one. When no such line exists, a new
+    assignment is appended at the end of the file.
+    #>
+    param(
+        [Parameter(Mandatory = $true)] [string] $Name,
+        [Parameter(Mandatory = $true)] [string] $Value
+    )
+
+    $dotEnvPath = Join-Path $ProjectRoot ".env"
+    if (-not (Test-Path $dotEnvPath)) {
+        New-Item -Path $dotEnvPath -ItemType File | Out-Null
+    }
+
+    $found = $false
+    $rewritten = @(
+        foreach ($entry in @(Get-Content $dotEnvPath)) {
+            if ($entry -notmatch "^\s*$Name\s*=") {
+                # Unrelated line: pass it through untouched.
+                $entry
+                continue
+            }
+            $found = $true
+            $trailingComment = if ($entry -match "(\s+#.*)$") { $Matches[1] } else { "" }
+            "$Name=$Value$trailingComment"
+        }
+    )
+    if (-not $found) {
+        $rewritten += "$Name=$Value"
+    }
+    Set-Content -Path $dotEnvPath -Value $rewritten
+}
+
+function Test-PythonPackage {
+    <#
+    .SYNOPSIS
+    Returns $true when `pip show` succeeds for the given package in the project virtualenv.
+    .PARAMETER Name
+    Distribution name passed to `pip show`.
+    #>
+    param(
+        [Parameter(Mandatory = $true)]
+        [string] $Name
+    )
+
+    # `pip show` reports a missing package on stderr. The script runs with
+    # $ErrorActionPreference = "Stop", and before PowerShell 7.2 redirected
+    # stderr output from a native command is then raised as a terminating
+    # error, aborting the script instead of yielding $false. Relax the
+    # preference for this function only (the assignment is function-local).
+    $ErrorActionPreference = "Continue"
+    & $Python -m pip show $Name *> $null
+    return $LASTEXITCODE -eq 0
+}
+
+function Add-NvidiaDllPaths {
+    <#
+    .SYNOPSIS
+    Prepends the virtualenv's NVIDIA wheel "bin" folders to PATH for this session.
+    .DESCRIPTION
+    A folder is added only when it exists and is not already one of the
+    ";"-separated PATH entries.
+    #>
+    $relativeBinDirs = @(
+        "Lib\site-packages\nvidia\cudnn\bin",
+        "Lib\site-packages\nvidia\cublas\bin",
+        "Lib\site-packages\nvidia\cuda_nvrtc\bin"
+    )
+    foreach ($relative in $relativeBinDirs) {
+        $binDir = Join-Path $VenvDir $relative
+        $alreadyOnPath = ($env:PATH -split ";") -contains $binDir
+        if ((Test-Path $binDir) -and -not $alreadyOnPath) {
+            $env:PATH = "$binDir;$env:PATH"
+        }
+    }
+}
+
+Set-Location $ProjectRoot
+
+if (-not (Test-Path $Python)) {
+    # Bootstrap: there is no interpreter inside .venv yet, so create the virtualenv first.
+    Write-Host "Virtualenv not found at $VenvDir. Creating one..." -ForegroundColor Yellow
+    $venvCreated = $false
+    $pythonLauncher = Get-Command py -ErrorAction SilentlyContinue
+    if ($pythonLauncher) {
+        # Prefer the `py` launcher and try the supported versions newest-first.
+        foreach ($version in @("3.12", "3.11", "3.10")) {
+            # Probe in a child scope with a relaxed error preference. With the
+            # script-level "Stop" preference, PowerShell before 7.2 turns
+            # redirected stderr output of a native command into a terminating
+            # error, which would abort the script on the first unavailable
+            # version instead of moving on to the next one.
+            & {
+                $ErrorActionPreference = "Continue"
+                & py "-$version" -m venv $VenvDir 2>$null
+            }
+            if ($LASTEXITCODE -eq 0) {
+                $venvCreated = $true
+                break
+            }
+        }
+    }
+    if (-not $venvCreated) {
+        # Fall back to whatever `python` is on PATH.
+        $systemPython = Get-Command python -ErrorAction SilentlyContinue
+        if (-not $systemPython) {
+            Write-Host "  Python was not found. Install Python 3.10-3.12, then rerun this script." -ForegroundColor Red
+            exit 1
+        }
+        & python -m venv $VenvDir
+        $venvCreated = ($LASTEXITCODE -eq 0)
+    }
+    if (-not $venvCreated) {
+        Write-Host "  Failed to create virtualenv." -ForegroundColor Red
+        exit $LASTEXITCODE
+    }
+}
+
+$env:VIRTUAL_ENV = $VenvDir
+$env:PATH = "$(Join-Path $VenvDir 'Scripts');$env:PATH"
+
+if ($PSBoundParameters.ContainsKey("OcrMode")) {
+    $ocrUseGpuValue = if ($OcrMode -eq "gpu") { "true" } else { "false" }
+    Set-DotEnvValue "OCR_USE_GPU" $ocrUseGpuValue
+    $env:OCR_USE_GPU = $ocrUseGpuValue
+    Write-Host "OCR mode set to $($OcrMode.ToUpperInvariant()) and saved to .env." -ForegroundColor Green
+}

 # ── [1/5] Git pull ──────────────────────────────────────────────────────────
 Write-Host "`n[1/5] Pulling latest code..." -ForegroundColor Cyan
-git pull
+Invoke-Step { git pull } "Git pull failed."

 # ── [2/5] Install/update dependencies ───────────────────────────────────────
 Write-Host "`n[2/5] Installing/updating dependencies..." -ForegroundColor Cyan
-pip install -e ".[dev]" -q
+Invoke-Step { & $Python -m pip install -e ".[dev]" -q } "Dependency install failed."
+
+$ocrUseGpu = (Get-DotEnvValue "OCR_USE_GPU")
+if ($ocrUseGpu -and $ocrUseGpu.ToLowerInvariant() -in @("1", "true", "yes", "on")) {
+    Write-Host "  GPU mode enabled; checking Paddle CUDA runtime..." -ForegroundColor Cyan
+    if (-not (Test-PythonPackage "paddlepaddle-gpu")) {
+        Invoke-Step {
+            & $Python -m pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ -q
+        } "Paddle GPU install failed."
+    }
+    if (-not (Test-PythonPackage "nvidia-cudnn-cu11")) {
+        Invoke-Step { & $Python -m pip install nvidia-cudnn-cu11==8.9.5.29 -q } "NVIDIA cuDNN install failed."
+    }
+    Add-NvidiaDllPaths
+} else {
+    Write-Host "  CPU mode enabled; checking Paddle CPU runtime..." -ForegroundColor Cyan
+    if (-not ((Test-PythonPackage "paddlepaddle") -or (Test-PythonPackage "paddlepaddle-gpu"))) {
+        Invoke-Step { & $Python -m pip install paddlepaddle==2.6.2 -q } "Paddle CPU install failed."
+    }
+}

 # ── [3/5] Database migration ─────────────────────────────────────────────────
 Write-Host "`n[3/5] Running database migrations..." -ForegroundColor Cyan
-alembic upgrade head
+& $Python -m alembic upgrade head
 if ($LASTEXITCODE -ne 0) {
    Write-Host "  Migration conflict detected, stamping current state as head..." -ForegroundColor Yellow
-    alembic stamp head
+    Invoke-Step { & $Python -m alembic stamp head } "Alembic stamp failed."
    Write-Host "  Retrying upgrade for any remaining new migrations..." -ForegroundColor Yellow
-    alembic upgrade head
+    & $Python -m alembic upgrade head
    if ($LASTEXITCODE -ne 0) {
        Write-Host "  Migration still failed. Please check alembic manually." -ForegroundColor Red
        exit 1
@@ -57,4 +211,4 @@ if ($connections) {

 # ── [5/5] Start dev server ───────────────────────────────────────────────────
 Write-Host "`n[5/5] Starting dev server on port $Port (Ctrl+C to stop)..." -ForegroundColor Cyan
-uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port
+& $Python -m uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port