diff --git a/.env.example b/.env.example index 68b2964..4b3a0e7 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,8 @@ STORAGE_LOCAL_DIR=./storage # ==== OCR ==== OCR_LANG=latin # PaddleOCR lang code; "latin" works well for Bahasa Indonesia OCR_USE_GPU=false # set true if running on a GPU host -OCR_DET_MODEL_DIR= # leave empty to use PaddleOCR defaults +# Leave empty to use PaddleOCR defaults. +OCR_DET_MODEL_DIR= OCR_REC_MODEL_DIR= OCR_CLS_MODEL_DIR= OCR_MAX_IMAGE_SIDE=2200 # downscale longest side before OCR diff --git a/docs/FRONTEND-INTEGRATION.md b/docs/FRONTEND-INTEGRATION.md new file mode 100644 index 0000000..7d74e38 --- /dev/null +++ b/docs/FRONTEND-INTEGRATION.md @@ -0,0 +1,537 @@ +# Frontend Integration Guide + +Dokumen ini menjelaskan kontrak API yang perlu dipakai frontend untuk upload dokumen sprint, menampilkan hasil OCR, menjalankan review manual, dan approve hasil final. + +## Base URL + +Default local API: + +```text +http://localhost:8000/api/v1 +``` + +Untuk frontend, simpan URL di environment variable: + +```env +VITE_OCR_API_BASE_URL=http://localhost:8000/api/v1 +``` + +Jika `API_KEYS` di backend diisi, semua endpoint protected membutuhkan header: + +```http +X-API-Key: <your-api-key> +``` + +Catatan: jangan expose API key production di frontend publik. Untuk deployment internal, gunakan reverse proxy atau session backend-for-frontend jika aksesnya tidak sepenuhnya trusted. + +## Health Check + +```http +GET /health +GET /health/ready +``` + +Contoh response `/health`: + +```json +{ + "status": "ok", + "version": "0.1.0" +} +``` + +Contoh response `/health/ready`: + +```json +{ + "status": "ready", + "version": "0.1.0", + "models": { + "paddleocr": "ready", + "pp_structure": "disabled" + } +} +``` + +Gunakan `/health/ready` untuk disable upload button sampai model OCR siap. + +## Upload Dokumen + +Endpoint: + +```http +POST /documents +POST /documents?sync=true +``` + +Body harus `multipart/form-data` dengan field `file`. 
+ +Backend menerima PDF dan format image umum. Default max upload mengikuti backend config `BLOB_MAX_UPLOAD_MB`, saat ini 25 MB. + +### Recommended Flow + +Untuk frontend production, gunakan async flow: + +1. `POST /documents` +2. Jika status HTTP `202`, ambil `job_id` +3. Poll `GET /documents/{job_id}` setiap 1-3 detik +4. Stop polling saat status `completed`, `needs_review`, atau `failed` + +Untuk local dev sederhana, `POST /documents?sync=true` boleh dipakai, tetapi request bisa lama karena OCR berjalan inline. + +### Upload Example + +```ts +const API_BASE = import.meta.env.VITE_OCR_API_BASE_URL; +const API_KEY = import.meta.env.VITE_OCR_API_KEY; + +async function uploadDocument(file: File) { + const form = new FormData(); + form.append("file", file); + + const res = await fetch(`${API_BASE}/documents`, { + method: "POST", + headers: API_KEY ? { "X-API-Key": API_KEY } : undefined, + body: form, + }); + + if (!res.ok) { + throw await readApiError(res); + } + + return (await res.json()) as DocumentResponse; +} +``` + +## Polling Job + +Endpoint: + +```http +GET /documents/{job_id} +``` + +```ts +const TERMINAL_STATUSES = new Set(["completed", "needs_review", "failed"]); + +async function getDocument(jobId: string) { + const res = await fetch(`${API_BASE}/documents/${jobId}`, { + headers: API_KEY ? 
{ "X-API-Key": API_KEY } : undefined, + }); + + if (!res.ok) { + throw await readApiError(res); + } + + return (await res.json()) as DocumentResponse; +} + +async function pollDocument(jobId: string, onUpdate: (doc: DocumentResponse) => void) { + while (true) { + const doc = await getDocument(jobId); + onUpdate(doc); + + if (TERMINAL_STATUSES.has(doc.status)) { + return doc; + } + + await new Promise((resolve) => setTimeout(resolve, 2000)); + } +} +``` + +## Response Schema + +### DocumentResponse + +```ts +type DocumentStatus = + | "pending" + | "processing" + | "completed" + | "needs_review" + | "failed"; + +type DocumentResponse = { + job_id: string; + status: DocumentStatus; + confidence: number | null; + data: ExtractionResult | null; + review_flags: ReviewFlag[]; + error: string | null; + approved: boolean; + reviewed_by: string | null; + reviewed_at: string | null; +}; +``` + +### ExtractionResult + +```ts +type ExtractionResult = { + header: HeaderFields; + personel: PersonnelEntry[]; + untuk: string[]; + ttd: Signatory; + raw_text: string; + confidence: number; + review_flags: ReviewFlag[]; +}; + +type HeaderFields = { + nomor_sprint: string | null; + tanggal: string | null; // YYYY-MM-DD + satuan_penerbit: string | null; + perihal: string | null; + dasar: string[]; +}; + +type PersonnelEntry = { + no: number | null; + pangkat: string | null; + nrp: string | null; + nama: string | null; + jabatan_dinas: string | null; + jabatan_sprint: string | null; + keterangan: string | null; + confidence: number; +}; + +type Signatory = { + nama: string | null; + pangkat: string | null; + nrp: string | null; + jabatan: string | null; +}; +``` + +### Review Flags + +```ts +type ReviewFlag = + | "low_ocr_confidence" + | "missing_field" + | "invalid_nrp" + | "unknown_pangkat" + | "personnel_count_mismatch" + | "date_parse_failed" + | "llm_fallback" + | "llm_unavailable" + | "personnel_text_fallback" + | "personnel_text_fallback_no_nrp" + | "incomplete_personnel_row"; +``` 
+ +Recommended UI labels: + +| Flag | Label | +|---|---| +| `low_ocr_confidence` | Confidence OCR rendah | +| `missing_field` | Field wajib belum lengkap | +| `invalid_nrp` | NRP tidak valid | +| `unknown_pangkat` | Pangkat tidak dikenali | +| `personnel_count_mismatch` | Jumlah personel perlu dicek | +| `date_parse_failed` | Tanggal gagal dibaca | +| `llm_fallback` | Sebagian field diisi fallback LLM | +| `llm_unavailable` | LLM tidak tersedia | +| `personnel_text_fallback` | Personel dibaca dari fallback teks | +| `personnel_text_fallback_no_nrp` | Personel dibaca tanpa NRP | +| `incomplete_personnel_row` | Baris personel belum lengkap | + +## Example Final Response + +```json +{ + "job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a", + "status": "needs_review", + "confidence": 0.82, + "data": { + "header": { + "nomor_sprint": "Sprin/123/IV/2026", + "tanggal": "2026-04-21", + "satuan_penerbit": "POLRES BANJAR", + "perihal": "Instruktur Ops Pekat I Lodaya 2026", + "dasar": [] + }, + "personel": [ + { + "no": 1, + "pangkat": "IPDA", + "nrp": "12345678", + "nama": "BUDI SANTOSO", + "jabatan_dinas": "KANIT", + "jabatan_sprint": "INSTRUKTUR", + "keterangan": null, + "confidence": 0.91 + } + ], + "untuk": ["Melaksanakan kegiatan sesuai surat perintah."], + "ttd": { + "nama": "AGUS", + "pangkat": "AKBP", + "nrp": "87654321", + "jabatan": "KAPOLRES" + }, + "raw_text": "full OCR text...", + "confidence": 0.82, + "review_flags": ["low_ocr_confidence"] + }, + "review_flags": ["low_ocr_confidence"], + "error": null, + "approved": false, + "reviewed_by": null, + "reviewed_at": null +} +``` + +`raw_text` bisa panjang. Tampilkan di collapsible/debug panel, bukan di layar utama. 
+ +## Review dan Koreksi HITL + +Frontend review screen sebaiknya mengizinkan editor untuk: + +- Header: nomor sprint, tanggal, satuan penerbit, perihal, dasar +- Personel: pangkat, NRP, nama, jabatan dinas, jabatan sprint, keterangan +- Untuk: daftar tugas +- TTD: nama, pangkat, NRP, jabatan + +### Patch Corrections + +Endpoint: + +```http +PATCH /documents/{job_id} +``` + +Body: + +```json +{ + "corrections": [ + { + "path": "header.perihal", + "value": "Pelaksanaan Operasi Pekat I Lodaya 2026", + "reason": "OCR membaca perihal tidak lengkap" + }, + { + "path": "personel[0].nama", + "value": "BUDI SANTOSO", + "reason": "Perbaikan nama" + } + ] +} +``` + +Header opsional untuk audit trail: + +```http +X-User-Id: reviewer-a +``` + +Path yang umum dipakai: + +```text +header.nomor_sprint +header.tanggal +header.satuan_penerbit +header.perihal +header.dasar +ttd.nama +ttd.pangkat +ttd.nrp +ttd.jabatan +personel[0].pangkat +personel[0].nrp +personel[0].nama +personel[0].jabatan_dinas +personel[0].jabatan_sprint +personel[0].keterangan +untuk +``` + +Semua correction dalam satu request bersifat atomic. Jika satu path invalid, seluruh batch ditolak dan tidak ada perubahan disimpan. 
+ +### Patch Example + +```ts +async function patchDocument(jobId: string, corrections: FieldCorrection[], userId?: string) { + const headers: Record<string, string> = { "Content-Type": "application/json" }; + if (API_KEY) headers["X-API-Key"] = API_KEY; + if (userId) headers["X-User-Id"] = userId; + + const res = await fetch(`${API_BASE}/documents/${jobId}`, { + method: "PATCH", + headers, + body: JSON.stringify({ corrections }), + }); + + if (!res.ok) { + throw await readApiError(res); + } + + return (await res.json()) as DocumentResponse; +} + +type FieldCorrection = { + path: string; + value: unknown; + reason?: string | null; +}; +``` + +## Correction History + +Endpoint: + +```http +GET /documents/{job_id}/history +``` + +Response: + +```ts +type CorrectionEventResponse = { + id: number; + job_id: string; + field_path: string; + old_value: unknown | null; + new_value: unknown | null; + corrected_by: string | null; + reason: string | null; + corrected_at: string; +}; +``` + +Gunakan endpoint ini untuk audit panel di halaman review. + +## Approve Final Result + +Endpoint: + +```http +POST /documents/{job_id}/approve +``` + +Header opsional: + +```http +X-User-Id: reviewer-a +``` + +Response: + +```json +{ + "job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a", + "approved": true, + "reviewed_by": "reviewer-a", + "reviewed_at": "2026-04-26T16:30:00" +} +``` + +Setelah approved, `PATCH /documents/{job_id}` akan ditolak dengan `409`. + +## Error Handling + +Application errors: + +```json +{ + "error": "UnsupportedDocumentError", + "message": "Uploaded file is empty." 
+} +``` + +FastAPI validation errors memakai shape standar: + +```json +{ + "detail": [ + { + "type": "missing", + "loc": ["body", "file"], + "msg": "Field required" + } + ] +} +``` + +Helper error: + +```ts +async function readApiError(res: Response) { + let payload: unknown = null; + try { + payload = await res.clone().json(); // clone: a Response body can only be read once + } catch { + payload = await res.text(); + } + + return { + status: res.status, + payload, + }; +} +``` + +Recommended UI handling: + +| HTTP Status | UI Handling | +|---|---| +| `400` | Tampilkan pesan validasi/upload | +| `401` | Session/API key tidak valid | +| `404` | Job tidak ditemukan | +| `409` | Job belum selesai atau sudah approved | +| `422` | Form correction tidak valid | +| `500` | Tampilkan error umum dan minta operator cek log backend | + +## Ground Truth Admin + +Endpoint ini opsional untuk dashboard admin/training data. + +```http +GET /ground-truth/stats?top_n=10 +GET /ground-truth/export?approved_only=true&has_corrections=true&limit=1000 +``` + +`/ground-truth/export` mengembalikan `application/x-ndjson`, satu JSON per baris. Frontend biasanya cukup menyediakan tombol download, bukan parse seluruh stream di browser. + +## Recommended Screens + +1. Upload screen + - Dropzone file PDF/image + - Health readiness badge + - Upload progress + - Processing state setelah `job_id` diterima + +2. Result screen + - Status badge + - Confidence score + - Review flags + - Header summary + - Personnel table + - Untuk list + - TTD section + - Raw OCR collapsible + +3. Review screen + - Editable fields + - Dirty-state tracking + - Correction reason input + - Save corrections via `PATCH` + - History panel + - Approve button + +4. Admin screen + - Health/ready status + - Ground-truth stats + - Export approved samples + +## UX Rules + +- Jangan tunggu `POST /documents?sync=true` untuk production UI; gunakan async + polling. +- Disable approve kalau status masih `pending` atau `processing`. 
+- Tampilkan `needs_review` sebagai hasil yang berhasil diproses tetapi perlu validasi manusia. +- Jangan render `raw_text` sebagai konten utama. +- Pada `failed`, tampilkan `error` dari response jika ada. +- Pada confidence rendah, arahkan user ke review fields yang punya flag terkait. diff --git a/docs/OCR-RUNTIME-MODES.md b/docs/OCR-RUNTIME-MODES.md new file mode 100644 index 0000000..93c5798 --- /dev/null +++ b/docs/OCR-RUNTIME-MODES.md @@ -0,0 +1,49 @@ +# OCR Runtime Modes + +Backend OCR bisa dijalankan dalam mode CPU atau GPU lewat konfigurasi `OCR_USE_GPU`. + +## Cara Pakai + +Mode CPU: + +```powershell +.\update.ps1 -OcrMode cpu +``` + +Mode GPU: + +```powershell +.\update.ps1 -OcrMode gpu +``` + +Jika parameter tidak diberikan, `update.ps1` memakai nilai yang sudah ada di `.env`. + +```env +OCR_USE_GPU=false +``` + +atau: + +```env +OCR_USE_GPU=true +``` + +## Perilaku Script + +- `-OcrMode cpu` menyimpan `OCR_USE_GPU=false` ke `.env`. +- `-OcrMode gpu` menyimpan `OCR_USE_GPU=true` ke `.env`. +- Script tidak menghapus package Paddle/CUDA yang sudah terpasang. +- Dalam mode GPU, script akan memasang `paddlepaddle-gpu` dan runtime cuDNN/cuBLAS jika belum ada. +- Dalam mode CPU, script hanya memasang `paddlepaddle` CPU jika belum ada runtime Paddle sama sekali. + +## Catatan + +Mode CPU tidak membutuhkan CUDA, cuDNN, atau driver NVIDIA. + +Mode GPU membutuhkan NVIDIA driver dan runtime CUDA/cuDNN yang cocok. Pada Windows, backend juga menambahkan folder DLL NVIDIA dari `.venv` secara otomatis sebelum PaddleOCR diinisialisasi. + +`TABLES_ENABLED` adalah konfigurasi terpisah dari mode CPU/GPU. 
Jika PP-Structure belum stabil di environment lokal, biarkan: + +```env +TABLES_ENABLED=false +``` diff --git a/src/ocr_sprint/api/routes/documents.py b/src/ocr_sprint/api/routes/documents.py index 88d42ac..e0ef4aa 100644 --- a/src/ocr_sprint/api/routes/documents.py +++ b/src/ocr_sprint/api/routes/documents.py @@ -28,13 +28,6 @@ from functools import partial from typing import Annotated from uuid import UUID, uuid4 -# Thread pool dedicated to blocking OCR work. Using a *separate* pool -# (rather than the default loop executor) lets us cap the number of -# concurrent heavy OCR jobs independently of other thread-pool users. -# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR -# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server. -_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline") - from fastapi import ( APIRouter, Depends, @@ -73,6 +66,13 @@ from ocr_sprint.schemas.review import ( from ocr_sprint.storage.blob import get_blob_storage from ocr_sprint.utils.logging import get_logger +# Thread pool dedicated to blocking OCR work. Using a *separate* pool +# (rather than the default loop executor) lets us cap the number of +# concurrent heavy OCR jobs independently of other thread-pool users. +# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR +# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server. +_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline") + router = APIRouter( prefix="/documents", tags=["documents"], @@ -99,18 +99,17 @@ def _row_to_response(row: object) -> DocumentResponse: assert isinstance(row, JobRow) status_enum = DocumentStatus(row.status) - personel_list = None + result_obj = None if row.result is not None: result_obj = ExtractionResult.model_validate(row.result) # Auto-number personnel entries sequentially (1, 2, 3, ...) 
for idx, entry in enumerate(result_obj.personel, start=1): entry.no = idx - personel_list = result_obj.personel return DocumentResponse( job_id=row.job_id, status=status_enum, confidence=row.confidence, - data=personel_list, + data=result_obj, review_flags=list(row.review_flags or []), error=row.error, approved=bool(row.approved), diff --git a/src/ocr_sprint/api/routes/health.py b/src/ocr_sprint/api/routes/health.py index d3786a7..5ae1972 100644 --- a/src/ocr_sprint/api/routes/health.py +++ b/src/ocr_sprint/api/routes/health.py @@ -6,6 +6,7 @@ from fastapi import APIRouter from fastapi.responses import JSONResponse from ocr_sprint import __version__ +from ocr_sprint.config import get_settings from ocr_sprint.pipeline import ocr as _ocr from ocr_sprint.pipeline import table as _table @@ -21,15 +22,18 @@ async def health() -> dict[str, str]: @router.get("/health/ready") async def readiness() -> JSONResponse: """Readiness check — returns 200 when OCR models are loaded, 503 if still warming up.""" + settings = get_settings() ocr_ready = _ocr._instance is not None - table_ready = _table._instance is not None + table_ready = (not settings.tables_enabled) or _table._instance is not None ready = ocr_ready and table_ready payload = { "status": "ready" if ready else "warming_up", "version": __version__, "models": { "paddleocr": "ready" if ocr_ready else "loading", - "pp_structure": "ready" if table_ready else "loading", + "pp_structure": ( + "disabled" if not settings.tables_enabled else "ready" if table_ready else "loading" + ), }, } return JSONResponse(content=payload, status_code=200 if ready else 503) diff --git a/src/ocr_sprint/main.py b/src/ocr_sprint/main.py index 21acd41..2007c9e 100644 --- a/src/ocr_sprint/main.py +++ b/src/ocr_sprint/main.py @@ -88,6 +88,17 @@ def create_app() -> FastAPI: ) register_error_handlers(app) + + # CORS — allow frontend dev servers and production origins + from fastapi.middleware.cors import CORSMiddleware + app.add_middleware( + 
CORSMiddleware, + allow_origins=["*"], + allow_credentials=False,  # auth is the X-API-Key header, not cookies; wildcard origins must not allow credentials + allow_methods=["*"], + allow_headers=["*"], + ) + app.add_middleware(MetricsMiddleware) app.include_router(health.router, prefix="/api/v1") app.include_router(documents.router, prefix="/api/v1") diff --git a/src/ocr_sprint/pipeline/ocr.py b/src/ocr_sprint/pipeline/ocr.py index c9893b3..6320de6 100644 --- a/src/ocr_sprint/pipeline/ocr.py +++ b/src/ocr_sprint/pipeline/ocr.py @@ -122,9 +122,14 @@ class OCRPage: def _build_paddleocr() -> PaddleOCR: + s = get_settings() + if s.ocr_use_gpu: + from ocr_sprint.utils.gpu import configure_nvidia_dll_path + + configure_nvidia_dll_path() + from paddleocr import PaddleOCR - s = get_settings() kwargs: dict[str, object] = { "lang": s.ocr_lang, "use_angle_cls": True, diff --git a/src/ocr_sprint/pipeline/table.py b/src/ocr_sprint/pipeline/table.py index a6bd7bd..f75a7c3 100644 --- a/src/ocr_sprint/pipeline/table.py +++ b/src/ocr_sprint/pipeline/table.py @@ -67,24 +67,41 @@ class DetectedTable: # ---------- PP-Structure singleton ---------- +def _create_pp_structure( + pp_structure_cls: type[PPStructure], pp_lang: str, use_gpu: bool +) -> PPStructure: + # layout=True so that PP-Structure also returns figure/text regions; we + # filter to tables only afterwards. show_log=False to keep stdout clean. + return pp_structure_cls( + lang=pp_lang, + use_gpu=use_gpu, + layout=True, + show_log=False, + ) + + def _build_pp_structure() -> PPStructure: + s = get_settings() + if s.ocr_use_gpu: + from ocr_sprint.utils.gpu import configure_nvidia_dll_path + + configure_nvidia_dll_path() + from paddleocr import PPStructure - s = get_settings() # PPStructure layout models only support 'en' and 'ch', not 'latin'. # Use 'en' for layout/table detection — it's language-agnostic (detects # table structure, not text language). OCR within cells still works for # Indonesian text because the recognition model handles Latin scripts. 
pp_lang = "en" if s.ocr_lang not in ("en", "ch") else s.ocr_lang _logger.info("pp_structure.init", lang=pp_lang, use_gpu=s.ocr_use_gpu) - # layout=True so that PP-Structure also returns figure/text regions; we - # filter to tables only afterwards. show_log=False to keep stdout clean. - return PPStructure( - lang=pp_lang, - use_gpu=s.ocr_use_gpu, - layout=True, - show_log=False, - ) + try: + return _create_pp_structure(PPStructure, pp_lang, s.ocr_use_gpu) + except Exception as exc: + if not s.ocr_use_gpu: + raise + _logger.warning("pp_structure.gpu_init_failed_falling_back_cpu", error=str(exc)) + return _create_pp_structure(PPStructure, pp_lang, False) def get_pp_structure() -> PPStructure: diff --git a/src/ocr_sprint/schemas/document.py b/src/ocr_sprint/schemas/document.py index 12f7a69..3269539 100644 --- a/src/ocr_sprint/schemas/document.py +++ b/src/ocr_sprint/schemas/document.py @@ -10,7 +10,6 @@ from uuid import UUID, uuid4 from pydantic import BaseModel, ConfigDict, Field from ocr_sprint.schemas.extraction import ExtractionResult -from ocr_sprint.schemas.personnel import PersonnelEntry class SourceKind(str, Enum): @@ -53,7 +52,7 @@ class DocumentResponse(BaseModel): job_id: UUID status: DocumentStatus confidence: float | None = None - data: list[PersonnelEntry] | None = None + data: ExtractionResult | None = None review_flags: list[str] = Field(default_factory=list) error: str | None = None # Phase 6 — HITL review state. diff --git a/src/ocr_sprint/utils/gpu.py b/src/ocr_sprint/utils/gpu.py new file mode 100644 index 0000000..1a1783d --- /dev/null +++ b/src/ocr_sprint/utils/gpu.py @@ -0,0 +1,57 @@ +"""GPU runtime helpers.""" + +from __future__ import annotations + +import os +from pathlib import Path + +_DLL_HANDLES: list[object] = [] +_CONFIGURED = False + + +def configure_nvidia_dll_path() -> None: + """Expose NVIDIA wheel DLL directories to the Windows dynamic loader. + + Paddle's Windows GPU wheels dynamically load CUDA/cuDNN DLLs by name. 
When + those DLLs come from Python packages such as ``nvidia-cudnn-cu11`` instead + of a system-wide CUDA Toolkit install, their ``bin`` folders are not on + ``PATH`` by default. + """ + global _CONFIGURED + if _CONFIGURED or os.name != "nt": + return + + package_names = ("nvidia.cudnn", "nvidia.cublas", "nvidia.cuda_nvrtc") + dll_dirs: list[Path] = [] + for package_name in package_names: + try: + module = __import__(package_name, fromlist=["__file__"]) + except Exception: + continue + module_file = getattr(module, "__file__", None) + if not module_file: + continue + dll_dir = Path(module_file).resolve().parent / "bin" + if dll_dir.is_dir(): + dll_dirs.append(dll_dir) + + if not dll_dirs: + _CONFIGURED = True + return + + current_path_parts = os.environ.get("PATH", "").split(os.pathsep) + current_path_norm = {part.casefold() for part in current_path_parts if part} + + prepend: list[str] = [] + for dll_dir in dll_dirs: + dll_dir_str = str(dll_dir) + if dll_dir_str.casefold() not in current_path_norm: + prepend.append(dll_dir_str) + add_dll_directory = getattr(os, "add_dll_directory", None) + if add_dll_directory is not None: + _DLL_HANDLES.append(add_dll_directory(dll_dir_str)) + + if prepend: + os.environ["PATH"] = os.pathsep.join([*prepend, os.environ.get("PATH", "")]) + + _CONFIGURED = True diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c944269..ea6f27e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2,8 +2,12 @@ from __future__ import annotations +import sys +from types import ModuleType, SimpleNamespace + import pytest +from ocr_sprint.pipeline import table as table_module from ocr_sprint.pipeline.table import ( DetectedTable, extract_tables_from_pp_result, @@ -82,6 +86,34 @@ class TestDetectedTable: assert table.n_cols == 0 +class TestPpStructureInit: + def test_gpu_init_falls_back_to_cpu(self, monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[dict[str, object]] = [] + + class FakePPStructure: + def 
__init__(self, **kwargs: object) -> None: + calls.append(kwargs) + if kwargs["use_gpu"]: + raise RuntimeError("gpu init failed") + + fake_paddleocr = ModuleType("paddleocr") + fake_paddleocr.PPStructure = FakePPStructure + monkeypatch.setitem(sys.modules, "paddleocr", fake_paddleocr) + monkeypatch.setattr( + table_module, + "get_settings", + lambda: SimpleNamespace(ocr_lang="latin", ocr_use_gpu=True), + ) + + engine = table_module._build_pp_structure() + + assert isinstance(engine, FakePPStructure) + assert calls == [ + {"lang": "en", "use_gpu": True, "layout": True, "show_log": False}, + {"lang": "en", "use_gpu": False, "layout": True, "show_log": False}, + ] + + @pytest.fixture def sample_personnel_table() -> DetectedTable: """Header + three personnel rows in a typical Polres-level format.""" diff --git a/update.ps1 b/update.ps1 index 7b3b432..d3edb35 100644 --- a/update.ps1 +++ b/update.ps1 @@ -1,24 +1,178 @@ #!/usr/bin/env pwsh # update.ps1 - One-command update & restart for ocr-sprint-service (local dev) +param( + [ValidateSet("cpu", "gpu")] + [string] $OcrMode +) + +$ErrorActionPreference = "Stop" + $Port = 8000 +$ProjectRoot = $PSScriptRoot +$VenvDir = Join-Path $ProjectRoot ".venv" +$Python = Join-Path $VenvDir "Scripts\python.exe" + +function Invoke-Step { + param( + [Parameter(Mandatory = $true)] + [scriptblock] $Command, + [Parameter(Mandatory = $true)] + [string] $FailureMessage + ) + + & $Command + if ($LASTEXITCODE -ne 0) { + Write-Host " $FailureMessage" -ForegroundColor Red + exit $LASTEXITCODE + } +} + +function Get-DotEnvValue { + param( + [Parameter(Mandatory = $true)] + [string] $Name + ) + + $envFile = Join-Path $ProjectRoot ".env" + if (Test-Path $envFile) { + $line = Get-Content $envFile | Where-Object { $_ -match "^\s*$Name\s*=" } | Select-Object -Last 1 + if ($line) { + return (($line -split "=", 2)[1] -split "\s+#", 2)[0].Trim() + } + } + return [Environment]::GetEnvironmentVariable($Name) +} + +function Set-DotEnvValue { + param( + 
[Parameter(Mandatory = $true)] + [string] $Name, + [Parameter(Mandatory = $true)] + [string] $Value + ) + + $envFile = Join-Path $ProjectRoot ".env" + if (-not (Test-Path $envFile)) { + New-Item -Path $envFile -ItemType File | Out-Null + } + + $lines = @(Get-Content $envFile) + $updated = $false + for ($i = 0; $i -lt $lines.Count; $i++) { + if ($lines[$i] -match "^\s*$Name\s*=") { + $comment = "" + if ($lines[$i] -match "(\s+#.*)$") { + $comment = $Matches[1] + } + $lines[$i] = "$Name=$Value$comment" + $updated = $true + } + } + if (-not $updated) { + $lines += "$Name=$Value" + } + Set-Content -Path $envFile -Value $lines +} + +function Test-PythonPackage { + param( + [Parameter(Mandatory = $true)] + [string] $Name + ) + + & $Python -m pip show $Name *> $null + return $LASTEXITCODE -eq 0 +} + +function Add-NvidiaDllPaths { + $dllDirs = @( + (Join-Path $VenvDir "Lib\site-packages\nvidia\cudnn\bin"), + (Join-Path $VenvDir "Lib\site-packages\nvidia\cublas\bin"), + (Join-Path $VenvDir "Lib\site-packages\nvidia\cuda_nvrtc\bin") + ) + foreach ($dir in $dllDirs) { + if ((Test-Path $dir) -and (($env:PATH -split ";") -notcontains $dir)) { + $env:PATH = "$dir;$env:PATH" + } + } +} + +Set-Location $ProjectRoot + +if (-not (Test-Path $Python)) { + Write-Host "Virtualenv not found at $VenvDir. Creating one..." -ForegroundColor Yellow + $venvCreated = $false + $pythonLauncher = Get-Command py -ErrorAction SilentlyContinue + if ($pythonLauncher) { + foreach ($version in @("3.12", "3.11", "3.10")) { + & py "-$version" -m venv $VenvDir 2>$null + if ($LASTEXITCODE -eq 0) { + $venvCreated = $true + break + } + } + } + if (-not $venvCreated) { + $systemPython = Get-Command python -ErrorAction SilentlyContinue + if (-not $systemPython) { + Write-Host " Python was not found. Install Python 3.10-3.12, then rerun this script." 
-ForegroundColor Red + exit 1 + } + & python -m venv $VenvDir + $venvCreated = ($LASTEXITCODE -eq 0) + } + if (-not $venvCreated) { + Write-Host " Failed to create virtualenv." -ForegroundColor Red + exit $LASTEXITCODE + } +} + +$env:VIRTUAL_ENV = $VenvDir +$env:PATH = "$(Join-Path $VenvDir 'Scripts');$env:PATH" + +if ($PSBoundParameters.ContainsKey("OcrMode")) { + $ocrUseGpuValue = if ($OcrMode -eq "gpu") { "true" } else { "false" } + Set-DotEnvValue "OCR_USE_GPU" $ocrUseGpuValue + $env:OCR_USE_GPU = $ocrUseGpuValue + Write-Host "OCR mode set to $($OcrMode.ToUpperInvariant()) and saved to .env." -ForegroundColor Green +} # ── [1/5] Git pull ────────────────────────────────────────────────────────── Write-Host "`n[1/5] Pulling latest code..." -ForegroundColor Cyan -git pull +Invoke-Step { git pull } "Git pull failed." # ── [2/5] Install/update dependencies ─────────────────────────────────────── Write-Host "`n[2/5] Installing/updating dependencies..." -ForegroundColor Cyan -pip install -e ".[dev]" -q +Invoke-Step { & $Python -m pip install -e ".[dev]" -q } "Dependency install failed." + +$ocrUseGpu = (Get-DotEnvValue "OCR_USE_GPU") +if ($ocrUseGpu -and $ocrUseGpu.ToLowerInvariant() -in @("1", "true", "yes", "on")) { + Write-Host " GPU mode enabled; checking Paddle CUDA runtime..." -ForegroundColor Cyan + if (-not (Test-PythonPackage "paddlepaddle-gpu")) { + Invoke-Step { + & $Python -m pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ -q + } "Paddle GPU install failed." + } + if (-not (Test-PythonPackage "nvidia-cudnn-cu11")) { + Invoke-Step { & $Python -m pip install nvidia-cudnn-cu11==8.9.5.29 -q } "NVIDIA cuDNN install failed." + } + Add-NvidiaDllPaths +} else { + Write-Host " CPU mode enabled; checking Paddle CPU runtime..." 
-ForegroundColor Cyan + if (-not ((Test-PythonPackage "paddlepaddle") -or (Test-PythonPackage "paddlepaddle-gpu"))) { + Invoke-Step { & $Python -m pip install paddlepaddle==2.6.2 -q } "Paddle CPU install failed." + } +} # ── [3/5] Database migration ───────────────────────────────────────────────── Write-Host "`n[3/5] Running database migrations..." -ForegroundColor Cyan -alembic upgrade head +& $Python -m alembic upgrade head if ($LASTEXITCODE -ne 0) { Write-Host " Migration conflict detected, stamping current state as head..." -ForegroundColor Yellow - alembic stamp head + Invoke-Step { & $Python -m alembic stamp head } "Alembic stamp failed." Write-Host " Retrying upgrade for any remaining new migrations..." -ForegroundColor Yellow - alembic upgrade head + & $Python -m alembic upgrade head if ($LASTEXITCODE -ne 0) { Write-Host " Migration still failed. Please check alembic manually." -ForegroundColor Red exit 1 @@ -57,4 +211,4 @@ if ($connections) { # ── [5/5] Start dev server ─────────────────────────────────────────────────── Write-Host "`n[5/5] Starting dev server on port $Port (Ctrl+C to stop)..." -ForegroundColor Cyan -uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port +& $Python -m uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port