feat: implement PP-Structure table extraction pipeline with GPU runtime configuration support

This commit is contained in:
Adriankf59
2026-04-27 00:51:23 +07:00
parent 9d969e61fd
commit 6d793758ff
12 changed files with 896 additions and 31 deletions

View File

@@ -10,7 +10,8 @@ STORAGE_LOCAL_DIR=./storage
# ==== OCR ==== # ==== OCR ====
OCR_LANG=latin # PaddleOCR lang code; "latin" works well for Bahasa Indonesia OCR_LANG=latin # PaddleOCR lang code; "latin" works well for Bahasa Indonesia
OCR_USE_GPU=false # set true if running on a GPU host OCR_USE_GPU=false # set true if running on a GPU host
OCR_DET_MODEL_DIR= # leave empty to use PaddleOCR defaults # Leave empty to use PaddleOCR defaults.
OCR_DET_MODEL_DIR=
OCR_REC_MODEL_DIR= OCR_REC_MODEL_DIR=
OCR_CLS_MODEL_DIR= OCR_CLS_MODEL_DIR=
OCR_MAX_IMAGE_SIDE=2200 # downscale longest side before OCR OCR_MAX_IMAGE_SIDE=2200 # downscale longest side before OCR

View File

@@ -0,0 +1,537 @@
# Frontend Integration Guide
Dokumen ini menjelaskan kontrak API yang perlu dipakai frontend untuk upload dokumen sprint, menampilkan hasil OCR, menjalankan review manual, dan approve hasil final.
## Base URL
Default local API:
```text
http://localhost:8000/api/v1
```
Untuk frontend, simpan URL di environment variable:
```env
VITE_OCR_API_BASE_URL=http://localhost:8000/api/v1
```
Jika `API_KEYS` di backend diisi, semua endpoint protected membutuhkan header:
```http
X-API-Key: <api-key>
```
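Catatan: jangan expose API key production di frontend publik. Semua variabel `VITE_*` (termasuk `VITE_OCR_API_KEY` yang dipakai pada contoh kode di dokumen ini) ikut ter-bundle ke JavaScript client, sehingga nilainya bisa dibaca siapa pun yang membuka aplikasi. Untuk deployment internal, gunakan reverse proxy atau session backend-for-frontend jika aksesnya tidak sepenuhnya trusted.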
## Health Check
```http
GET /health
GET /health/ready
```
Contoh response `/health`:
```json
{
"status": "ok",
"version": "0.1.0"
}
```
Contoh response `/health/ready`:
```json
{
"status": "ready",
"version": "0.1.0",
"models": {
"paddleocr": "ready",
"pp_structure": "disabled"
}
}
```
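Gunakan `/health/ready` untuk disable upload button sampai model OCR siap. Selama model masih dimuat, endpoint ini membalas HTTP `503` dengan `"status": "warming_up"`; HTTP `200` dengan `"status": "ready"` berarti upload sudah boleh diaktifkan.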
## Upload Dokumen
Endpoint:
```http
POST /documents
POST /documents?sync=true
```
Body harus `multipart/form-data` dengan field `file`.
Backend menerima PDF dan format image umum. Default max upload mengikuti backend config `BLOB_MAX_UPLOAD_MB`, saat ini 25 MB.
### Recommended Flow
Untuk frontend production, gunakan async flow:
1. `POST /documents`
2. Jika status HTTP `202`, ambil `job_id`
3. Poll `GET /documents/{job_id}` setiap 1-3 detik
4. Stop polling saat status `completed`, `needs_review`, atau `failed`
Untuk local dev sederhana, `POST /documents?sync=true` boleh dipakai, tetapi request bisa lama karena OCR berjalan inline.
### Upload Example
```ts
const API_BASE = import.meta.env.VITE_OCR_API_BASE_URL;
const API_KEY = import.meta.env.VITE_OCR_API_KEY;
/**
 * Upload one document through the asynchronous endpoint (`POST /documents`).
 *
 * The file is sent as `multipart/form-data` under the `file` field. No
 * `Content-Type` header is set here; `fetch` derives it (including the
 * multipart boundary) from the `FormData` body. The `X-API-Key` header is
 * only attached when `API_KEY` is configured.
 *
 * @param file - The PDF or image selected by the user.
 * @returns The parsed JSON response body, typed as `DocumentResponse`.
 * @throws The `{ status, payload }` object built by `readApiError` when the
 *   response status is not OK.
 */
async function uploadDocument(file: File) {
  const body = new FormData();
  body.append("file", file);

  const init: RequestInit = { method: "POST", body };
  if (API_KEY) {
    init.headers = { "X-API-Key": API_KEY };
  }

  const response = await fetch(`${API_BASE}/documents`, init);
  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }
  throw await readApiError(response);
}
```
## Polling Job
Endpoint:
```http
GET /documents/{job_id}
```
```ts
const TERMINAL_STATUSES = new Set(["completed", "needs_review", "failed"]);
/**
 * Fetch the current state of a job (`GET /documents/{job_id}`).
 *
 * @param jobId - Identifier returned by the upload call.
 * @returns The parsed JSON response body, typed as `DocumentResponse`.
 * @throws The `{ status, payload }` object built by `readApiError` when the
 *   response status is not OK.
 */
async function getDocument(jobId: string) {
  const url = `${API_BASE}/documents/${jobId}`;
  // Only send the API key header when one is configured.
  const init: RequestInit = API_KEY ? { headers: { "X-API-Key": API_KEY } } : {};

  const response = await fetch(url, init);
  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }
  throw await readApiError(response);
}
/**
 * Poll a job until it reaches a terminal status (one contained in
 * `TERMINAL_STATUSES`).
 *
 * `onUpdate` is invoked with every fetched snapshot, including the final one.
 *
 * @param jobId - Identifier returned by the upload call.
 * @param onUpdate - Callback receiving each polled `DocumentResponse`.
 * @param options - Optional tuning knobs:
 *   - `intervalMs`: delay between polls in milliseconds (default `2000`).
 *   - `signal`: an `AbortSignal`; aborting it stops polling (for example when
 *     the screen that started the poll is closed) and rejects the returned
 *     promise with an `AbortError` `DOMException`.
 * @returns The first snapshot whose status is terminal.
 * @throws Whatever `getDocument` throws, or an `AbortError` when aborted.
 */
async function pollDocument(
  jobId: string,
  onUpdate: (doc: DocumentResponse) => void,
  options: { intervalMs?: number; signal?: AbortSignal } = {},
) {
  const { intervalMs = 2000, signal } = options;
  const abortError = () => new DOMException("Polling aborted", "AbortError");

  for (;;) {
    // Check before every request so an abort during onUpdate is honoured.
    if (signal?.aborted) {
      throw abortError();
    }

    const doc = await getDocument(jobId);
    onUpdate(doc);
    if (TERMINAL_STATUSES.has(doc.status)) {
      return doc;
    }

    // Abortable sleep: resolves after intervalMs, rejects as soon as the
    // signal fires so the caller is not kept waiting for the timer.
    await new Promise<void>((resolve, reject) => {
      const onAbort = () => {
        clearTimeout(timer);
        reject(abortError());
      };
      const timer = setTimeout(() => {
        signal?.removeEventListener("abort", onAbort);
        resolve();
      }, intervalMs);
      signal?.addEventListener("abort", onAbort, { once: true });
    });
  }
}
```
## Response Schema
### DocumentResponse
```ts
type DocumentStatus =
| "pending"
| "processing"
| "completed"
| "needs_review"
| "failed";
type DocumentResponse = {
job_id: string;
status: DocumentStatus;
confidence: number | null;
data: ExtractionResult | null;
review_flags: ReviewFlag[];
error: string | null;
approved: boolean;
reviewed_by: string | null;
reviewed_at: string | null;
};
```
### ExtractionResult
```ts
type ExtractionResult = {
header: HeaderFields;
personel: PersonnelEntry[];
untuk: string[];
ttd: Signatory;
raw_text: string;
confidence: number;
review_flags: ReviewFlag[];
};
type HeaderFields = {
nomor_sprint: string | null;
tanggal: string | null; // YYYY-MM-DD
satuan_penerbit: string | null;
perihal: string | null;
dasar: string[];
};
type PersonnelEntry = {
no: number | null;
pangkat: string | null;
nrp: string | null;
nama: string | null;
jabatan_dinas: string | null;
jabatan_sprint: string | null;
keterangan: string | null;
confidence: number;
};
type Signatory = {
nama: string | null;
pangkat: string | null;
nrp: string | null;
jabatan: string | null;
};
```
### Review Flags
```ts
type ReviewFlag =
| "low_ocr_confidence"
| "missing_field"
| "invalid_nrp"
| "unknown_pangkat"
| "personnel_count_mismatch"
| "date_parse_failed"
| "llm_fallback"
| "llm_unavailable"
| "personnel_text_fallback"
| "personnel_text_fallback_no_nrp"
| "incomplete_personnel_row";
```
Recommended UI labels:
| Flag | Label |
|---|---|
| `low_ocr_confidence` | Confidence OCR rendah |
| `missing_field` | Field wajib belum lengkap |
| `invalid_nrp` | NRP tidak valid |
| `unknown_pangkat` | Pangkat tidak dikenali |
| `personnel_count_mismatch` | Jumlah personel perlu dicek |
| `date_parse_failed` | Tanggal gagal dibaca |
| `llm_fallback` | Sebagian field diisi fallback LLM |
| `llm_unavailable` | LLM tidak tersedia |
| `personnel_text_fallback` | Personel dibaca dari fallback teks |
| `personnel_text_fallback_no_nrp` | Personel dibaca tanpa NRP |
| `incomplete_personnel_row` | Baris personel belum lengkap |
## Example Final Response
```json
{
"job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
"status": "needs_review",
"confidence": 0.82,
"data": {
"header": {
"nomor_sprint": "Sprin/123/IV/2026",
"tanggal": "2026-04-21",
"satuan_penerbit": "POLRES BANJAR",
"perihal": "Instruktur Ops Pekat I Lodaya 2026",
"dasar": []
},
"personel": [
{
"no": 1,
"pangkat": "IPDA",
"nrp": "12345678",
"nama": "BUDI SANTOSO",
"jabatan_dinas": "KANIT",
"jabatan_sprint": "INSTRUKTUR",
"keterangan": null,
"confidence": 0.91
}
],
"untuk": ["Melaksanakan kegiatan sesuai surat perintah."],
"ttd": {
"nama": "AGUS",
"pangkat": "AKBP",
"nrp": "87654321",
"jabatan": "KAPOLRES"
},
"raw_text": "full OCR text...",
"confidence": 0.82,
"review_flags": ["low_ocr_confidence"]
},
"review_flags": ["low_ocr_confidence"],
"error": null,
"approved": false,
"reviewed_by": null,
"reviewed_at": null
}
```
`raw_text` bisa panjang. Tampilkan di collapsible/debug panel, bukan di layar utama.
## Review dan Koreksi HITL
Frontend review screen sebaiknya mengizinkan editor untuk:
- Header: nomor sprint, tanggal, satuan penerbit, perihal, dasar
- Personel: pangkat, NRP, nama, jabatan dinas, jabatan sprint, keterangan
- Untuk: daftar tugas
- TTD: nama, pangkat, NRP, jabatan
### Patch Corrections
Endpoint:
```http
PATCH /documents/{job_id}
```
Body:
```json
{
"corrections": [
{
"path": "header.perihal",
"value": "Pelaksanaan Operasi Pekat I Lodaya 2026",
"reason": "OCR membaca perihal tidak lengkap"
},
{
"path": "personel[0].nama",
"value": "BUDI SANTOSO",
"reason": "Perbaikan nama"
}
]
}
```
Header opsional untuk audit trail:
```http
X-User-Id: reviewer-a
```
Path yang umum dipakai:
```text
header.nomor_sprint
header.tanggal
header.satuan_penerbit
header.perihal
header.dasar
ttd.nama
ttd.pangkat
ttd.nrp
ttd.jabatan
personel[0].pangkat
personel[0].nrp
personel[0].nama
personel[0].jabatan_dinas
personel[0].jabatan_sprint
personel[0].keterangan
untuk
```
Semua correction dalam satu request bersifat atomic. Jika satu path invalid, seluruh batch ditolak dan tidak ada perubahan disimpan.
### Patch Example
```ts
/**
 * Submit a batch of manual corrections (`PATCH /documents/{job_id}`).
 *
 * @param jobId - Identifier of the job being corrected.
 * @param corrections - Field corrections, sent as `{ corrections }` in the
 *   JSON request body.
 * @param userId - Optional reviewer identifier, forwarded as `X-User-Id`.
 * @returns The parsed JSON response body, typed as `DocumentResponse`.
 * @throws The `{ status, payload }` object built by `readApiError` when the
 *   response status is not OK.
 */
async function patchDocument(jobId: string, corrections: FieldCorrection[], userId?: string) {
  // Optional headers are only included when their value is set.
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    ...(API_KEY ? { "X-API-Key": API_KEY } : {}),
    ...(userId ? { "X-User-Id": userId } : {}),
  };

  const response = await fetch(`${API_BASE}/documents/${jobId}`, {
    method: "PATCH",
    headers: requestHeaders,
    body: JSON.stringify({ corrections }),
  });
  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }
  throw await readApiError(response);
}
type FieldCorrection = {
path: string;
value: unknown;
reason?: string | null;
};
```
## Correction History
Endpoint:
```http
GET /documents/{job_id}/history
```
Response:
```ts
type CorrectionEventResponse = {
id: number;
job_id: string;
field_path: string;
old_value: unknown | null;
new_value: unknown | null;
corrected_by: string | null;
reason: string | null;
corrected_at: string;
};
```
Gunakan endpoint ini untuk audit panel di halaman review.
## Approve Final Result
Endpoint:
```http
POST /documents/{job_id}/approve
```
Header opsional:
```http
X-User-Id: reviewer-a
```
Response:
```json
{
"job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
"approved": true,
"reviewed_by": "reviewer-a",
"reviewed_at": "2026-04-26T16:30:00"
}
```
Setelah approved, `PATCH /documents/{job_id}` akan ditolak dengan `409`.
## Error Handling
Application errors:
```json
{
"error": "UnsupportedDocumentError",
"message": "Uploaded file is empty."
}
```
FastAPI validation errors memakai shape standar:
```json
{
"detail": [
{
"type": "missing",
"loc": ["body", "file"],
"msg": "Field required"
}
]
}
```
Helper error:
```ts
/**
 * Convert a failed `Response` into a plain `{ status, payload }` error object.
 *
 * The body is read exactly once as text and then parsed as JSON when
 * possible. A response body can only be consumed a single time, so calling
 * `res.json()` first and falling back to `res.text()` would itself throw for
 * non-JSON bodies (for example an HTML error page from a proxy).
 *
 * @param res - The non-OK response returned by `fetch`.
 * @returns `status` is the HTTP status code; `payload` is the parsed JSON
 *   body, or the raw body text when it is not valid JSON.
 */
async function readApiError(res: Response) {
  const rawBody = await res.text();

  let payload: unknown = rawBody;
  try {
    payload = JSON.parse(rawBody);
  } catch {
    // Not JSON: keep the raw text as the payload.
  }

  return {
    status: res.status,
    payload,
  };
}
```
Recommended UI handling:
| HTTP Status | UI Handling |
|---|---|
| `400` | Tampilkan pesan validasi/upload |
| `401` | Session/API key tidak valid |
| `404` | Job tidak ditemukan |
| `409` | Job belum selesai atau sudah approved |
| `422` | Form correction tidak valid |
| `500` | Tampilkan error umum dan minta operator cek log backend |
## Ground Truth Admin
Endpoint ini opsional untuk dashboard admin/training data.
```http
GET /ground-truth/stats?top_n=10
GET /ground-truth/export?approved_only=true&has_corrections=true&limit=1000
```
`/ground-truth/export` mengembalikan `application/x-ndjson`, satu JSON per baris. Frontend biasanya cukup menyediakan tombol download, bukan parse seluruh stream di browser.
## Recommended Screens
1. Upload screen
- Dropzone file PDF/image
- Health readiness badge
- Upload progress
- Processing state setelah `job_id` diterima
2. Result screen
- Status badge
- Confidence score
- Review flags
- Header summary
- Personnel table
- Untuk list
- TTD section
- Raw OCR collapsible
3. Review screen
- Editable fields
- Dirty-state tracking
- Correction reason input
- Save corrections via `PATCH`
- History panel
- Approve button
4. Admin screen
- Health/ready status
- Ground-truth stats
- Export approved samples
## UX Rules
- Jangan tunggu `POST /documents?sync=true` untuk production UI; gunakan async + polling.
- Disable approve kalau status masih `pending` atau `processing`.
- Tampilkan `needs_review` sebagai hasil yang berhasil diproses tetapi perlu validasi manusia.
- Jangan render `raw_text` sebagai konten utama.
- Pada `failed`, tampilkan `error` dari response jika ada.
- Pada confidence rendah, arahkan user ke review fields yang punya flag terkait.

49
docs/OCR-RUNTIME-MODES.md Normal file
View File

@@ -0,0 +1,49 @@
# OCR Runtime Modes
Backend OCR bisa dijalankan dalam mode CPU atau GPU lewat konfigurasi `OCR_USE_GPU`.
## Cara Pakai
Mode CPU:
```powershell
.\update.ps1 -OcrMode cpu
```
Mode GPU:
```powershell
.\update.ps1 -OcrMode gpu
```
Jika parameter tidak diberikan, `update.ps1` memakai nilai yang sudah ada di `.env`.
```env
OCR_USE_GPU=false
```
atau:
```env
OCR_USE_GPU=true
```
## Perilaku Script
- `-OcrMode cpu` menyimpan `OCR_USE_GPU=false` ke `.env`.
- `-OcrMode gpu` menyimpan `OCR_USE_GPU=true` ke `.env`.
- Script tidak menghapus package Paddle/CUDA yang sudah terpasang.
- Dalam mode GPU, script akan memasang `paddlepaddle-gpu` dan runtime cuDNN/cuBLAS jika belum ada.
- Dalam mode CPU, script hanya memasang `paddlepaddle` CPU jika belum ada runtime Paddle sama sekali.
## Catatan
Mode CPU tidak membutuhkan CUDA, cuDNN, atau driver NVIDIA.
Mode GPU membutuhkan NVIDIA driver dan runtime CUDA/cuDNN yang cocok. Pada Windows, backend juga menambahkan folder DLL NVIDIA dari `.venv` secara otomatis sebelum PaddleOCR diinisialisasi.
`TABLES_ENABLED` adalah konfigurasi terpisah dari mode CPU/GPU. Jika PP-Structure belum stabil di environment lokal, biarkan:
```env
TABLES_ENABLED=false
```

View File

@@ -28,13 +28,6 @@ from functools import partial
from typing import Annotated from typing import Annotated
from uuid import UUID, uuid4 from uuid import UUID, uuid4
# Thread pool dedicated to blocking OCR work. Using a *separate* pool
# (rather than the default loop executor) lets us cap the number of
# concurrent heavy OCR jobs independently of other thread-pool users.
# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
from fastapi import ( from fastapi import (
APIRouter, APIRouter,
Depends, Depends,
@@ -73,6 +66,13 @@ from ocr_sprint.schemas.review import (
from ocr_sprint.storage.blob import get_blob_storage from ocr_sprint.storage.blob import get_blob_storage
from ocr_sprint.utils.logging import get_logger from ocr_sprint.utils.logging import get_logger
# Thread pool dedicated to blocking OCR work. Using a *separate* pool
# (rather than the default loop executor) lets us cap the number of
# concurrent heavy OCR jobs independently of other thread-pool users.
# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
router = APIRouter( router = APIRouter(
prefix="/documents", prefix="/documents",
tags=["documents"], tags=["documents"],
@@ -99,18 +99,17 @@ def _row_to_response(row: object) -> DocumentResponse:
assert isinstance(row, JobRow) assert isinstance(row, JobRow)
status_enum = DocumentStatus(row.status) status_enum = DocumentStatus(row.status)
personel_list = None result_obj = None
if row.result is not None: if row.result is not None:
result_obj = ExtractionResult.model_validate(row.result) result_obj = ExtractionResult.model_validate(row.result)
# Auto-number personnel entries sequentially (1, 2, 3, ...) # Auto-number personnel entries sequentially (1, 2, 3, ...)
for idx, entry in enumerate(result_obj.personel, start=1): for idx, entry in enumerate(result_obj.personel, start=1):
entry.no = idx entry.no = idx
personel_list = result_obj.personel
return DocumentResponse( return DocumentResponse(
job_id=row.job_id, job_id=row.job_id,
status=status_enum, status=status_enum,
confidence=row.confidence, confidence=row.confidence,
data=personel_list, data=result_obj,
review_flags=list(row.review_flags or []), review_flags=list(row.review_flags or []),
error=row.error, error=row.error,
approved=bool(row.approved), approved=bool(row.approved),

View File

@@ -6,6 +6,7 @@ from fastapi import APIRouter
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from ocr_sprint import __version__ from ocr_sprint import __version__
from ocr_sprint.config import get_settings
from ocr_sprint.pipeline import ocr as _ocr from ocr_sprint.pipeline import ocr as _ocr
from ocr_sprint.pipeline import table as _table from ocr_sprint.pipeline import table as _table
@@ -21,15 +22,18 @@ async def health() -> dict[str, str]:
@router.get("/health/ready") @router.get("/health/ready")
async def readiness() -> JSONResponse: async def readiness() -> JSONResponse:
"""Readiness check — returns 200 when OCR models are loaded, 503 if still warming up.""" """Readiness check — returns 200 when OCR models are loaded, 503 if still warming up."""
settings = get_settings()
ocr_ready = _ocr._instance is not None ocr_ready = _ocr._instance is not None
table_ready = _table._instance is not None table_ready = (not settings.tables_enabled) or _table._instance is not None
ready = ocr_ready and table_ready ready = ocr_ready and table_ready
payload = { payload = {
"status": "ready" if ready else "warming_up", "status": "ready" if ready else "warming_up",
"version": __version__, "version": __version__,
"models": { "models": {
"paddleocr": "ready" if ocr_ready else "loading", "paddleocr": "ready" if ocr_ready else "loading",
"pp_structure": "ready" if table_ready else "loading", "pp_structure": (
"disabled" if not settings.tables_enabled else "ready" if table_ready else "loading"
),
}, },
} }
return JSONResponse(content=payload, status_code=200 if ready else 503) return JSONResponse(content=payload, status_code=200 if ready else 503)

View File

@@ -88,6 +88,17 @@ def create_app() -> FastAPI:
) )
register_error_handlers(app) register_error_handlers(app)
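# CORS — NOTE: this configuration accepts requests from *any* origin and
# allows credentials. Replace the "*" origin with an explicit list of the
# frontend dev/production origins before exposing the API outside a trusted network.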
from fastapi.middleware.cors import CORSMiddleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.add_middleware(MetricsMiddleware) app.add_middleware(MetricsMiddleware)
app.include_router(health.router, prefix="/api/v1") app.include_router(health.router, prefix="/api/v1")
app.include_router(documents.router, prefix="/api/v1") app.include_router(documents.router, prefix="/api/v1")

View File

@@ -122,9 +122,14 @@ class OCRPage:
def _build_paddleocr() -> PaddleOCR: def _build_paddleocr() -> PaddleOCR:
s = get_settings()
if s.ocr_use_gpu:
from ocr_sprint.utils.gpu import configure_nvidia_dll_path
configure_nvidia_dll_path()
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
s = get_settings()
kwargs: dict[str, object] = { kwargs: dict[str, object] = {
"lang": s.ocr_lang, "lang": s.ocr_lang,
"use_angle_cls": True, "use_angle_cls": True,

View File

@@ -67,24 +67,41 @@ class DetectedTable:
# ---------- PP-Structure singleton ---------- # ---------- PP-Structure singleton ----------
def _create_pp_structure(
pp_structure_cls: type[PPStructure], pp_lang: str, use_gpu: bool
) -> PPStructure:
# layout=True so that PP-Structure also returns figure/text regions; we
# filter to tables only afterwards. show_log=False to keep stdout clean.
return pp_structure_cls(
lang=pp_lang,
use_gpu=use_gpu,
layout=True,
show_log=False,
)
def _build_pp_structure() -> PPStructure: def _build_pp_structure() -> PPStructure:
s = get_settings()
if s.ocr_use_gpu:
from ocr_sprint.utils.gpu import configure_nvidia_dll_path
configure_nvidia_dll_path()
from paddleocr import PPStructure from paddleocr import PPStructure
s = get_settings()
# PPStructure layout models only support 'en' and 'ch', not 'latin'. # PPStructure layout models only support 'en' and 'ch', not 'latin'.
# Use 'en' for layout/table detection — it's language-agnostic (detects # Use 'en' for layout/table detection — it's language-agnostic (detects
# table structure, not text language). OCR within cells still works for # table structure, not text language). OCR within cells still works for
# Indonesian text because the recognition model handles Latin scripts. # Indonesian text because the recognition model handles Latin scripts.
pp_lang = "en" if s.ocr_lang not in ("en", "ch") else s.ocr_lang pp_lang = "en" if s.ocr_lang not in ("en", "ch") else s.ocr_lang
_logger.info("pp_structure.init", lang=pp_lang, use_gpu=s.ocr_use_gpu) _logger.info("pp_structure.init", lang=pp_lang, use_gpu=s.ocr_use_gpu)
# layout=True so that PP-Structure also returns figure/text regions; we try:
# filter to tables only afterwards. show_log=False to keep stdout clean. return _create_pp_structure(PPStructure, pp_lang, s.ocr_use_gpu)
return PPStructure( except Exception as exc:
lang=pp_lang, if not s.ocr_use_gpu:
use_gpu=s.ocr_use_gpu, raise
layout=True, _logger.warning("pp_structure.gpu_init_failed_falling_back_cpu", error=str(exc))
show_log=False, return _create_pp_structure(PPStructure, pp_lang, False)
)
def get_pp_structure() -> PPStructure: def get_pp_structure() -> PPStructure:

View File

@@ -10,7 +10,6 @@ from uuid import UUID, uuid4
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
from ocr_sprint.schemas.extraction import ExtractionResult from ocr_sprint.schemas.extraction import ExtractionResult
from ocr_sprint.schemas.personnel import PersonnelEntry
class SourceKind(str, Enum): class SourceKind(str, Enum):
@@ -53,7 +52,7 @@ class DocumentResponse(BaseModel):
job_id: UUID job_id: UUID
status: DocumentStatus status: DocumentStatus
confidence: float | None = None confidence: float | None = None
data: list[PersonnelEntry] | None = None data: ExtractionResult | None = None
review_flags: list[str] = Field(default_factory=list) review_flags: list[str] = Field(default_factory=list)
error: str | None = None error: str | None = None
# Phase 6 — HITL review state. # Phase 6 — HITL review state.

View File

@@ -0,0 +1,57 @@
"""GPU runtime helpers."""
from __future__ import annotations
import os
from pathlib import Path
# Handles returned by os.add_dll_directory() are stored here.
_DLL_HANDLES: list[object] = []
# Set once configure_nvidia_dll_path() has run to completion, so that later
# calls return immediately.
_CONFIGURED = False


def configure_nvidia_dll_path() -> None:
    """Expose NVIDIA wheel DLL directories to the Windows dynamic loader.

    Paddle's Windows GPU wheels dynamically load CUDA/cuDNN DLLs by name. When
    those DLLs come from Python packages such as ``nvidia-cudnn-cu11`` instead
    of a system-wide CUDA Toolkit install, their ``bin`` folders are not on
    ``PATH`` by default.

    For each of the ``nvidia.cudnn``, ``nvidia.cublas`` and
    ``nvidia.cuda_nvrtc`` packages that can be imported and has a ``bin``
    directory next to its module file, the directory is registered through
    ``os.add_dll_directory`` (when that function exists) and prepended to
    ``PATH`` unless an equal entry (case-insensitive) is already present.

    The function does nothing on non-Windows platforms and does its work at
    most once per process.
    """
    global _CONFIGURED

    if _CONFIGURED or os.name != "nt":
        return

    package_names = ("nvidia.cudnn", "nvidia.cublas", "nvidia.cuda_nvrtc")
    dll_dirs: list[Path] = []
    for package_name in package_names:
        try:
            module = __import__(package_name, fromlist=["__file__"])
        except Exception:
            # Best effort: a package that cannot be imported is skipped.
            continue
        module_file = getattr(module, "__file__", None)
        if not module_file:
            continue
        dll_dir = Path(module_file).resolve().parent / "bin"
        if dll_dir.is_dir():
            dll_dirs.append(dll_dir)

    if not dll_dirs:
        _CONFIGURED = True
        return

    # Read PATH once; the same snapshot is used for the duplicate check and
    # for building the new value.
    current_path = os.environ.get("PATH", "")
    known_entries = {part.casefold() for part in current_path.split(os.pathsep) if part}
    # Looked up once instead of on every loop iteration.
    add_dll_directory = getattr(os, "add_dll_directory", None)

    prepend: list[str] = []
    for dll_dir in dll_dirs:
        dll_dir_str = str(dll_dir)
        if dll_dir_str.casefold() not in known_entries:
            prepend.append(dll_dir_str)
        if add_dll_directory is not None:
            _DLL_HANDLES.append(add_dll_directory(dll_dir_str))

    if prepend:
        # Do not append an empty component (trailing separator) when PATH was
        # empty or unset.
        path_parts = [*prepend, current_path] if current_path else prepend
        os.environ["PATH"] = os.pathsep.join(path_parts)

    _CONFIGURED = True

View File

@@ -2,8 +2,12 @@
from __future__ import annotations from __future__ import annotations
import sys
from types import ModuleType, SimpleNamespace
import pytest import pytest
from ocr_sprint.pipeline import table as table_module
from ocr_sprint.pipeline.table import ( from ocr_sprint.pipeline.table import (
DetectedTable, DetectedTable,
extract_tables_from_pp_result, extract_tables_from_pp_result,
@@ -82,6 +86,34 @@ class TestDetectedTable:
assert table.n_cols == 0 assert table.n_cols == 0
class TestPpStructureInit:
    """Construction behaviour of ``table_module._build_pp_structure``."""

    def test_gpu_init_falls_back_to_cpu(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """A failing GPU construction is followed by one CPU construction."""
        recorded_kwargs: list[dict[str, object]] = []

        class FakePPStructure:
            # Stand-in engine: records constructor kwargs and refuses GPU mode.
            def __init__(self, **kwargs: object) -> None:
                recorded_kwargs.append(kwargs)
                if kwargs["use_gpu"]:
                    raise RuntimeError("gpu init failed")

        # Make `from paddleocr import PPStructure` resolve to the fake class.
        stub_module = ModuleType("paddleocr")
        stub_module.PPStructure = FakePPStructure
        monkeypatch.setitem(sys.modules, "paddleocr", stub_module)

        def fake_settings() -> SimpleNamespace:
            return SimpleNamespace(ocr_lang="latin", ocr_use_gpu=True)

        monkeypatch.setattr(table_module, "get_settings", fake_settings)

        built = table_module._build_pp_structure()

        expected_calls = [
            {"lang": "en", "use_gpu": gpu_flag, "layout": True, "show_log": False}
            for gpu_flag in (True, False)
        ]
        assert isinstance(built, FakePPStructure)
        assert recorded_kwargs == expected_calls
@pytest.fixture @pytest.fixture
def sample_personnel_table() -> DetectedTable: def sample_personnel_table() -> DetectedTable:
"""Header + three personnel rows in a typical Polres-level format.""" """Header + three personnel rows in a typical Polres-level format."""

View File

@@ -1,24 +1,178 @@
#!/usr/bin/env pwsh #!/usr/bin/env pwsh
# update.ps1 - One-command update & restart for ocr-sprint-service (local dev) # update.ps1 - One-command update & restart for ocr-sprint-service (local dev)
param(
[ValidateSet("cpu", "gpu")]
[string] $OcrMode
)
$ErrorActionPreference = "Stop"
$Port = 8000 $Port = 8000
$ProjectRoot = $PSScriptRoot
$VenvDir = Join-Path $ProjectRoot ".venv"
$Python = Join-Path $VenvDir "Scripts\python.exe"
function Invoke-Step {
    <#
    .SYNOPSIS
    Runs a script block and terminates the script when it leaves a non-zero exit code.
    .PARAMETER Command
    Script block to invoke.
    .PARAMETER FailureMessage
    Message printed in red before exiting when $LASTEXITCODE is not 0.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [scriptblock] $Command,

        [Parameter(Mandatory = $true)]
        [string] $FailureMessage
    )

    & $Command

    # Capture the exit code right after the invocation.
    $stepExitCode = $LASTEXITCODE
    if ($stepExitCode -eq 0) {
        return
    }

    Write-Host "  $FailureMessage" -ForegroundColor Red
    exit $stepExitCode
}
function Get-DotEnvValue {
    <#
    .SYNOPSIS
    Reads a setting from the project's .env file, falling back to the process environment.
    .DESCRIPTION
    Uses the last line in "$ProjectRoot\.env" that assigns NAME. Everything
    after the first "=" is taken as the value; an inline comment (whitespace
    followed by "#") is dropped, the value is trimmed, and one pair of
    surrounding single or double quotes is removed so that NAME="true" and
    NAME=true yield the same result. When the file or the key is missing, the
    process environment variable of the same name is returned instead.
    .PARAMETER Name
    Name of the setting. It is matched literally.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name
    )

    $envFile = Join-Path $ProjectRoot ".env"
    if (Test-Path $envFile) {
        # Escape the name so regex metacharacters in it cannot change the match.
        $assignmentPattern = "^\s*$([regex]::Escape($Name))\s*="
        $line = Get-Content $envFile | Where-Object { $_ -match $assignmentPattern } | Select-Object -Last 1
        if ($line) {
            $value = (($line -split "=", 2)[1] -split "\s+#", 2)[0].Trim()
            # Strip one pair of matching quotes around the value.
            if ($value -match '^"(.*)"$' -or $value -match "^'(.*)'$") {
                $value = $Matches[1]
            }
            return $value
        }
    }

    return [Environment]::GetEnvironmentVariable($Name)
}
function Set-DotEnvValue {
    <#
    .SYNOPSIS
    Writes NAME=VALUE into the project's .env file, creating the file if needed.
    .DESCRIPTION
    Every existing line that assigns NAME is rewritten as "NAME=VALUE"; a
    trailing inline comment (whitespace followed by "#...") on that line is
    kept. When no line assigns NAME, "NAME=VALUE" is appended.
    .PARAMETER Name
    Name of the setting. It is matched literally.
    .PARAMETER Value
    New value to store.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name,

        [Parameter(Mandatory = $true)]
        [string] $Value
    )

    $envFile = Join-Path $ProjectRoot ".env"
    if (-not (Test-Path $envFile)) {
        New-Item -Path $envFile -ItemType File | Out-Null
    }

    # Escape the name so regex metacharacters in it cannot change the match.
    $assignmentPattern = "^\s*$([regex]::Escape($Name))\s*="

    # @(...) keeps $lines an array even for an empty or single-line file.
    $lines = @(Get-Content $envFile)
    $updated = $false
    for ($i = 0; $i -lt $lines.Count; $i++) {
        if ($lines[$i] -match $assignmentPattern) {
            $comment = ""
            if ($lines[$i] -match "(\s+#.*)$") {
                $comment = $Matches[1]
            }
            $lines[$i] = "$Name=$Value$comment"
            $updated = $true
        }
    }

    if (-not $updated) {
        $lines += "$Name=$Value"
    }

    Set-Content -Path $envFile -Value $lines
}
function Test-PythonPackage {
    <#
    .SYNOPSIS
    Returns $true when "pip show NAME" exits with code 0 for the interpreter in $Python.
    .PARAMETER Name
    Distribution name passed to "pip show".
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name
    )

    # All output streams are discarded; only the exit code is used.
    & $Python -m pip show $Name *> $null
    $isInstalled = ($LASTEXITCODE -eq 0)
    return $isInstalled
}
function Add-NvidiaDllPaths {
    <#
    .SYNOPSIS
    Prepends the NVIDIA wheel "bin" directories inside the virtualenv to $env:PATH.
    .DESCRIPTION
    Checks the cudnn, cublas and cuda_nvrtc folders under
    "$VenvDir\Lib\site-packages\nvidia". A directory is prepended only when it
    exists and is not already one of the ";"-separated PATH entries.
    #>
    $nvidiaRoot = Join-Path $VenvDir "Lib\site-packages\nvidia"

    foreach ($component in @("cudnn", "cublas", "cuda_nvrtc")) {
        $binDir = Join-Path $nvidiaRoot "$component\bin"
        if (-not (Test-Path $binDir)) {
            continue
        }

        $pathEntries = $env:PATH -split ";"
        if ($pathEntries -contains $binDir) {
            continue
        }

        $env:PATH = "$binDir;$env:PATH"
    }
}
Set-Location $ProjectRoot
if (-not (Test-Path $Python)) {
Write-Host "Virtualenv not found at $VenvDir. Creating one..." -ForegroundColor Yellow
$venvCreated = $false
$pythonLauncher = Get-Command py -ErrorAction SilentlyContinue
if ($pythonLauncher) {
foreach ($version in @("3.12", "3.11", "3.10")) {
& py "-$version" -m venv $VenvDir 2>$null
if ($LASTEXITCODE -eq 0) {
$venvCreated = $true
break
}
}
}
if (-not $venvCreated) {
$systemPython = Get-Command python -ErrorAction SilentlyContinue
if (-not $systemPython) {
Write-Host " Python was not found. Install Python 3.10-3.12, then rerun this script." -ForegroundColor Red
exit 1
}
& python -m venv $VenvDir
$venvCreated = ($LASTEXITCODE -eq 0)
}
if (-not $venvCreated) {
Write-Host " Failed to create virtualenv." -ForegroundColor Red
exit $LASTEXITCODE
}
}
$env:VIRTUAL_ENV = $VenvDir
$env:PATH = "$(Join-Path $VenvDir 'Scripts');$env:PATH"
if ($PSBoundParameters.ContainsKey("OcrMode")) {
$ocrUseGpuValue = if ($OcrMode -eq "gpu") { "true" } else { "false" }
Set-DotEnvValue "OCR_USE_GPU" $ocrUseGpuValue
$env:OCR_USE_GPU = $ocrUseGpuValue
Write-Host "OCR mode set to $($OcrMode.ToUpperInvariant()) and saved to .env." -ForegroundColor Green
}
# ── [1/5] Git pull ────────────────────────────────────────────────────────── # ── [1/5] Git pull ──────────────────────────────────────────────────────────
Write-Host "`n[1/5] Pulling latest code..." -ForegroundColor Cyan Write-Host "`n[1/5] Pulling latest code..." -ForegroundColor Cyan
git pull Invoke-Step { git pull } "Git pull failed."
# ── [2/5] Install/update dependencies ─────────────────────────────────────── # ── [2/5] Install/update dependencies ───────────────────────────────────────
Write-Host "`n[2/5] Installing/updating dependencies..." -ForegroundColor Cyan Write-Host "`n[2/5] Installing/updating dependencies..." -ForegroundColor Cyan
pip install -e ".[dev]" -q Invoke-Step { & $Python -m pip install -e ".[dev]" -q } "Dependency install failed."
$ocrUseGpu = (Get-DotEnvValue "OCR_USE_GPU")
if ($ocrUseGpu -and $ocrUseGpu.ToLowerInvariant() -in @("1", "true", "yes", "on")) {
Write-Host " GPU mode enabled; checking Paddle CUDA runtime..." -ForegroundColor Cyan
if (-not (Test-PythonPackage "paddlepaddle-gpu")) {
Invoke-Step {
& $Python -m pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ -q
} "Paddle GPU install failed."
}
if (-not (Test-PythonPackage "nvidia-cudnn-cu11")) {
Invoke-Step { & $Python -m pip install nvidia-cudnn-cu11==8.9.5.29 -q } "NVIDIA cuDNN install failed."
}
Add-NvidiaDllPaths
} else {
Write-Host " CPU mode enabled; checking Paddle CPU runtime..." -ForegroundColor Cyan
if (-not ((Test-PythonPackage "paddlepaddle") -or (Test-PythonPackage "paddlepaddle-gpu"))) {
Invoke-Step { & $Python -m pip install paddlepaddle==2.6.2 -q } "Paddle CPU install failed."
}
}
# ── [3/5] Database migration ───────────────────────────────────────────────── # ── [3/5] Database migration ─────────────────────────────────────────────────
Write-Host "`n[3/5] Running database migrations..." -ForegroundColor Cyan Write-Host "`n[3/5] Running database migrations..." -ForegroundColor Cyan
alembic upgrade head & $Python -m alembic upgrade head
if ($LASTEXITCODE -ne 0) { if ($LASTEXITCODE -ne 0) {
Write-Host " Migration conflict detected, stamping current state as head..." -ForegroundColor Yellow Write-Host " Migration conflict detected, stamping current state as head..." -ForegroundColor Yellow
alembic stamp head Invoke-Step { & $Python -m alembic stamp head } "Alembic stamp failed."
Write-Host " Retrying upgrade for any remaining new migrations..." -ForegroundColor Yellow Write-Host " Retrying upgrade for any remaining new migrations..." -ForegroundColor Yellow
alembic upgrade head & $Python -m alembic upgrade head
if ($LASTEXITCODE -ne 0) { if ($LASTEXITCODE -ne 0) {
Write-Host " Migration still failed. Please check alembic manually." -ForegroundColor Red Write-Host " Migration still failed. Please check alembic manually." -ForegroundColor Red
exit 1 exit 1
@@ -57,4 +211,4 @@ if ($connections) {
# ── [5/5] Start dev server ─────────────────────────────────────────────────── # ── [5/5] Start dev server ───────────────────────────────────────────────────
Write-Host "`n[5/5] Starting dev server on port $Port (Ctrl+C to stop)..." -ForegroundColor Cyan Write-Host "`n[5/5] Starting dev server on port $Port (Ctrl+C to stop)..." -ForegroundColor Cyan
uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port & $Python -m uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port