feat: implement PP-Structure table extraction pipeline with GPU runtime configuration support
This commit is contained in:
@@ -10,7 +10,8 @@ STORAGE_LOCAL_DIR=./storage
|
||||
# ==== OCR ====
|
||||
OCR_LANG=latin # PaddleOCR lang code; "latin" works well for Bahasa Indonesia
|
||||
OCR_USE_GPU=false # set true if running on a GPU host
|
||||
OCR_DET_MODEL_DIR= # leave empty to use PaddleOCR defaults
|
||||
# Leave empty to use PaddleOCR defaults.
|
||||
OCR_DET_MODEL_DIR=
|
||||
OCR_REC_MODEL_DIR=
|
||||
OCR_CLS_MODEL_DIR=
|
||||
OCR_MAX_IMAGE_SIDE=2200 # downscale longest side before OCR
|
||||
|
||||
537
docs/FRONTEND-INTEGRATION.md
Normal file
537
docs/FRONTEND-INTEGRATION.md
Normal file
@@ -0,0 +1,537 @@
|
||||
# Frontend Integration Guide
|
||||
|
||||
Dokumen ini menjelaskan kontrak API yang perlu dipakai frontend untuk upload dokumen sprint, menampilkan hasil OCR, menjalankan review manual, dan approve hasil final.
|
||||
|
||||
## Base URL
|
||||
|
||||
Default local API:
|
||||
|
||||
```text
|
||||
http://localhost:8000/api/v1
|
||||
```
|
||||
|
||||
Untuk frontend, simpan URL di environment variable:
|
||||
|
||||
```env
|
||||
VITE_OCR_API_BASE_URL=http://localhost:8000/api/v1
|
||||
```
|
||||
|
||||
Jika `API_KEYS` di backend diisi, semua endpoint protected membutuhkan header:
|
||||
|
||||
```http
|
||||
X-API-Key: <api-key>
|
||||
```
|
||||
|
||||
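Catatan: jangan expose API key production di frontend publik. Variabel environment berawalan `VITE_` ikut ter-bundle ke kode client, sehingga `VITE_OCR_API_KEY` pada contoh di dokumen ini bisa dibaca siapa pun yang membuka aplikasi. Untuk deployment internal, gunakan reverse proxy atau session backend-for-frontend jika aksesnya tidak sepenuhnya trusted.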
|
||||
|
||||
## Health Check
|
||||
|
||||
```http
|
||||
GET /health
|
||||
GET /health/ready
|
||||
```
|
||||
|
||||
Contoh response `/health`:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
```
|
||||
|
||||
Contoh response `/health/ready`:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ready",
|
||||
"version": "0.1.0",
|
||||
"models": {
|
||||
"paddleocr": "ready",
|
||||
"pp_structure": "disabled"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Gunakan `/health/ready` untuk disable upload button sampai model OCR siap.
|
||||
|
||||
## Upload Dokumen
|
||||
|
||||
Endpoint:
|
||||
|
||||
```http
|
||||
POST /documents
|
||||
POST /documents?sync=true
|
||||
```
|
||||
|
||||
Body harus `multipart/form-data` dengan field `file`.
|
||||
|
||||
Backend menerima PDF dan format image umum. Default max upload mengikuti backend config `BLOB_MAX_UPLOAD_MB`, saat ini 25 MB.
|
||||
|
||||
### Recommended Flow
|
||||
|
||||
Untuk frontend production, gunakan async flow:
|
||||
|
||||
1. `POST /documents`
|
||||
2. Jika status HTTP `202`, ambil `job_id`
|
||||
3. Poll `GET /documents/{job_id}` setiap 1-3 detik
|
||||
4. Stop polling saat status `completed`, `needs_review`, atau `failed`
|
||||
|
||||
Untuk local dev sederhana, `POST /documents?sync=true` boleh dipakai, tetapi request bisa lama karena OCR berjalan inline.
|
||||
|
||||
### Upload Example
|
||||
|
||||
```ts
|
||||
const API_BASE = import.meta.env.VITE_OCR_API_BASE_URL;
|
||||
const API_KEY = import.meta.env.VITE_OCR_API_KEY;
|
||||
|
||||
/**
 * Uploads one file to `POST /documents` as `multipart/form-data` (field `file`).
 *
 * The `X-API-Key` header is attached only when `API_KEY` is set.
 *
 * @param file - The document to submit.
 * @returns The parsed response body, typed as `DocumentResponse`.
 * @throws The value produced by `readApiError` when the response is not OK.
 */
async function uploadDocument(file: File) {
  const payload = new FormData();
  payload.append("file", file);

  const init: RequestInit = { method: "POST", body: payload };
  if (API_KEY) {
    init.headers = { "X-API-Key": API_KEY };
  }

  const response = await fetch(`${API_BASE}/documents`, init);
  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }

  throw await readApiError(response);
}
|
||||
```
|
||||
|
||||
## Polling Job
|
||||
|
||||
Endpoint:
|
||||
|
||||
```http
|
||||
GET /documents/{job_id}
|
||||
```
|
||||
|
||||
```ts
|
||||
const TERMINAL_STATUSES = new Set(["completed", "needs_review", "failed"]);
|
||||
|
||||
/**
 * Fetches the current state of a job from `GET /documents/{job_id}`.
 *
 * The `X-API-Key` header is attached only when `API_KEY` is set.
 *
 * @param jobId - Identifier of the job to look up.
 * @returns The parsed response body, typed as `DocumentResponse`.
 * @throws The value produced by `readApiError` when the response is not OK.
 */
async function getDocument(jobId: string) {
  const init: RequestInit = {};
  if (API_KEY) {
    init.headers = { "X-API-Key": API_KEY };
  }

  const response = await fetch(`${API_BASE}/documents/${jobId}`, init);
  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }

  throw await readApiError(response);
}
|
||||
|
||||
/**
 * Polls a job until it reaches a status listed in `TERMINAL_STATUSES`.
 *
 * Every fetched document is passed to `onUpdate`, including the final one.
 * Without `options` the behavior is unchanged: poll every 2000 ms with no
 * cancellation.
 *
 * @param jobId - Identifier of the job to poll.
 * @param onUpdate - Called with each fetched document.
 * @param options - Optional tuning. `intervalMs` is the delay between polls
 *   (default 2000). `signal` lets the caller stop polling, e.g. when the
 *   screen that started it is closed.
 * @returns The first document whose status is terminal.
 * @throws An `AbortError` `DOMException` when `signal` is aborted, or whatever
 *   `getDocument` throws.
 */
async function pollDocument(
  jobId: string,
  onUpdate: (doc: DocumentResponse) => void,
  options: { intervalMs?: number; signal?: AbortSignal } = {},
) {
  const { intervalMs = 2000, signal } = options;

  while (true) {
    // Checked before every request so an aborted caller never triggers
    // another fetch or another onUpdate call.
    if (signal?.aborted) {
      throw new DOMException("Polling aborted", "AbortError");
    }

    const doc = await getDocument(jobId);
    onUpdate(doc);

    if (TERMINAL_STATUSES.has(doc.status)) {
      return doc;
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}
|
||||
```
|
||||
|
||||
## Response Schema
|
||||
|
||||
### DocumentResponse
|
||||
|
||||
```ts
|
||||
type DocumentStatus =
|
||||
| "pending"
|
||||
| "processing"
|
||||
| "completed"
|
||||
| "needs_review"
|
||||
| "failed";
|
||||
|
||||
type DocumentResponse = {
|
||||
job_id: string;
|
||||
status: DocumentStatus;
|
||||
confidence: number | null;
|
||||
data: ExtractionResult | null;
|
||||
review_flags: ReviewFlag[];
|
||||
error: string | null;
|
||||
approved: boolean;
|
||||
reviewed_by: string | null;
|
||||
reviewed_at: string | null;
|
||||
};
|
||||
```
|
||||
|
||||
### ExtractionResult
|
||||
|
||||
```ts
|
||||
type ExtractionResult = {
|
||||
header: HeaderFields;
|
||||
personel: PersonnelEntry[];
|
||||
untuk: string[];
|
||||
ttd: Signatory;
|
||||
raw_text: string;
|
||||
confidence: number;
|
||||
review_flags: ReviewFlag[];
|
||||
};
|
||||
|
||||
type HeaderFields = {
|
||||
nomor_sprint: string | null;
|
||||
tanggal: string | null; // YYYY-MM-DD
|
||||
satuan_penerbit: string | null;
|
||||
perihal: string | null;
|
||||
dasar: string[];
|
||||
};
|
||||
|
||||
type PersonnelEntry = {
|
||||
no: number | null;
|
||||
pangkat: string | null;
|
||||
nrp: string | null;
|
||||
nama: string | null;
|
||||
jabatan_dinas: string | null;
|
||||
jabatan_sprint: string | null;
|
||||
keterangan: string | null;
|
||||
confidence: number;
|
||||
};
|
||||
|
||||
type Signatory = {
|
||||
nama: string | null;
|
||||
pangkat: string | null;
|
||||
nrp: string | null;
|
||||
jabatan: string | null;
|
||||
};
|
||||
```
|
||||
|
||||
### Review Flags
|
||||
|
||||
```ts
|
||||
type ReviewFlag =
|
||||
| "low_ocr_confidence"
|
||||
| "missing_field"
|
||||
| "invalid_nrp"
|
||||
| "unknown_pangkat"
|
||||
| "personnel_count_mismatch"
|
||||
| "date_parse_failed"
|
||||
| "llm_fallback"
|
||||
| "llm_unavailable"
|
||||
| "personnel_text_fallback"
|
||||
| "personnel_text_fallback_no_nrp"
|
||||
| "incomplete_personnel_row";
|
||||
```
|
||||
|
||||
Recommended UI labels:
|
||||
|
||||
| Flag | Label |
|
||||
|---|---|
|
||||
| `low_ocr_confidence` | Confidence OCR rendah |
|
||||
| `missing_field` | Field wajib belum lengkap |
|
||||
| `invalid_nrp` | NRP tidak valid |
|
||||
| `unknown_pangkat` | Pangkat tidak dikenali |
|
||||
| `personnel_count_mismatch` | Jumlah personel perlu dicek |
|
||||
| `date_parse_failed` | Tanggal gagal dibaca |
|
||||
| `llm_fallback` | Sebagian field diisi fallback LLM |
|
||||
| `llm_unavailable` | LLM tidak tersedia |
|
||||
| `personnel_text_fallback` | Personel dibaca dari fallback teks |
|
||||
| `personnel_text_fallback_no_nrp` | Personel dibaca tanpa NRP |
|
||||
| `incomplete_personnel_row` | Baris personel belum lengkap |
|
||||
|
||||
## Example Final Response
|
||||
|
||||
```json
|
||||
{
|
||||
"job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
|
||||
"status": "needs_review",
|
||||
"confidence": 0.82,
|
||||
"data": {
|
||||
"header": {
|
||||
"nomor_sprint": "Sprin/123/IV/2026",
|
||||
"tanggal": "2026-04-21",
|
||||
"satuan_penerbit": "POLRES BANJAR",
|
||||
"perihal": "Instruktur Ops Pekat I Lodaya 2026",
|
||||
"dasar": []
|
||||
},
|
||||
"personel": [
|
||||
{
|
||||
"no": 1,
|
||||
"pangkat": "IPDA",
|
||||
"nrp": "12345678",
|
||||
"nama": "BUDI SANTOSO",
|
||||
"jabatan_dinas": "KANIT",
|
||||
"jabatan_sprint": "INSTRUKTUR",
|
||||
"keterangan": null,
|
||||
"confidence": 0.91
|
||||
}
|
||||
],
|
||||
"untuk": ["Melaksanakan kegiatan sesuai surat perintah."],
|
||||
"ttd": {
|
||||
"nama": "AGUS",
|
||||
"pangkat": "AKBP",
|
||||
"nrp": "87654321",
|
||||
"jabatan": "KAPOLRES"
|
||||
},
|
||||
"raw_text": "full OCR text...",
|
||||
"confidence": 0.82,
|
||||
"review_flags": ["low_ocr_confidence"]
|
||||
},
|
||||
"review_flags": ["low_ocr_confidence"],
|
||||
"error": null,
|
||||
"approved": false,
|
||||
"reviewed_by": null,
|
||||
"reviewed_at": null
|
||||
}
|
||||
```
|
||||
|
||||
`raw_text` bisa panjang. Tampilkan di collapsible/debug panel, bukan di layar utama.
|
||||
|
||||
## Review dan Koreksi HITL
|
||||
|
||||
Frontend review screen sebaiknya mengizinkan editor untuk:
|
||||
|
||||
- Header: nomor sprint, tanggal, satuan penerbit, perihal, dasar
|
||||
- Personel: pangkat, NRP, nama, jabatan dinas, jabatan sprint, keterangan
|
||||
- Untuk: daftar tugas
|
||||
- TTD: nama, pangkat, NRP, jabatan
|
||||
|
||||
### Patch Corrections
|
||||
|
||||
Endpoint:
|
||||
|
||||
```http
|
||||
PATCH /documents/{job_id}
|
||||
```
|
||||
|
||||
Body:
|
||||
|
||||
```json
|
||||
{
|
||||
"corrections": [
|
||||
{
|
||||
"path": "header.perihal",
|
||||
"value": "Pelaksanaan Operasi Pekat I Lodaya 2026",
|
||||
"reason": "OCR membaca perihal tidak lengkap"
|
||||
},
|
||||
{
|
||||
"path": "personel[0].nama",
|
||||
"value": "BUDI SANTOSO",
|
||||
"reason": "Perbaikan nama"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Header opsional untuk audit trail:
|
||||
|
||||
```http
|
||||
X-User-Id: reviewer-a
|
||||
```
|
||||
|
||||
Path yang umum dipakai:
|
||||
|
||||
```text
|
||||
header.nomor_sprint
|
||||
header.tanggal
|
||||
header.satuan_penerbit
|
||||
header.perihal
|
||||
header.dasar
|
||||
ttd.nama
|
||||
ttd.pangkat
|
||||
ttd.nrp
|
||||
ttd.jabatan
|
||||
personel[0].pangkat
|
||||
personel[0].nrp
|
||||
personel[0].nama
|
||||
personel[0].jabatan_dinas
|
||||
personel[0].jabatan_sprint
|
||||
personel[0].keterangan
|
||||
untuk
|
||||
```
|
||||
|
||||
Semua correction dalam satu request bersifat atomic. Jika satu path invalid, seluruh batch ditolak dan tidak ada perubahan disimpan.
|
||||
|
||||
### Patch Example
|
||||
|
||||
```ts
|
||||
/**
 * Sends a batch of field corrections to `PATCH /documents/{job_id}`.
 *
 * @param jobId - Identifier of the job being corrected.
 * @param corrections - Corrections sent as `{ corrections }` in the JSON body.
 * @param userId - When given, sent as the `X-User-Id` header.
 * @returns The parsed response body, typed as `DocumentResponse`.
 * @throws The value produced by `readApiError` when the response is not OK.
 */
async function patchDocument(jobId: string, corrections: FieldCorrection[], userId?: string) {
  // Optional headers are only included when their value is truthy.
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    ...(API_KEY ? { "X-API-Key": API_KEY } : {}),
    ...(userId ? { "X-User-Id": userId } : {}),
  };

  const response = await fetch(`${API_BASE}/documents/${jobId}`, {
    method: "PATCH",
    headers,
    body: JSON.stringify({ corrections }),
  });

  if (response.ok) {
    return (await response.json()) as DocumentResponse;
  }

  throw await readApiError(response);
}
|
||||
|
||||
type FieldCorrection = {
|
||||
path: string;
|
||||
value: unknown;
|
||||
reason?: string | null;
|
||||
};
|
||||
```
|
||||
|
||||
## Correction History
|
||||
|
||||
Endpoint:
|
||||
|
||||
```http
|
||||
GET /documents/{job_id}/history
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```ts
|
||||
type CorrectionEventResponse = {
|
||||
id: number;
|
||||
job_id: string;
|
||||
field_path: string;
|
||||
old_value: unknown | null;
|
||||
new_value: unknown | null;
|
||||
corrected_by: string | null;
|
||||
reason: string | null;
|
||||
corrected_at: string;
|
||||
};
|
||||
```
|
||||
|
||||
Gunakan endpoint ini untuk audit panel di halaman review.
|
||||
|
||||
## Approve Final Result
|
||||
|
||||
Endpoint:
|
||||
|
||||
```http
|
||||
POST /documents/{job_id}/approve
|
||||
```
|
||||
|
||||
Header opsional:
|
||||
|
||||
```http
|
||||
X-User-Id: reviewer-a
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"job_id": "e21e83ed-a42c-4672-baec-914e5c60cc5a",
|
||||
"approved": true,
|
||||
"reviewed_by": "reviewer-a",
|
||||
"reviewed_at": "2026-04-26T16:30:00"
|
||||
}
|
||||
```
|
||||
|
||||
Setelah approved, `PATCH /documents/{job_id}` akan ditolak dengan `409`.
|
||||
|
||||
## Error Handling
|
||||
|
||||
Application errors:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": "UnsupportedDocumentError",
|
||||
"message": "Uploaded file is empty."
|
||||
}
|
||||
```
|
||||
|
||||
FastAPI validation errors memakai shape standar:
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": [
|
||||
{
|
||||
"type": "missing",
|
||||
"loc": ["body", "file"],
|
||||
"msg": "Field required"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Helper error:
|
||||
|
||||
```ts
|
||||
/**
 * Converts a failed HTTP response into a `{ status, payload }` object.
 *
 * The body is read exactly once as text and then parsed as JSON. A response
 * body can only be consumed once, so calling `res.json()` first and falling
 * back to `res.text()` would throw for every non-JSON error body.
 *
 * @param res - The response to read; its body is consumed by this call.
 * @returns `status` is the HTTP status code. `payload` is the parsed JSON
 *   body, or the raw text when the body is not valid JSON (including an
 *   empty body).
 */
async function readApiError(res: Response) {
  const raw = await res.text();

  let payload: unknown = raw;
  try {
    payload = JSON.parse(raw);
  } catch {
    // Not JSON: keep the raw text as the payload.
  }

  return {
    status: res.status,
    payload,
  };
}
|
||||
```
|
||||
|
||||
Recommended UI handling:
|
||||
|
||||
| HTTP Status | UI Handling |
|
||||
|---|---|
|
||||
| `400` | Tampilkan pesan validasi/upload |
|
||||
| `401` | Session/API key tidak valid |
|
||||
| `404` | Job tidak ditemukan |
|
||||
| `409` | Job belum selesai atau sudah approved |
|
||||
| `422` | Form correction tidak valid |
|
||||
| `500` | Tampilkan error umum dan minta operator cek log backend |
|
||||
|
||||
## Ground Truth Admin
|
||||
|
||||
Endpoint ini opsional untuk dashboard admin/training data.
|
||||
|
||||
```http
|
||||
GET /ground-truth/stats?top_n=10
|
||||
GET /ground-truth/export?approved_only=true&has_corrections=true&limit=1000
|
||||
```
|
||||
|
||||
`/ground-truth/export` mengembalikan `application/x-ndjson`, satu JSON per baris. Frontend biasanya cukup menyediakan tombol download, bukan parse seluruh stream di browser.
|
||||
|
||||
## Recommended Screens
|
||||
|
||||
1. Upload screen
|
||||
- Dropzone file PDF/image
|
||||
- Health readiness badge
|
||||
- Upload progress
|
||||
- Processing state setelah `job_id` diterima
|
||||
|
||||
2. Result screen
|
||||
- Status badge
|
||||
- Confidence score
|
||||
- Review flags
|
||||
- Header summary
|
||||
- Personnel table
|
||||
- Untuk list
|
||||
- TTD section
|
||||
- Raw OCR collapsible
|
||||
|
||||
3. Review screen
|
||||
- Editable fields
|
||||
- Dirty-state tracking
|
||||
- Correction reason input
|
||||
- Save corrections via `PATCH`
|
||||
- History panel
|
||||
- Approve button
|
||||
|
||||
4. Admin screen
|
||||
- Health/ready status
|
||||
- Ground-truth stats
|
||||
- Export approved samples
|
||||
|
||||
## UX Rules
|
||||
|
||||
- Jangan tunggu `POST /documents?sync=true` untuk production UI; gunakan async + polling.
|
||||
- Disable approve kalau status masih `pending` atau `processing`.
|
||||
- Tampilkan `needs_review` sebagai hasil yang berhasil diproses tetapi perlu validasi manusia.
|
||||
- Jangan render `raw_text` sebagai konten utama.
|
||||
- Pada `failed`, tampilkan `error` dari response jika ada.
|
||||
- Pada confidence rendah, arahkan user ke review fields yang punya flag terkait.
|
||||
49
docs/OCR-RUNTIME-MODES.md
Normal file
49
docs/OCR-RUNTIME-MODES.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# OCR Runtime Modes
|
||||
|
||||
Backend OCR bisa dijalankan dalam mode CPU atau GPU lewat konfigurasi `OCR_USE_GPU`.
|
||||
|
||||
## Cara Pakai
|
||||
|
||||
Mode CPU:
|
||||
|
||||
```powershell
|
||||
.\update.ps1 -OcrMode cpu
|
||||
```
|
||||
|
||||
Mode GPU:
|
||||
|
||||
```powershell
|
||||
.\update.ps1 -OcrMode gpu
|
||||
```
|
||||
|
||||
Jika parameter tidak diberikan, `update.ps1` memakai nilai yang sudah ada di `.env`.
|
||||
|
||||
```env
|
||||
OCR_USE_GPU=false
|
||||
```
|
||||
|
||||
atau:
|
||||
|
||||
```env
|
||||
OCR_USE_GPU=true
|
||||
```
|
||||
|
||||
## Perilaku Script
|
||||
|
||||
- `-OcrMode cpu` menyimpan `OCR_USE_GPU=false` ke `.env`.
|
||||
- `-OcrMode gpu` menyimpan `OCR_USE_GPU=true` ke `.env`.
|
||||
- Script tidak menghapus package Paddle/CUDA yang sudah terpasang.
|
||||
- Dalam mode GPU, script akan memasang `paddlepaddle-gpu` dan runtime cuDNN/cuBLAS jika belum ada.
|
||||
- Dalam mode CPU, script hanya memasang `paddlepaddle` CPU jika belum ada runtime Paddle sama sekali.
|
||||
|
||||
## Catatan
|
||||
|
||||
Mode CPU tidak membutuhkan CUDA, cuDNN, atau driver NVIDIA.
|
||||
|
||||
Mode GPU membutuhkan NVIDIA driver dan runtime CUDA/cuDNN yang cocok. Pada Windows, backend juga menambahkan folder DLL NVIDIA dari `.venv` secara otomatis sebelum PaddleOCR diinisialisasi.
|
||||
|
||||
`TABLES_ENABLED` adalah konfigurasi terpisah dari mode CPU/GPU. Jika PP-Structure belum stabil di environment lokal, biarkan:
|
||||
|
||||
```env
|
||||
TABLES_ENABLED=false
|
||||
```
|
||||
@@ -28,13 +28,6 @@ from functools import partial
|
||||
from typing import Annotated
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
# Thread pool dedicated to blocking OCR work. Using a *separate* pool
|
||||
# (rather than the default loop executor) lets us cap the number of
|
||||
# concurrent heavy OCR jobs independently of other thread-pool users.
|
||||
# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
|
||||
# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
|
||||
_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
|
||||
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
@@ -73,6 +66,13 @@ from ocr_sprint.schemas.review import (
|
||||
from ocr_sprint.storage.blob import get_blob_storage
|
||||
from ocr_sprint.utils.logging import get_logger
|
||||
|
||||
# Thread pool dedicated to blocking OCR work. Using a *separate* pool
|
||||
# (rather than the default loop executor) lets us cap the number of
|
||||
# concurrent heavy OCR jobs independently of other thread-pool users.
|
||||
# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
|
||||
# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
|
||||
_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/documents",
|
||||
tags=["documents"],
|
||||
@@ -99,18 +99,17 @@ def _row_to_response(row: object) -> DocumentResponse:
|
||||
|
||||
assert isinstance(row, JobRow)
|
||||
status_enum = DocumentStatus(row.status)
|
||||
personel_list = None
|
||||
result_obj = None
|
||||
if row.result is not None:
|
||||
result_obj = ExtractionResult.model_validate(row.result)
|
||||
# Auto-number personnel entries sequentially (1, 2, 3, ...)
|
||||
for idx, entry in enumerate(result_obj.personel, start=1):
|
||||
entry.no = idx
|
||||
personel_list = result_obj.personel
|
||||
return DocumentResponse(
|
||||
job_id=row.job_id,
|
||||
status=status_enum,
|
||||
confidence=row.confidence,
|
||||
data=personel_list,
|
||||
data=result_obj,
|
||||
review_flags=list(row.review_flags or []),
|
||||
error=row.error,
|
||||
approved=bool(row.approved),
|
||||
|
||||
@@ -6,6 +6,7 @@ from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from ocr_sprint import __version__
|
||||
from ocr_sprint.config import get_settings
|
||||
from ocr_sprint.pipeline import ocr as _ocr
|
||||
from ocr_sprint.pipeline import table as _table
|
||||
|
||||
@@ -21,15 +22,18 @@ async def health() -> dict[str, str]:
|
||||
@router.get("/health/ready")
async def readiness() -> JSONResponse:
    """Readiness check — returns 200 when OCR models are loaded, 503 if still warming up.

    PaddleOCR must always be loaded. PP-Structure only has to be loaded when
    ``settings.tables_enabled`` is true; otherwise it is reported as
    ``"disabled"`` and does not hold readiness back.

    Returns:
        A JSON response with ``status`` (``"ready"`` or ``"warming_up"``),
        ``version`` and a per-model state under ``models``.
    """
    settings = get_settings()
    ocr_ready = _ocr._instance is not None
    # A disabled table pipeline counts as ready: nothing needs to be loaded.
    table_ready = (not settings.tables_enabled) or _table._instance is not None
    ready = ocr_ready and table_ready

    if not settings.tables_enabled:
        pp_structure_state = "disabled"
    elif table_ready:
        pp_structure_state = "ready"
    else:
        pp_structure_state = "loading"

    payload = {
        "status": "ready" if ready else "warming_up",
        "version": __version__,
        "models": {
            "paddleocr": "ready" if ocr_ready else "loading",
            "pp_structure": pp_structure_state,
        },
    }
    return JSONResponse(content=payload, status_code=200 if ready else 503)
|
||||
|
||||
@@ -88,6 +88,17 @@ def create_app() -> FastAPI:
|
||||
)
|
||||
|
||||
register_error_handlers(app)
|
||||
|
||||
# CORS — allow frontend dev servers and production origins
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.add_middleware(MetricsMiddleware)
|
||||
app.include_router(health.router, prefix="/api/v1")
|
||||
app.include_router(documents.router, prefix="/api/v1")
|
||||
|
||||
@@ -122,9 +122,14 @@ class OCRPage:
|
||||
|
||||
|
||||
def _build_paddleocr() -> PaddleOCR:
|
||||
s = get_settings()
|
||||
if s.ocr_use_gpu:
|
||||
from ocr_sprint.utils.gpu import configure_nvidia_dll_path
|
||||
|
||||
configure_nvidia_dll_path()
|
||||
|
||||
from paddleocr import PaddleOCR
|
||||
|
||||
s = get_settings()
|
||||
kwargs: dict[str, object] = {
|
||||
"lang": s.ocr_lang,
|
||||
"use_angle_cls": True,
|
||||
|
||||
@@ -67,24 +67,41 @@ class DetectedTable:
|
||||
# ---------- PP-Structure singleton ----------
|
||||
|
||||
|
||||
def _create_pp_structure(
    pp_structure_cls: type[PPStructure], pp_lang: str, use_gpu: bool
) -> PPStructure:
    """Instantiate ``pp_structure_cls`` with the engine options used by this module.

    Args:
        pp_structure_cls: The class to instantiate.
        pp_lang: Value passed as ``lang``.
        use_gpu: Value passed as ``use_gpu``.

    Returns:
        The constructed instance.
    """
    engine_options = {
        "lang": pp_lang,
        "use_gpu": use_gpu,
        # Layout analysis is on so figure/text regions are returned as well;
        # callers filter down to tables afterwards.
        "layout": True,
        # Keeps stdout clean.
        "show_log": False,
    }
    return pp_structure_cls(**engine_options)
|
||||
|
||||
|
||||
def _build_pp_structure() -> PPStructure:
    """Build the PP-Structure engine from the current settings.

    When ``ocr_use_gpu`` is set, the NVIDIA DLL path helper runs before
    ``paddleocr`` is imported, and a failed GPU construction is retried once
    on CPU. In CPU mode a construction failure is re-raised unchanged.

    Returns:
        The constructed PP-Structure engine.

    Raises:
        Exception: Whatever the engine constructor raises in CPU mode, or
            during the CPU retry after a GPU failure.
    """
    s = get_settings()
    if s.ocr_use_gpu:
        from ocr_sprint.utils.gpu import configure_nvidia_dll_path

        configure_nvidia_dll_path()

    from paddleocr import PPStructure

    # PPStructure layout models only support 'en' and 'ch', not 'latin'.
    # Use 'en' for layout/table detection — it's language-agnostic (detects
    # table structure, not text language). OCR within cells still works for
    # Indonesian text because the recognition model handles Latin scripts.
    pp_lang = "en" if s.ocr_lang not in ("en", "ch") else s.ocr_lang
    _logger.info("pp_structure.init", lang=pp_lang, use_gpu=s.ocr_use_gpu)

    try:
        return _create_pp_structure(PPStructure, pp_lang, s.ocr_use_gpu)
    except Exception as exc:
        # There is nothing to fall back to when CPU mode itself failed.
        if not s.ocr_use_gpu:
            raise
        _logger.warning("pp_structure.gpu_init_failed_falling_back_cpu", error=str(exc))
        return _create_pp_structure(PPStructure, pp_lang, False)
|
||||
|
||||
|
||||
def get_pp_structure() -> PPStructure:
|
||||
|
||||
@@ -10,7 +10,6 @@ from uuid import UUID, uuid4
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from ocr_sprint.schemas.extraction import ExtractionResult
|
||||
from ocr_sprint.schemas.personnel import PersonnelEntry
|
||||
|
||||
|
||||
class SourceKind(str, Enum):
|
||||
@@ -53,7 +52,7 @@ class DocumentResponse(BaseModel):
|
||||
job_id: UUID
|
||||
status: DocumentStatus
|
||||
confidence: float | None = None
|
||||
data: list[PersonnelEntry] | None = None
|
||||
data: ExtractionResult | None = None
|
||||
review_flags: list[str] = Field(default_factory=list)
|
||||
error: str | None = None
|
||||
# Phase 6 — HITL review state.
|
||||
|
||||
57
src/ocr_sprint/utils/gpu.py
Normal file
57
src/ocr_sprint/utils/gpu.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""GPU runtime helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
_DLL_HANDLES: list[object] = []
|
||||
_CONFIGURED = False
|
||||
|
||||
|
||||
def configure_nvidia_dll_path() -> None:
|
||||
"""Expose NVIDIA wheel DLL directories to the Windows dynamic loader.
|
||||
|
||||
Paddle's Windows GPU wheels dynamically load CUDA/cuDNN DLLs by name. When
|
||||
those DLLs come from Python packages such as ``nvidia-cudnn-cu11`` instead
|
||||
of a system-wide CUDA Toolkit install, their ``bin`` folders are not on
|
||||
``PATH`` by default.
|
||||
"""
|
||||
global _CONFIGURED
|
||||
# Nothing to do when already configured or when os.name is not "nt".
if _CONFIGURED or os.name != "nt":
|
||||
return
|
||||
|
||||
package_names = ("nvidia.cudnn", "nvidia.cublas", "nvidia.cuda_nvrtc")
|
||||
dll_dirs: list[Path] = []
|
||||
# Locate each package through its __file__; packages that fail to import
# or expose no __file__ are skipped.
for package_name in package_names:
|
||||
try:
|
||||
module = __import__(package_name, fromlist=["__file__"])
|
||||
except Exception:
|
||||
continue
|
||||
module_file = getattr(module, "__file__", None)
|
||||
if not module_file:
|
||||
continue
|
||||
# Candidate DLL folder: a "bin" directory next to the package's module file.
dll_dir = Path(module_file).resolve().parent / "bin"
|
||||
if dll_dir.is_dir():
|
||||
dll_dirs.append(dll_dir)
|
||||
|
||||
# No "bin" folder found: still mark as configured so later calls return early.
if not dll_dirs:
|
||||
_CONFIGURED = True
|
||||
return
|
||||
|
||||
current_path_parts = os.environ.get("PATH", "").split(os.pathsep)
|
||||
# Case-folded, non-empty PATH entries, used to detect directories already listed.
current_path_norm = {part.casefold() for part in current_path_parts if part}
|
||||
|
||||
prepend: list[str] = []
|
||||
for dll_dir in dll_dirs:
|
||||
dll_dir_str = str(dll_dir)
|
||||
if dll_dir_str.casefold() not in current_path_norm:
|
||||
prepend.append(dll_dir_str)
|
||||
# os.add_dll_directory is looked up with getattr, so a missing attribute is
# tolerated. The returned handles are stored in _DLL_HANDLES — presumably to
# keep the registrations alive for the life of the process; TODO confirm.
add_dll_directory = getattr(os, "add_dll_directory", None)
|
||||
if add_dll_directory is not None:
|
||||
_DLL_HANDLES.append(add_dll_directory(dll_dir_str))
|
||||
|
||||
# Newly found directories go in front of the existing PATH value.
if prepend:
|
||||
os.environ["PATH"] = os.pathsep.join([*prepend, os.environ.get("PATH", "")])
|
||||
|
||||
_CONFIGURED = True
|
||||
@@ -2,8 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from types import ModuleType, SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from ocr_sprint.pipeline import table as table_module
|
||||
from ocr_sprint.pipeline.table import (
|
||||
DetectedTable,
|
||||
extract_tables_from_pp_result,
|
||||
@@ -82,6 +86,34 @@ class TestDetectedTable:
|
||||
assert table.n_cols == 0
|
||||
|
||||
|
||||
class TestPpStructureInit:
    """Construction behaviour of ``table_module._build_pp_structure``."""

    def test_gpu_init_falls_back_to_cpu(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """A failing GPU construction is followed by one CPU construction."""
        recorded_kwargs: list[dict[str, object]] = []

        class FakePPStructure:
            """Stand-in engine: records its kwargs and refuses GPU mode."""

            def __init__(self, **kwargs: object) -> None:
                recorded_kwargs.append(kwargs)
                if kwargs["use_gpu"]:
                    raise RuntimeError("gpu init failed")

        # Replace the real paddleocr module so the builder's import picks up the fake.
        stub_module = ModuleType("paddleocr")
        stub_module.PPStructure = FakePPStructure
        monkeypatch.setitem(sys.modules, "paddleocr", stub_module)

        def fake_settings() -> SimpleNamespace:
            return SimpleNamespace(ocr_lang="latin", ocr_use_gpu=True)

        monkeypatch.setattr(table_module, "get_settings", fake_settings)

        engine = table_module._build_pp_structure()

        # First attempt on GPU, second on CPU, otherwise identical options.
        expected_calls = [
            {"lang": "en", "use_gpu": gpu_flag, "layout": True, "show_log": False}
            for gpu_flag in (True, False)
        ]
        assert isinstance(engine, FakePPStructure)
        assert recorded_kwargs == expected_calls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_personnel_table() -> DetectedTable:
|
||||
"""Header + three personnel rows in a typical Polres-level format."""
|
||||
|
||||
166
update.ps1
166
update.ps1
@@ -1,24 +1,178 @@
|
||||
#!/usr/bin/env pwsh
|
||||
# update.ps1 - One-command update & restart for ocr-sprint-service (local dev)
|
||||
|
||||
param(
|
||||
[ValidateSet("cpu", "gpu")]
|
||||
[string] $OcrMode
|
||||
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
$Port = 8000
|
||||
$ProjectRoot = $PSScriptRoot
|
||||
$VenvDir = Join-Path $ProjectRoot ".venv"
|
||||
$Python = Join-Path $VenvDir "Scripts\python.exe"
|
||||
|
||||
function Invoke-Step {
    <#
    .SYNOPSIS
    Runs a script block and stops the script when $LASTEXITCODE is non-zero afterwards.

    .PARAMETER Command
    The script block to invoke.

    .PARAMETER FailureMessage
    Text printed in red before exiting with $LASTEXITCODE.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [scriptblock] $Command,

        [Parameter(Mandatory = $true)]
        [string] $FailureMessage
    )

    & $Command

    # Success path: nothing more to do.
    if ($LASTEXITCODE -eq 0) {
        return
    }

    Write-Host " $FailureMessage" -ForegroundColor Red
    exit $LASTEXITCODE
}
|
||||
|
||||
function Get-DotEnvValue {
    <#
    .SYNOPSIS
    Reads a setting from the project's .env file, falling back to the process environment.

    .DESCRIPTION
    Uses the last line in .env that assigns the given name. The value is the
    text after the first "=", with any trailing " # comment" removed and
    surrounding whitespace trimmed. When .env is missing or has no such line,
    the environment variable of the same name is returned instead.

    .PARAMETER Name
    The variable name to look up. It is matched literally.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name
    )

    $envFile = Join-Path $ProjectRoot ".env"
    if (Test-Path $envFile) {
        # Escape the name so regex metacharacters in it cannot change what matches.
        $pattern = "^\s*$([regex]::Escape($Name))\s*="
        $line = Get-Content $envFile | Where-Object { $_ -match $pattern } | Select-Object -Last 1
        if ($line) {
            return (($line -split "=", 2)[1] -split "\s+#", 2)[0].Trim()
        }
    }
    return [Environment]::GetEnvironmentVariable($Name)
}
|
||||
|
||||
function Set-DotEnvValue {
    <#
    .SYNOPSIS
    Writes NAME=VALUE into the project's .env file, creating the file if needed.

    .DESCRIPTION
    Every existing line that assigns the given name is rewritten in place and
    keeps its trailing " # comment", if any. When no line assigns the name, a
    new NAME=VALUE line is appended.

    .PARAMETER Name
    The variable name to set. It is matched literally.

    .PARAMETER Value
    The value to store.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name,

        [Parameter(Mandatory = $true)]
        [string] $Value
    )

    $envFile = Join-Path $ProjectRoot ".env"
    if (-not (Test-Path $envFile)) {
        New-Item -Path $envFile -ItemType File | Out-Null
    }

    # Escape the name so regex metacharacters in it cannot change what matches.
    $pattern = "^\s*$([regex]::Escape($Name))\s*="
    $lines = @(Get-Content $envFile)
    $updated = $false
    for ($i = 0; $i -lt $lines.Count; $i++) {
        if ($lines[$i] -match $pattern) {
            # Carry over an inline comment so the file's annotations survive the rewrite.
            $comment = ""
            if ($lines[$i] -match "(\s+#.*)$") {
                $comment = $Matches[1]
            }
            $lines[$i] = "$Name=$Value$comment"
            $updated = $true
        }
    }
    if (-not $updated) {
        $lines += "$Name=$Value"
    }
    Set-Content -Path $envFile -Value $lines
}
|
||||
|
||||
function Test-PythonPackage {
    <#
    .SYNOPSIS
    Reports whether "pip show" succeeds for a package in the interpreter at $Python.

    .PARAMETER Name
    The package name passed to "pip show".

    .OUTPUTS
    $true when pip exits with code 0, otherwise $false.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string] $Name
    )

    # All output streams are discarded; only the exit code is used.
    & $Python -m pip show $Name *> $null
    $isInstalled = ($LASTEXITCODE -eq 0)
    return $isInstalled
}
|
||||
|
||||
function Add-NvidiaDllPaths {
    <#
    .SYNOPSIS
    Prepends the NVIDIA "bin" folders found under the virtualenv to $env:PATH.

    .DESCRIPTION
    Checks the cudnn, cublas and cuda_nvrtc folders under
    $VenvDir\Lib\site-packages\nvidia. A folder is added only when it exists
    and is not already one of the ";"-separated PATH entries.
    #>
    foreach ($package in @("cudnn", "cublas", "cuda_nvrtc")) {
        $dir = Join-Path $VenvDir "Lib\site-packages\nvidia\$package\bin"

        if (-not (Test-Path $dir)) {
            continue
        }
        if (($env:PATH -split ";") -contains $dir) {
            continue
        }

        $env:PATH = "$dir;$env:PATH"
    }
}
|
||||
|
||||
Set-Location $ProjectRoot
|
||||
|
||||
if (-not (Test-Path $Python)) {
|
||||
Write-Host "Virtualenv not found at $VenvDir. Creating one..." -ForegroundColor Yellow
|
||||
$venvCreated = $false
|
||||
$pythonLauncher = Get-Command py -ErrorAction SilentlyContinue
|
||||
if ($pythonLauncher) {
|
||||
foreach ($version in @("3.12", "3.11", "3.10")) {
|
||||
& py "-$version" -m venv $VenvDir 2>$null
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
$venvCreated = $true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if (-not $venvCreated) {
|
||||
$systemPython = Get-Command python -ErrorAction SilentlyContinue
|
||||
if (-not $systemPython) {
|
||||
Write-Host " Python was not found. Install Python 3.10-3.12, then rerun this script." -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
& python -m venv $VenvDir
|
||||
$venvCreated = ($LASTEXITCODE -eq 0)
|
||||
}
|
||||
if (-not $venvCreated) {
|
||||
Write-Host " Failed to create virtualenv." -ForegroundColor Red
|
||||
exit $LASTEXITCODE
|
||||
}
|
||||
}
|
||||
|
||||
$env:VIRTUAL_ENV = $VenvDir
|
||||
$env:PATH = "$(Join-Path $VenvDir 'Scripts');$env:PATH"
|
||||
|
||||
if ($PSBoundParameters.ContainsKey("OcrMode")) {
|
||||
$ocrUseGpuValue = if ($OcrMode -eq "gpu") { "true" } else { "false" }
|
||||
Set-DotEnvValue "OCR_USE_GPU" $ocrUseGpuValue
|
||||
$env:OCR_USE_GPU = $ocrUseGpuValue
|
||||
Write-Host "OCR mode set to $($OcrMode.ToUpperInvariant()) and saved to .env." -ForegroundColor Green
|
||||
}
|
||||
|
||||
# ── [1/5] Git pull ──────────────────────────────────────────────────────────
|
||||
Write-Host "`n[1/5] Pulling latest code..." -ForegroundColor Cyan
|
||||
git pull
|
||||
Invoke-Step { git pull } "Git pull failed."
|
||||
|
||||
# ── [2/5] Install/update dependencies ───────────────────────────────────────
|
||||
Write-Host "`n[2/5] Installing/updating dependencies..." -ForegroundColor Cyan
|
||||
pip install -e ".[dev]" -q
|
||||
Invoke-Step { & $Python -m pip install -e ".[dev]" -q } "Dependency install failed."
|
||||
|
||||
$ocrUseGpu = (Get-DotEnvValue "OCR_USE_GPU")
|
||||
if ($ocrUseGpu -and $ocrUseGpu.ToLowerInvariant() -in @("1", "true", "yes", "on")) {
|
||||
Write-Host " GPU mode enabled; checking Paddle CUDA runtime..." -ForegroundColor Cyan
|
||||
if (-not (Test-PythonPackage "paddlepaddle-gpu")) {
|
||||
Invoke-Step {
|
||||
& $Python -m pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ -q
|
||||
} "Paddle GPU install failed."
|
||||
}
|
||||
if (-not (Test-PythonPackage "nvidia-cudnn-cu11")) {
|
||||
Invoke-Step { & $Python -m pip install nvidia-cudnn-cu11==8.9.5.29 -q } "NVIDIA cuDNN install failed."
|
||||
}
|
||||
Add-NvidiaDllPaths
|
||||
} else {
|
||||
Write-Host " CPU mode enabled; checking Paddle CPU runtime..." -ForegroundColor Cyan
|
||||
if (-not ((Test-PythonPackage "paddlepaddle") -or (Test-PythonPackage "paddlepaddle-gpu"))) {
|
||||
Invoke-Step { & $Python -m pip install paddlepaddle==2.6.2 -q } "Paddle CPU install failed."
|
||||
}
|
||||
}
|
||||
|
||||
# ── [3/5] Database migration ─────────────────────────────────────────────────
|
||||
Write-Host "`n[3/5] Running database migrations..." -ForegroundColor Cyan
|
||||
alembic upgrade head
|
||||
& $Python -m alembic upgrade head
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host " Migration conflict detected, stamping current state as head..." -ForegroundColor Yellow
|
||||
alembic stamp head
|
||||
Invoke-Step { & $Python -m alembic stamp head } "Alembic stamp failed."
|
||||
Write-Host " Retrying upgrade for any remaining new migrations..." -ForegroundColor Yellow
|
||||
alembic upgrade head
|
||||
& $Python -m alembic upgrade head
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host " Migration still failed. Please check alembic manually." -ForegroundColor Red
|
||||
exit 1
|
||||
@@ -57,4 +211,4 @@ if ($connections) {
|
||||
|
||||
# ── [5/5] Start dev server ───────────────────────────────────────────────────
|
||||
Write-Host "`n[5/5] Starting dev server on port $Port (Ctrl+C to stop)..." -ForegroundColor Cyan
|
||||
uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port
|
||||
& $Python -m uvicorn ocr_sprint.main:app --reload --host 0.0.0.0 --port $Port
|
||||
|
||||
Reference in New Issue
Block a user