update
This commit is contained in:
@@ -10,7 +10,10 @@ flow on top:
|
||||
* `POST /documents?sync=true` — runs the pipeline inline (the original
|
||||
Phase 1 behaviour). Useful for tests and
|
||||
small-volume single-tenant deploys without
|
||||
a Celery worker.
|
||||
a Celery worker. The heavy OCR work is
|
||||
offloaded to a thread-pool executor so the
|
||||
uvicorn event loop stays responsive during
|
||||
processing (~30-120s on CPU).
|
||||
* `GET /documents/{job_id}` — returns the current job state. Async
|
||||
clients poll this until `status` is in a
|
||||
terminal state (completed / needs_review /
|
||||
@@ -19,9 +22,19 @@ flow on top:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from functools import partial
|
||||
from typing import Annotated
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
# Thread pool dedicated to blocking OCR work. Using a *separate* pool
|
||||
# (rather than the default loop executor) lets us cap the number of
|
||||
# concurrent heavy OCR jobs independently of other thread-pool users.
|
||||
# With 1 Celery worker + 1 sync slot we never exceed 2 parallel OCR
|
||||
# runs; keep the pool at 1 so RAM stays bounded on the 7.4 GB server.
|
||||
_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
|
||||
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
@@ -165,11 +178,13 @@ async def create_document(
|
||||
|
||||
|
||||
async def _run_inline(job_id: UUID, content: bytes) -> DocumentResponse:
|
||||
"""Synchronous pipeline execution.
|
||||
"""Run the OCR pipeline without blocking the uvicorn event loop.
|
||||
|
||||
Each state transition opens its own short session so the request-scoped
|
||||
session's rollback-on-exception behaviour cannot wipe out the
|
||||
``mark_failed`` write or strand the blob on disk.
|
||||
``run_pipeline`` is CPU-bound and can take 30-120 s on a 2 vCPU server.
|
||||
Calling it directly from the async handler would freeze the entire event
|
||||
loop (and therefore block health-checks, metrics, and every other request)
|
||||
for the full duration. We push the work onto a dedicated single-thread
|
||||
executor so the loop stays free while the OCR runs in the background.
|
||||
"""
|
||||
import time
|
||||
|
||||
@@ -177,8 +192,13 @@ async def _run_inline(job_id: UUID, content: bytes) -> DocumentResponse:
|
||||
JobRepository(s).mark_processing(job_id)
|
||||
|
||||
started = time.perf_counter()
|
||||
loop = asyncio.get_event_loop()
|
||||
try:
|
||||
output = run_pipeline(content)
|
||||
# run_pipeline is synchronous; wrap it so asyncio can await it.
|
||||
output = await loop.run_in_executor(
|
||||
_OCR_EXECUTOR,
|
||||
partial(run_pipeline, content),
|
||||
)
|
||||
except ValueError as exc:
|
||||
with session_scope() as s:
|
||||
JobRepository(s).mark_failed(job_id, error=str(exc))
|
||||
|
||||
@@ -3,8 +3,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from ocr_sprint import __version__
|
||||
from ocr_sprint.pipeline import ocr as _ocr
|
||||
from ocr_sprint.pipeline import table as _table
|
||||
|
||||
router = APIRouter(tags=["health"])
|
||||
|
||||
@@ -13,3 +16,20 @@ router = APIRouter(tags=["health"])
|
||||
async def health() -> dict[str, str]:
    """Liveness probe: report a fixed OK status plus the package version.

    Kept deliberately cheap; it does not touch the OCR engine.
    """
    payload = dict(status="ok", version=__version__)
    return payload
|
||||
|
||||
|
||||
@router.get("/health/ready")
async def readiness() -> JSONResponse:
    """Readiness probe for the OCR models.

    Answers HTTP 200 with ``"status": "ready"`` when the module-level
    ``_instance`` of both the OCR module and the table module is set, and
    HTTP 503 with ``"status": "warming_up"`` otherwise. The body also carries
    the package version and a per-model ``"ready"`` / ``"loading"`` state.
    """

    def _state(loaded: bool) -> str:
        # Per-model label used inside the "models" section of the body.
        return "ready" if loaded else "loading"

    # Both flags are always evaluated, in this order, before being combined.
    loaded_flags = {
        "paddleocr": _ocr._instance is not None,
        "pp_structure": _table._instance is not None,
    }

    if all(loaded_flags.values()):
        overall, http_status = "ready", 200
    else:
        overall, http_status = "warming_up", 503

    body = {
        "status": overall,
        "version": __version__,
        "models": {name: _state(flag) for name, flag in loaded_flags.items()},
    }
    return JSONResponse(content=body, status_code=http_status)
|
||||
|
||||
Reference in New Issue
Block a user