update

2026-04-26 22:08:41 +08:00
parent 5d9d9f784a
commit 9d969e61fd
6 changed files with 149 additions and 7 deletions
--- a/src/ocr_sprint/api/routes/documents.py
+++ b/src/ocr_sprint/api/routes/documents.py
@@ -10,7 +10,10 @@ flow on top:
 * `POST /documents?sync=true` — runs the pipeline inline (the original
                                 Phase 1 behaviour). Useful for tests and
                                 small-volume single-tenant deploys without
-                                 a Celery worker.
+                                 a Celery worker. The heavy OCR work is
+                                 offloaded to a thread-pool executor so the
+                                 uvicorn event loop stays responsive during
+                                 processing (~30-120s on CPU).
 * `GET  /documents/{job_id}`  — returns the current job state. Async
                                 clients poll this until `status` is in a
                                 terminal state (completed / needs_review /
@@ -19,9 +22,19 @@ flow on top:

 from __future__ import annotations

+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
 from typing import Annotated
 from uuid import UUID, uuid4

+# Thread pool dedicated to blocking OCR work. Using a *separate* pool
+# (rather than the default loop executor) lets us cap the number of
+# concurrent inline OCR jobs independently of other thread-pool users.
+# max_workers=1 makes extra sync requests queue, so with 1 Celery worker
+# at most 2 OCR runs overlap and RAM stays bounded on the 7.4 GB server.
+_OCR_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-inline")
+
 from fastapi import (
    APIRouter,
    Depends,
@@ -165,11 +178,13 @@ async def create_document(


 async def _run_inline(job_id: UUID, content: bytes) -> DocumentResponse:
-    """Synchronous pipeline execution.
+    """Run the OCR pipeline without blocking the uvicorn event loop.

-    Each state transition opens its own short session so the request-scoped
-    session's rollback-on-exception behaviour cannot wipe out the
-    ``mark_failed`` write or strand the blob on disk.
+    ``run_pipeline`` is CPU-bound and can take 30-120 s on a 2 vCPU server.
+    Calling it directly inside the async handler would freeze the entire event
+    loop (and therefore block health-checks, metrics, and every other request)
+    for the full duration. We push the work onto a dedicated single-thread
+    executor so the loop stays free while this request awaits the OCR result.
    """
    import time

@@ -177,8 +192,13 @@ async def _run_inline(job_id: UUID, content: bytes) -> DocumentResponse:
        JobRepository(s).mark_processing(job_id)

    started = time.perf_counter()
+    loop = asyncio.get_event_loop()
    try:
-        output = run_pipeline(content)
+        # run_pipeline is synchronous; wrap it so asyncio can await it.
+        output = await loop.run_in_executor(
+            _OCR_EXECUTOR,
+            partial(run_pipeline, content),
+        )
    except ValueError as exc:
        with session_scope() as s:
            JobRepository(s).mark_failed(job_id, error=str(exc))
--- a/src/ocr_sprint/api/routes/health.py
+++ b/src/ocr_sprint/api/routes/health.py
@@ -3,8 +3,11 @@
 from __future__ import annotations

 from fastapi import APIRouter
+from fastapi.responses import JSONResponse

 from ocr_sprint import __version__
+from ocr_sprint.pipeline import ocr as _ocr
+from ocr_sprint.pipeline import table as _table

 router = APIRouter(tags=["health"])

@@ -13,3 +16,20 @@ router = APIRouter(tags=["health"])
 async def health() -> dict[str, str]:
    """Lightweight liveness check — does NOT touch the OCR engine."""
    return {"status": "ok", "version": __version__}
+
+
+@router.get("/health/ready")
+async def readiness() -> JSONResponse:
+    """Readiness check — returns 200 when OCR models are loaded, 503 if still warming up."""
+    ocr_ready = _ocr._instance is not None  # non-None taken to mean "loaded" — TODO confirm
+    table_ready = _table._instance is not None  # same check for the table model
+    ready = ocr_ready and table_ready  # ready only when both instances are set
+    payload = {  # per-model state so callers can see which one is still pending
+        "status": "ready" if ready else "warming_up",
+        "version": __version__,
+        "models": {
+            "paddleocr": "ready" if ocr_ready else "loading",
+            "pp_structure": "ready" if table_ready else "loading",
+        },
+    }
+    return JSONResponse(content=payload, status_code=200 if ready else 503)