# syntax=docker/dockerfile:1.6

# CPU-only image for the OCR Sprint API.
# PaddleOCR + PyMuPDF + OpenCV-headless work on plain Debian without poppler.
#
# Stages:
#   base    - Python 3.11 slim plus the shared system libraries
#   builder - installs the project and its Python dependencies with pip
#   runtime - final image; serves the API with uvicorn as the unprivileged
#             `app` user

# ----- base layer (shared by builder and runtime) -----
FROM python:3.11-slim AS base

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1

# Only needed while apt-get runs; declared as ARG (not ENV) so it is not
# baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# System deps for OpenCV, libmagic, PaddlePaddle, and image format support.
# `curl` is required by the HEALTHCHECK in the runtime stage.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libmagic1 \
        libsm6 \
        libxext6 \
        libxrender1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ----- builder layer (install deps separately for caching) -----
FROM base AS builder

COPY pyproject.toml README.md ./
COPY src/ ./src/

# `[ocr]` pulls Paddle wheels (~1.5 GB). `[dev]` keeps test+lint deps so
# that `make test` works inside the image.
RUN pip install --upgrade pip && pip install ".[ocr,dev]"

# ----- runtime layer -----
FROM base AS runtime

# Create the unprivileged `app` user before copying any files so that the
# COPY instructions below can assign ownership directly via --chown (a later
# recursive chown would duplicate every copied file in an extra layer).
# Also pre-create the cache dir so PaddleOCR can write models on first run,
# and the blob storage root so the API can write uploads as `app`.
RUN useradd --create-home --uid 1000 app \
    && mkdir -p /home/app/.paddleocr /app/storage/blobs \
    && chown -R app:app /home/app /app

# Installed Python packages and console scripts (e.g. uvicorn) from the builder.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Application sources and Alembic migration files, owned by `app`.
COPY --chown=app:app pyproject.toml README.md alembic.ini ./
COPY --chown=app:app src/ ./src/
COPY --chown=app:app alembic/ ./alembic/

USER app

EXPOSE 8000

HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD curl -fsS http://localhost:8000/api/v1/health || exit 1

CMD ["uvicorn", "ocr_sprint.main:app", "--host", "0.0.0.0", "--port", "8000"]