# Environment configuration template.
# One KEY=value assignment per line. Comments are kept on their own lines
# because not every env-file loader strips trailing "# ..." text from values.

# ==== App ====
# One of: local | dev | staging | prod
APP_ENV=local
APP_HOST=0.0.0.0
APP_PORT=8000
APP_LOG_LEVEL=INFO

# ==== Storage (Phase 1: local filesystem) ====
STORAGE_LOCAL_DIR=./storage

# ==== OCR ====
# PaddleOCR lang code; "latin" works well for Bahasa Indonesia.
OCR_LANG=latin
# Set to true if running on a GPU host.
OCR_USE_GPU=false
# Leave empty to use PaddleOCR defaults.
OCR_DET_MODEL_DIR=
OCR_REC_MODEL_DIR=
OCR_CLS_MODEL_DIR=
# Downscale the longest image side to this value before OCR.
OCR_MAX_IMAGE_SIDE=2200

# ==== Preprocessing ====
PREPROCESS_TARGET_DPI=300
PREPROCESS_DENOISE=true
PREPROCESS_DESKEW=true
# Turn on for low-quality phone photos.
PREPROCESS_ADAPTIVE_THRESHOLD=false

# ==== Document detection (Phase 2, IMAGE sources only) ====
PREPROCESS_DETECT_DOCUMENT=true
PREPROCESS_REMOVE_SHADOW=true
PREPROCESS_MIN_QUAD_AREA_FRACTION=0.20

# ==== Table extraction (Phase 3, PaddleOCR PP-Structure) ====
TABLES_ENABLED=true

# ==== Confidence / routing (Phase 5) ====
CONFIDENCE_AUTO_APPROVE=0.95
CONFIDENCE_NEEDS_REVIEW=0.85

# ==== LLM (Phase 5, optional) ====
LLM_ENABLED=false
LLM_PROVIDER=ollama
# CPU-friendly default model.
LLM_MODEL=qwen2.5:1.5b
LLM_BASE_URL=http://localhost:11434
LLM_TIMEOUT_S=60

# ==== Async pipeline + persistence (Phase 4) ====
# When true, POST /documents queues the work asynchronously.
QUEUE_ENABLED=false
REDIS_URL=redis://localhost:6379/0
CELERY_TASK_DEFAULT_QUEUE=ocr_sprint

# Persistence: sqlite for local dev, Postgres for production via docker-compose.
DATABASE_URL=sqlite:///./storage/ocr_sprint.sqlite
DATABASE_ECHO=false

# Blob storage: local filesystem only for the MVP (no S3/MinIO).
BLOB_STORAGE_DIR=./storage/blobs
BLOB_MAX_UPLOAD_MB=25

# Auth: comma-separated list of accepted API keys. Empty = auth disabled
# (local dev only; production must set at least one).
API_KEYS=
API_KEY_HEADER=X-API-Key