Files
OCR-SPRIN-SERVICE/docs/DEPLOYMENT-MANUAL.md

18 KiB

Deployment Manual OCR Sprint Service (Tanpa Docker)

Panduan lengkap deployment OCR Sprint Service langsung di server tanpa menggunakan Docker.

Prasyarat Server

Spesifikasi Minimum

  • OS: Ubuntu 20.04+ / Debian 11+ / RHEL 8+
  • CPU: 4 cores (8 cores recommended)
  • RAM: 8 GB minimum (16 GB recommended)
  • Storage: 50 GB free space
  • User: Non-root user dengan sudo access

Port yang Dibutuhkan

  • 8000: API server (internal, akan di-proxy oleh Nginx)
  • 80/443: HTTP/HTTPS (Nginx)
  • 5432: PostgreSQL (localhost only)
  • 6379: Redis (localhost only)

Langkah 1: Install System Dependencies

Ubuntu/Debian

# Update system
sudo apt update && sudo apt upgrade -y

# Install Python 3.11
sudo apt install -y software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python3.11 python3.11-venv python3.11-dev python3-pip

# Install system libraries for OpenCV and PaddleOCR.
# libgl1 is used instead of libgl1-mesa-glx: the latter was a transitional
# package and is no longer available on Debian 12 / Ubuntu 24.04, where
# requesting it makes the whole install command fail. libgl1 ships
# libGL.so.1 on every release this manual targets.
sudo apt install -y \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    libmagic1 \
    build-essential \
    git \
    curl \
    wget

# Install Redis
sudo apt install -y redis-server
sudo systemctl enable redis-server
sudo systemctl start redis-server

# Install PostgreSQL
sudo apt install -y postgresql postgresql-contrib
sudo systemctl enable postgresql
sudo systemctl start postgresql

RHEL/CentOS/Rocky Linux

# Update system
sudo dnf update -y

# Install Python 3.11
sudo dnf install -y python3.11 python3.11-devel python3.11-pip

# Install system libraries
sudo dnf install -y \
    mesa-libGL \
    glib2 \
    libSM \
    libXext \
    libXrender \
    file-libs \
    gcc \
    gcc-c++ \
    make \
    git

# Install Redis
sudo dnf install -y redis
sudo systemctl enable redis
sudo systemctl start redis

# Install PostgreSQL
sudo dnf install -y postgresql-server postgresql-contrib
sudo postgresql-setup --initdb
sudo systemctl enable postgresql
sudo systemctl start postgresql

Langkah 2: Setup Database PostgreSQL

# Masuk sebagai postgres user
sudo -u postgres psql

-- Jalankan SQL commands berikut di prompt psql:
-- Create user dan database
CREATE USER ocr WITH PASSWORD 'ganti-dengan-password-kuat';
CREATE DATABASE ocr_sprint OWNER ocr;

-- Grant privileges
GRANT ALL PRIVILEGES ON DATABASE ocr_sprint TO ocr;

-- Connect ke database
\c ocr_sprint

-- Grant schema privileges (PostgreSQL 15+)
GRANT ALL ON SCHEMA public TO ocr;

-- Exit
\q

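Konfigurasi autentikasi PostgreSQL (opsional; PostgreSQL tetap hanya listen di localhost, bukan remote access). Path di bawah berlaku untuk PostgreSQL 14 di Ubuntu/Debian — sesuaikan angka versi dengan yang terpasang (cek dengan `ls /etc/postgresql/`); di RHEL file-nya ada di /var/lib/pgsql/data/: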

# Edit postgresql.conf
sudo nano /etc/postgresql/14/main/postgresql.conf

# Uncomment dan ubah:
listen_addresses = 'localhost'  # Tetap localhost untuk keamanan

# Edit pg_hba.conf
sudo nano /etc/postgresql/14/main/pg_hba.conf

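# Tambahkan line berikut DI ATAS rule generik (pg_hba.conf memakai first-match).
# Rule "local" hanya berlaku untuk Unix socket; aplikasi terhubung via TCP ke
# localhost:5432 (lihat DATABASE_URL), sehingga rule "host" juga dibutuhkan.
# scram-sha-256 memerlukan password_encryption = scram-sha-256 (default sejak PostgreSQL 14).
local   ocr_sprint      ocr                                     scram-sha-256
host    ocr_sprint      ocr             127.0.0.1/32            scram-sha-256
host    ocr_sprint      ocr             ::1/128                 scram-sha-256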

# Restart PostgreSQL
sudo systemctl restart postgresql

Langkah 3: Setup Application User

# Create dedicated user untuk aplikasi
sudo useradd -m -s /bin/bash ocr
sudo usermod -aG sudo ocr  # Opsional, untuk maintenance; tidak disarankan di production (service account sebaiknya tanpa sudo)

# Create application directory
sudo mkdir -p /opt/ocr-sprint-service
sudo chown ocr:ocr /opt/ocr-sprint-service

# Switch ke user ocr
sudo su - ocr

Langkah 4: Install Application

# Clone repository
cd /opt
git clone https://github.com/Adriankf59/ocr-sprint-service.git
cd ocr-sprint-service

# Create virtual environment
python3.11 -m venv .venv

# Activate virtual environment
source .venv/bin/activate

# Upgrade pip
pip install --upgrade pip setuptools wheel

# Install application dengan OCR dependencies
pip install -e ".[ocr]"

# Verify installation
python -c "import paddleocr; print('PaddleOCR installed successfully')"

Langkah 5: Konfigurasi Application

# Copy environment template
cp .env.example .env

# Edit konfigurasi
nano .env

Konfigurasi production (/opt/ocr-sprint-service/.env):

# ==== App ====
APP_ENV=prod
APP_HOST=0.0.0.0
APP_PORT=8000
APP_LOG_LEVEL=INFO

# ==== Storage ====
STORAGE_LOCAL_DIR=/opt/ocr-sprint-service/storage
BLOB_STORAGE_DIR=/opt/ocr-sprint-service/storage/blobs
BLOB_MAX_UPLOAD_MB=25

# ==== OCR ====
OCR_LANG=latin
OCR_USE_GPU=false
OCR_MAX_IMAGE_SIDE=2200

# ==== Preprocessing ====
PREPROCESS_TARGET_DPI=300
PREPROCESS_DENOISE=true
PREPROCESS_DESKEW=true
PREPROCESS_DETECT_DOCUMENT=true
PREPROCESS_REMOVE_SHADOW=true
PREPROCESS_MIN_QUAD_AREA_FRACTION=0.20

# ==== Table Extraction ====
TABLES_ENABLED=true

# ==== Confidence ====
CONFIDENCE_AUTO_APPROVE=0.95
CONFIDENCE_NEEDS_REVIEW=0.85

# ==== LLM (Phase 5, optional) ====
LLM_ENABLED=false

# ==== Async Pipeline ====
QUEUE_ENABLED=true
REDIS_URL=redis://localhost:6379/0
CELERY_TASK_DEFAULT_QUEUE=ocr_sprint

# ==== Database ====
DATABASE_URL=postgresql+psycopg://ocr:ganti-dengan-password-kuat@localhost:5432/ocr_sprint
DATABASE_ECHO=false

# ==== Auth (WAJIB!) ====
API_KEYS=key1-ganti-dengan-random-string,key2-ganti-dengan-random-string
API_KEY_HEADER=X-API-Key

Generate secure API keys:

# Generate 2 API keys
openssl rand -hex 32
openssl rand -hex 32

Create storage directories:

mkdir -p /opt/ocr-sprint-service/storage/blobs
chmod 755 /opt/ocr-sprint-service/storage

Langkah 6: Run Database Migrations

# Masih sebagai user ocr, dengan venv activated
cd /opt/ocr-sprint-service
source .venv/bin/activate

# Run migrations
alembic upgrade head

# Verify
alembic current

Langkah 7: Test Manual Run

# Test API server
uvicorn ocr_sprint.main:app --host 0.0.0.0 --port 8000

# Di terminal lain, test health check
curl http://localhost:8000/api/v1/health

# Jika berhasil, stop dengan Ctrl+C

Langkah 8: Setup Systemd Services

API Service

# Exit dari user ocr, kembali ke user dengan sudo
exit

# Create systemd service file
sudo nano /etc/systemd/system/ocr-sprint-api.service

Content /etc/systemd/system/ocr-sprint-api.service:

[Unit]
Description=OCR Sprint API Service
After=network.target postgresql.service redis.service
Wants=postgresql.service redis.service

[Service]
Type=simple
User=ocr
Group=ocr
WorkingDirectory=/opt/ocr-sprint-service

# Environment
Environment="PATH=/opt/ocr-sprint-service/.venv/bin:/usr/local/bin:/usr/bin:/bin"
EnvironmentFile=/opt/ocr-sprint-service/.env

# Start command - 4 workers untuk production
ExecStart=/opt/ocr-sprint-service/.venv/bin/uvicorn \
    ocr_sprint.main:app \
    --host 0.0.0.0 \
    --port 8000 \
    --workers 4 \
    --log-level info

# Restart policy
Restart=always
RestartSec=10
StartLimitInterval=0

# Resource limits
LimitNOFILE=65536
MemoryLimit=6G

# Security
NoNewPrivileges=true
PrivateTmp=true

[Install]
WantedBy=multi-user.target

Celery Worker Service

sudo nano /etc/systemd/system/ocr-sprint-worker.service

Content /etc/systemd/system/ocr-sprint-worker.service:

[Unit]
Description=OCR Sprint Celery Worker
After=network.target postgresql.service redis.service ocr-sprint-api.service
Wants=postgresql.service redis.service

[Service]
Type=simple
User=ocr
Group=ocr
WorkingDirectory=/opt/ocr-sprint-service

# Environment
Environment="PATH=/opt/ocr-sprint-service/.venv/bin:/usr/local/bin:/usr/bin:/bin"
EnvironmentFile=/opt/ocr-sprint-service/.env

# Start command - concurrency 2 untuk 4 core CPU
ExecStart=/opt/ocr-sprint-service/.venv/bin/celery \
    -A ocr_sprint.worker.celery_app \
    worker \
    --loglevel=info \
    --concurrency=2 \
    --max-tasks-per-child=100

# Restart policy
Restart=always
RestartSec=10
StartLimitInterval=0

# Resource limits
LimitNOFILE=65536
MemoryLimit=4G

# Security
NoNewPrivileges=true
PrivateTmp=true

[Install]
WantedBy=multi-user.target

Enable dan Start Services

# Reload systemd
sudo systemctl daemon-reload

# Enable services (auto-start on boot)
sudo systemctl enable ocr-sprint-api
sudo systemctl enable ocr-sprint-worker

# Start services
sudo systemctl start ocr-sprint-api
sudo systemctl start ocr-sprint-worker

# Check status
sudo systemctl status ocr-sprint-api
sudo systemctl status ocr-sprint-worker

# View logs
sudo journalctl -u ocr-sprint-api -f
sudo journalctl -u ocr-sprint-worker -f

Langkah 9: Setup Nginx Reverse Proxy

Install Nginx

sudo apt install -y nginx certbot python3-certbot-nginx

Konfigurasi Nginx

sudo nano /etc/nginx/sites-available/ocr-sprint

Content /etc/nginx/sites-available/ocr-sprint:

# Upstream untuk load balancing (jika scale horizontal)
upstream ocr_api {
    server 127.0.0.1:8000;
    keepalive 32;
}

# Rate limiting
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;

server {
    listen 80;
    server_name ocr.yourdomain.com;  # Ganti dengan domain Anda

    # Max upload size (sesuaikan dengan BLOB_MAX_UPLOAD_MB)
    client_max_body_size 30M;
    client_body_buffer_size 128k;

    # Timeouts untuk dokumen besar
    proxy_connect_timeout 300s;
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;
    send_timeout 300s;

    # Logging
    access_log /var/log/nginx/ocr-sprint-access.log;
    error_log /var/log/nginx/ocr-sprint-error.log;

    # API endpoints
    location /api/ {
        # Rate limiting
        limit_req zone=api_limit burst=20 nodelay;

        proxy_pass http://ocr_api;
        proxy_http_version 1.1;
        
        # Headers
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Connection "";
        
        # Disable buffering untuk streaming responses
        proxy_buffering off;
    }

    # Health check endpoint (no rate limit)
    location /api/v1/health {
        proxy_pass http://ocr_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        access_log off;
    }

    # Metrics endpoint (restrict access)
    location /metrics {
        # Allow only from internal network
        allow 10.0.0.0/8;
        allow 172.16.0.0/12;
        allow 192.168.0.0/16;
        allow 127.0.0.1;
        deny all;

        proxy_pass http://ocr_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }

    # Docs (opsional, bisa di-disable di production)
    location /docs {
        proxy_pass http://ocr_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }

    location /redoc {
        proxy_pass http://ocr_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }
}

Enable Site

# Test konfigurasi
sudo nginx -t

# Enable site
sudo ln -s /etc/nginx/sites-available/ocr-sprint /etc/nginx/sites-enabled/

# Remove default site (opsional)
sudo rm /etc/nginx/sites-enabled/default

# Re-validate now that the site is enabled (the earlier check ran before the
# symlink existed, so it did not cover this file), then reload only on success.
sudo nginx -t && sudo systemctl reload nginx

Setup SSL dengan Let's Encrypt

# Install certbot
sudo apt install -y certbot python3-certbot-nginx

# Obtain certificate (ganti dengan domain Anda)
sudo certbot --nginx -d ocr.yourdomain.com

# Test auto-renewal
sudo certbot renew --dry-run

Certbot akan otomatis mengupdate konfigurasi Nginx untuk HTTPS.

Langkah 10: Setup Firewall

# Install UFW (jika belum ada)
sudo apt install -y ufw

# Allow SSH (PENTING! Jangan sampai terkunci)
sudo ufw allow 22/tcp

# Allow HTTP dan HTTPS
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp

# Enable firewall
sudo ufw enable

# Check status
sudo ufw status

Langkah 11: Verifikasi Deployment

Test dari Server

# Health check
curl http://localhost:8000/api/v1/health

# Test with an API key.
# The URL is quoted so the shell does not treat "?" as a glob character
# (zsh aborts with "no matches found" on the unquoted form).
curl -X POST "http://localhost:8000/api/v1/documents?sync=true" \
  -H "X-API-Key: your-api-key-here" \
  -F "file=@/path/to/test.pdf"

Test dari Client

# Health check via domain
curl https://ocr.yourdomain.com/api/v1/health

# Upload dokumen
curl -X POST https://ocr.yourdomain.com/api/v1/documents \
  -H "X-API-Key: your-api-key-here" \
  -F "file=@document.pdf"

Monitoring dan Maintenance

View Logs

# API logs
sudo journalctl -u ocr-sprint-api -f

# Worker logs
sudo journalctl -u ocr-sprint-worker -f

# Nginx logs
sudo tail -f /var/log/nginx/ocr-sprint-access.log
sudo tail -f /var/log/nginx/ocr-sprint-error.log

# PostgreSQL logs
sudo tail -f /var/log/postgresql/postgresql-14-main.log

Service Management

# Restart services
sudo systemctl restart ocr-sprint-api
sudo systemctl restart ocr-sprint-worker

# Stop services
sudo systemctl stop ocr-sprint-api
sudo systemctl stop ocr-sprint-worker

# Check status
sudo systemctl status ocr-sprint-api
sudo systemctl status ocr-sprint-worker

Database Backup

# Create backup script
sudo nano /opt/ocr-sprint-service/backup.sh

Content backup.sh:

#!/bin/bash
#
# Backup of the ocr_sprint PostgreSQL database.
#
# Writes a gzip-compressed SQL dump to BACKUP_DIR and deletes dumps matched
# by the retention rule below. Meant to run non-interactively (e.g. from
# cron), so pg_dump must authenticate without a prompt: store the password
# of the "ocr" role in ~/.pgpass (mode 0600) of the user running this
# script, or set PGPASSFILE before calling it.
#
# Exits non-zero if any step fails; a failed dump never leaves a file that
# looks like a valid backup.
set -euo pipefail

readonly BACKUP_DIR="/opt/ocr-sprint-service/backups"
DATE=$(date +%Y%m%d_%H%M%S)
readonly DATE
readonly DUMP_FILE="${BACKUP_DIR}/db_${DATE}.sql.gz"

# Dumps contain application data; keep them readable by the owner only.
umask 077

mkdir -p -- "$BACKUP_DIR"

# Remove the partial dump if anything below fails (no-op after a successful mv).
trap 'rm -f -- "${DUMP_FILE}.tmp"' EXIT

# Backup database. --no-password makes pg_dump fail immediately instead of
# waiting for a password prompt; pipefail propagates that failure through gzip.
pg_dump --no-password -U ocr -h localhost ocr_sprint | gzip > "${DUMP_FILE}.tmp"
mv -- "${DUMP_FILE}.tmp" "$DUMP_FILE"

# Backup blobs (optional, can be large)
# tar -czf "$BACKUP_DIR/blobs_$DATE.tar.gz" /opt/ocr-sprint-service/storage/blobs

# Keep only last 7 days
find "$BACKUP_DIR" -name "db_*.sql.gz" -mtime +7 -delete

echo "Backup completed: $DATE"
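Setelah menyimpan backup.sh, jadikan executable (file dibuat dengan sudo, sehingga chmod juga perlu sudo):

# Make executable
sudo chmod +x /opt/ocr-sprint-service/backup.sh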

# Setup cron job (daily at 2 AM)
sudo crontab -e

# Add line:
0 2 * * * /opt/ocr-sprint-service/backup.sh >> /var/log/ocr-backup.log 2>&1

Log Rotation

sudo nano /etc/logrotate.d/ocr-sprint

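Content (catatan: di Ubuntu/Debian, paket nginx sudah menyertakan /etc/logrotate.d/nginx yang mencakup /var/log/nginx/*.log; jika file itu ada, lewati langkah ini agar logrotate tidak gagal dengan error "duplicate log entry"):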

/var/log/nginx/ocr-sprint-*.log {
    daily
    rotate 14
    compress
    delaycompress
    notifempty
    create 0640 www-data adm
    sharedscripts
    postrotate
        [ -f /var/run/nginx.pid ] && kill -USR1 `cat /var/run/nginx.pid`
    endscript
}

Update Application

# Switch ke user ocr
sudo su - ocr
cd /opt/ocr-sprint-service

# Pull latest code
git pull

# Activate venv
source .venv/bin/activate

# Update dependencies
pip install -e ".[ocr]"

# Run migrations
alembic upgrade head

# Exit user ocr
exit

# Restart services
sudo systemctl restart ocr-sprint-api
sudo systemctl restart ocr-sprint-worker

# Check logs
sudo journalctl -u ocr-sprint-api -n 50

Performance Tuning

Increase Worker Concurrency

# Edit worker service
sudo nano /etc/systemd/system/ocr-sprint-worker.service

# Ubah --concurrency sesuai CPU cores
# Untuk 8 cores: --concurrency=4
# Untuk 16 cores: --concurrency=8

# Reload dan restart
sudo systemctl daemon-reload
sudo systemctl restart ocr-sprint-worker

PostgreSQL Tuning

sudo nano /etc/postgresql/14/main/postgresql.conf

Recommended settings untuk 16GB RAM:

shared_buffers = 4GB
effective_cache_size = 12GB
maintenance_work_mem = 1GB
checkpoint_completion_target = 0.9
wal_buffers = 16MB
default_statistics_target = 100
random_page_cost = 1.1
effective_io_concurrency = 200
work_mem = 10MB
min_wal_size = 1GB
max_wal_size = 4GB
max_worker_processes = 4
max_parallel_workers_per_gather = 2
max_parallel_workers = 4

Restart PostgreSQL untuk menerapkan perubahan:

sudo systemctl restart postgresql

Redis Tuning

sudo nano /etc/redis/redis.conf

Recommended settings:

maxmemory 2gb
maxmemory-policy allkeys-lru
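# Disable RDB snapshots untuk performance
# (redis.conf tidak mendukung komentar inline; komentar harus di baris sendiri)
save ""

Restart Redis untuk menerapkan perubahan:

sudo systemctl restart redis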

Troubleshooting

Service tidak start

# Check logs
sudo journalctl -u ocr-sprint-api -n 100 --no-pager
sudo journalctl -u ocr-sprint-worker -n 100 --no-pager

# Check permissions
ls -la /opt/ocr-sprint-service
ls -la /opt/ocr-sprint-service/storage

# Test manual run
sudo su - ocr
cd /opt/ocr-sprint-service
source .venv/bin/activate
uvicorn ocr_sprint.main:app --host 0.0.0.0 --port 8000

Database connection error

# Test connection
sudo -u ocr psql -h localhost -U ocr -d ocr_sprint

# Check PostgreSQL status
sudo systemctl status postgresql

# Check pg_hba.conf
sudo cat /etc/postgresql/14/main/pg_hba.conf | grep ocr

Redis connection error

# Test Redis
redis-cli ping

# Check Redis status
sudo systemctl status redis

# Check Redis logs
sudo journalctl -u redis -n 50

PaddleOCR model download gagal

# Download manual
sudo su - ocr
cd /opt/ocr-sprint-service
source .venv/bin/activate

python << EOF
from paddleocr import PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='latin')
print("Models downloaded successfully")
EOF

Out of memory

# Check memory usage
free -h
htop

# Reduce worker concurrency
sudo nano /etc/systemd/system/ocr-sprint-worker.service
# Ubah --concurrency=1

# Add swap (jika perlu)
sudo fallocate -l 4G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab

Security Checklist

  • API keys diganti dengan nilai random yang kuat
  • Database password diganti dari default
  • Firewall enabled (UFW) - hanya port 22, 80, 443 terbuka
  • SSL/TLS enabled via Let's Encrypt
  • /metrics endpoint restricted ke internal network
  • Nginx rate limiting configured
  • PostgreSQL hanya listen di localhost
  • Redis hanya listen di localhost
  • Regular backup configured (cron job)
  • Log rotation configured
  • OS security updates enabled (unattended-upgrades)
  • Fail2ban installed untuk SSH protection

Monitoring dengan Prometheus (Opsional)

Install Prometheus

# Download Prometheus
cd /tmp
wget https://github.com/prometheus/prometheus/releases/download/v2.45.0/prometheus-2.45.0.linux-amd64.tar.gz
tar xvfz prometheus-*.tar.gz
sudo mv prometheus-2.45.0.linux-amd64 /opt/prometheus

# Create user
sudo useradd --no-create-home --shell /bin/false prometheus

# Create directories
sudo mkdir /etc/prometheus /var/lib/prometheus
sudo chown prometheus:prometheus /var/lib/prometheus

Configure Prometheus

sudo nano /etc/prometheus/prometheus.yml

Content:

global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'ocr-sprint'
    static_configs:
      - targets: ['localhost:8000']
    metrics_path: '/metrics'

Create Systemd Service

sudo nano /etc/systemd/system/prometheus.service

Content:

[Unit]
Description=Prometheus
After=network.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/opt/prometheus/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus/

[Install]
WantedBy=multi-user.target

Reload systemd, lalu enable dan start Prometheus:

sudo systemctl daemon-reload
sudo systemctl enable prometheus
sudo systemctl start prometheus

Access Prometheus di http://localhost:9090

Support

Untuk pertanyaan atau issues, hubungi tim development.