# cv-analyser / Dockerfile.hf-ocr
# Author: Dzunisani007
# Commit: Optimize Docker build process and dependencies (78bd45b)
# Pin the Debian release: the apt package names below are release-specific,
# and the floating "3.11-slim" tag moves to a newer Debian over time.
FROM python:3.11-slim-bookworm

# System-level dependencies for OCR and PDF processing.
# - libgl1 replaces libgl1-mesa-glx, a transitional package that is no longer
#   available on Debian bookworm and later; requesting it makes apt-get fail
#   with "has no installation candidate".
# - libmagic1 is installed in the same transaction as libmagic-dev rather than
#   through a second apt-get install.
# - The apt lists are removed in this same layer so they do not stay in the image.
# NOTE(review): --no-install-recommends would shrink the image, but recommended
# packages (e.g. poppler-data for poppler-utils) may be needed at runtime --
# confirm before adding it.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
        libmagic-dev \
        libmagic1 \
        libtesseract-dev \
        poppler-utils \
        python3-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Every later relative path in this build resolves against /app.
WORKDIR /app

# Bring pip itself up to date before installing anything else.
RUN pip install --upgrade --no-cache-dir pip

# The Hugging Face requirements list is stored in the image as requirements.txt.
COPY requirements.hf.txt ./requirements.txt

# torch and numpy go in first, at fixed versions, ahead of the requirements file.
RUN pip install --no-cache-dir \
        torch==2.1.1 \
        numpy==1.24.4

# Install the requirements file; FORCE_CUDA=0 is set for this command only.
RUN FORCE_CUDA=0 pip install --no-cache-dir --requirement requirements.txt

# OCR/PDF Python packages, installed on top of (not via) the requirements file.
RUN pip install --no-cache-dir \
        pytesseract \
        pdf2image \
        pdfplumber
# Run as an unprivileged user instead of root. Hugging Face Spaces documents
# that Docker Spaces run with user ID 1000, so the account is created with that
# UID and given a home directory plus ownership of the working directory.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app

# Point HOME at the new user's home directory.
ENV HOME=/home/user

# Everything from here on, including the running app, executes as "user".
USER user

# Copy application code, owned by the runtime user
COPY --chown=user:user . .

# Expose port for Hugging Face Spaces
EXPOSE 7860

# Command to run the application (exec form: no shell wraps the process)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]