# pdf2markdown / Dockerfile
# Source: broadfield-dev — commit 7aec7b3 ("Update Dockerfile", verified)
# Base image: the official slim Python 3.10 runtime.
FROM python:3.10-slim
# Install the OS-level tools: poppler-utils, tesseract-ocr and the Tesseract
# development files.
# --no-install-recommends keeps optional "recommended" packages out of the image.
# poppler-data is listed explicitly so it stays installed despite that flag
# (NOTE(review): believed to be only a Recommends of the poppler library on
# Debian — confirm, and drop it if the extra encoding data is not needed).
# The apt package lists are deleted in the same layer so they add no image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libtesseract-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# All application files live under /app.
WORKDIR /app
# Unprivileged account (with a home directory) that the container runs as.
RUN useradd --create-home appuser
# Install the Python dependencies first: this layer is rebuilt only when
# requirements.txt changes, not on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Add the application module and its templates.
COPY app.py ./
COPY templates ./templates/
# When a static folder for CSS/JS is added later, copy it as well:
# COPY static ./static/
# Everything from here on, including the running container, uses the non-root user.
USER appuser
# Runtime environment:
#   PYTHONUNBUFFERED=1  disables Python's stdout/stderr buffering
#   FLASK_APP=app.py    names the Flask application module
ENV PYTHONUNBUFFERED=1 \
    FLASK_APP=app.py
# FLASK_ENV is intentionally left unset; Gunicorn, not the Flask development
# server, serves the app. To set it anyway for a real deployment:
# ENV FLASK_ENV=production
# Document the listening port (Hugging Face handles the actual mapping).
# Gunicorn binds to it below; Hugging Face Spaces typically expect apps on 7860.
EXPOSE 7860
# Start Gunicorn on all interfaces, port 7860, with 2 workers and a 120 s timeout.
# 'app:app' tells Gunicorn to load the object named 'app' from the file app.py.
CMD ["gunicorn", \
     "--bind", "0.0.0.0:7860", \
     "--workers", "2", \
     "--timeout", "120", \
     "app:app"]