pdf2markdown / Dockerfile
broadfield-dev's picture
Update Dockerfile
dbea75b verified
raw
history blame
877 Bytes
# Base image: the official Python 3.10 image in its "slim" variant.
# Every later instruction in this file builds on top of it.
FROM python:3.10-slim
# Install the system packages the app depends on: the Poppler command-line
# tools and the Tesseract OCR engine together with its development files.
#
# --no-install-recommends keeps optional "recommended" packages out of the
# image. poppler-data is therefore listed explicitly: it is normally pulled in
# only as a recommendation, and Poppler uses it for CJK / non-Latin encodings.
# NOTE(review): drop poppler-data if such PDFs never need to be handled.
#
# update + install + list cleanup share one layer so the apt index is never
# stale and never shipped in the image. Packages are sorted alphabetically.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libtesseract-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Add an unprivileged account (with a home directory) that the container
# switches to before the app starts.
RUN useradd --create-home appuser
# Relative paths in the instructions below resolve against /app
WORKDIR /app
# Bring in the dependency manifest on its own and install from it, so this
# layer is rebuilt only when requirements.txt changes
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Add the application script after the dependencies are in place
COPY app.py ./
# Provide /tmp/flagged, owned by appuser, as a location the unprivileged
# process can write Gradio flagging data to.
# NOTE(review): presumably app.py points Gradio's flagging directory here — confirm.
RUN install -d -m 0755 -o appuser -g appuser /tmp/flagged
# Everything from here on (including the running container) executes as appuser
USER appuser
# Document the port the Gradio server listens on (EXPOSE does not publish it)
EXPOSE 7860
# Gradio server settings: listen on all interfaces, on port 7860
ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT=7860
# Default command: launch the app with the image's Python interpreter
CMD ["python", "app.py"]