docker_mineru / Dockerfile
marcosremar2's picture
Feat: Enable CUDA support
e432715
raw
history blame
2.58 kB
# CUDA 11.8 development image on Ubuntu 22.04; the Python dependencies installed
# later are expected to use the CUDA libraries provided by this base.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so the value is visible to the RUN steps of this
# stage but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Install base dependencies: Python 3.10 tooling, LibreOffice, CJK and Microsoft
# core fonts, Poppler utilities, git/wget, and the GL/X11/glib runtime libraries.
#  - `add-apt-repository -y` skips the confirmation prompt so the build never
#    waits for input.
#  - The debconf preseed accepts the Microsoft core fonts EULA up front, so
#    ttf-mscorefonts-installer does not depend on an interactive answer.
#  - The apt package lists are removed in the same layer to keep the image small.
# NOTE(review): Ubuntu 22.04 presumably already ships Python 3.10, which would
# make the deadsnakes PPA redundant — confirm before removing it.
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections && \
    apt-get update && \
    apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-wqy-microhei \
        fonts-wqy-zenhei \
        git \
        libgl1 \
        libglib2.0-0 \
        libreoffice \
        libsm6 \
        libxext6 \
        libxrender1 \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-distutils \
        python3.10-venv \
        ttf-mscorefonts-installer \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Register /usr/bin/python3.10 as the `python3` alternative (priority 1) so that
# `python3` in the following steps resolves to Python 3.10.
RUN update-alternatives \
        --install /usr/bin/python3 python3 /usr/bin/python3.10 1
# Create the unprivileged account "user" (UID 1000) together with its home
# directory; the image switches to this account before starting the app.
RUN useradd --create-home --uid 1000 user
# Point HOME at the account's home directory, then put its per-user bin
# directory at the front of PATH.
ENV HOME=/home/user
ENV PATH=${HOME}/.local/bin:${PATH}

# Relative paths in the instructions below resolve against the app directory.
WORKDIR ${HOME}/app
# Copy only the dependency manifest first (owned by the non-root user) so the
# install layer below stays cached until requirements.txt changes.
COPY --chown=user requirements.txt .

# Create the virtual environment in /opt/mineru_venv and install the Python
# dependencies into it. This runs as root, so the venv is root-owned.
# --no-cache-dir keeps pip's download cache out of the image layer.
# Note: make sure the PyTorch build selected by requirements.txt picks up CUDA
# from the base image.
RUN python3 -m venv /opt/mineru_venv && \
    . /opt/mineru_venv/bin/activate && \
    pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Fetch MinerU's model download script and config template (as root), then hand
# both files over to the non-root user.
# MINERU_REF selects the git ref of opendatalab/MinerU to download from. It
# defaults to the moving `master` branch; pass
# `--build-arg MINERU_REF=<tag-or-commit>` to get a reproducible build.
ARG MINERU_REF=master
RUN wget https://github.com/opendatalab/MinerU/raw/${MINERU_REF}/scripts/download_models_hf.py -O $HOME/app/download_models.py && \
    chmod +x $HOME/app/download_models.py && \
    wget https://github.com/opendatalab/MinerU/raw/${MINERU_REF}/magic-pdf.template.json -O $HOME/app/magic-pdf.json && \
    chown user:user $HOME/app/download_models.py $HOME/app/magic-pdf.json
# Create the output directory and its images subdirectory (as root), both owned
# by the non-root user. Each directory is listed explicitly because
# `install -d` applies the owner/group only to the named directories.
RUN install -d -o user -g user \
        $HOME/app/output \
        $HOME/app/output/images
# Switch to non-root user
USER user

# Download the models and leave a CUDA-enabled config at $HOME/magic-pdf.json.
#  - The template is copied into $HOME only when no config exists there after
#    the download script has run, so a config written by the script is kept.
#    NOTE(review): MinerU's download script appears to write
#    $HOME/magic-pdf.json with the downloaded model paths — confirm.
#  - The sed expression accepts both the "device" and "device-mode" keys with
#    any spacing around the colon; an exact match on `"device": "cpu"` silently
#    changes nothing when the file spells the setting differently.
#  - The final grep fails the build if the config does not select "cuda".
RUN . /opt/mineru_venv/bin/activate && \
    python3 $HOME/app/download_models.py && \
    { [ -f $HOME/magic-pdf.json ] || cp $HOME/app/magic-pdf.json $HOME/magic-pdf.json; } && \
    sed -i -E 's|("device(-mode)?"[[:space:]]*:[[:space:]]*)"cpu"|\1"cuda"|g' $HOME/magic-pdf.json && \
    grep -Eq '"device(-mode)?"[[:space:]]*:[[:space:]]*"cuda"' $HOME/magic-pdf.json
# Add the application sources last so that code changes do not invalidate the
# dependency and model layers above; copied files are owned by the non-root user.
COPY --chown=user . ./

# Re-apply rwxr-xr-x to the whole output tree in case the copy changed its modes.
RUN chmod -R u=rwx,go=rx $HOME/app/output
# Document the port the server listens on (EXPOSE alone does not publish it).
EXPOSE 7860/tcp

# Start the ASGI application `app:app` with the virtual environment's uvicorn,
# listening on all interfaces on port 7860.
CMD ["/opt/mineru_venv/bin/uvicorn", \
     "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860"]