# paddle-ocr-api / Dockerfile
# TheOneHong's picture
# Update Dockerfile
# 65454e0 verified
# Reference Dockerfile:
# https://huggingface.co/spaces/gaunernst/layoutlm-docvqa-paddleocr/blob/main/Dockerfile
FROM python:3.10

# Build-time only: exposed to the RUN steps below so pip skips its cache.
ARG PIP_NO_CACHE_DIR=1

# Expose no CUDA devices; the steps below install and run CPU builds only.
ENV CUDA_VISIBLE_DEVICES=-1
# libssl1.1 for PaddlePaddle. Try the distro package first; if the base
# image's release does not provide it, fall back to the build published in
# the Ubuntu archive.
# NOTE(review): the fallback .deb is amd64-only and is fetched over plain
# HTTP with no checksum verification - consider pinning and checking a sha256.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates wget \
    && ( apt-get install -y --no-install-recommends libssl1.1 \
         || ( wget -O /tmp/libssl1.1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
              && dpkg -i /tmp/libssl1.1.deb \
              && rm /tmp/libssl1.1.deb ) ) \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Install the CPU build of PyTorch from the PyTorch CPU wheel index.
RUN pip install --index-url https://download.pytorch.org/whl/cpu torch==2.1.1
# Install PaddlePaddle 3.0.0. If the default index fails, retry from the
# Baidu mirror with the same pinned version, so both paths produce the same
# build instead of the mirror path silently picking up whatever is newest.
RUN pip install paddlepaddle==3.0.0 \
    || pip install paddlepaddle==3.0.0 -i https://mirror.baidu.com/pypi/simple
# Install the base Python dependencies.
# NOTE(review): versions are unpinned, so rebuilds may resolve newer releases.
RUN pip install \
        fastapi \
        gradio \
        Pillow \
        transformers \
        uvicorn
# Downgrade and pin NumPy first, then install PaddleOCR. Afterwards remove the
# regular OpenCV packages and install the headless build instead, repeating
# the NumPy pin so the forced reinstall keeps it at 1.26.4.
RUN pip install "numpy==1.26.4" \
    && pip install paddleocr==3.0.3 \
    && pip uninstall -y opencv-python opencv-contrib-python \
    && pip install --force-reinstall "opencv-python-headless<4.11" "numpy==1.26.4"
# Extra dependencies for PaddleOCR 3.0. NumPy is listed again so the forced
# reinstall keeps it at 1.26.4.
RUN pip install --force-reinstall "numpy==1.26.4" pyclipper shapely
# Create a user with UID 1000 and a home directory, then switch to it.
RUN useradd --create-home --uid 1000 user
USER user

# Work from the app directory inside that user's home.
ENV HOME=/home/user
WORKDIR ${HOME}/app

# Create the output directory (relative to the working directory).
RUN mkdir -p ./output
# Copy only the sample images first: they are all the warm-up steps need.
# app.py is copied last so that editing it does not invalidate the cached
# warm-up layers.
COPY --chown=user table.jpg word_1.jpg $HOME/app/

# Warm up the models with the PaddleOCR 3.0 CLI (presumably so they are
# fetched at build time rather than on first use - TODO confirm).
RUN paddleocr ocr -i ./word_1.jpg --lang ch --device cpu

# Best-effort table-structure warm-up: a failure must not break the build,
# but it is reported as a failure rather than as a completed preload.
# NOTE(review): verify that `table_structure` is a valid paddleocr 3.0.3
# subcommand; if it is not, this step always takes the fallback branch.
RUN paddleocr table_structure -i ./table.jpg --lang ch --device cpu \
    || echo "WARNING: table structure preload failed; continuing without it"

# Application code goes last because it is the most frequently changing file.
COPY --chown=user app.py $HOME/app/
# Document the port the server binds to (metadata only; does not publish it).
EXPOSE 7860

# Startup command: serve the `app` object from app.py with uvicorn on all
# interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]