# https://huggingface.co/spaces/gaunernst/layoutlm-docvqa-paddleocr/blob/main/Dockerfile
#
# CPU-only image: installs PyTorch (CPU wheels), PaddlePaddle 3.0 and
# PaddleOCR 3.0, pre-downloads the OCR models at build time, and serves
# app.py with uvicorn on port 7860 as an unprivileged user.
FROM python:3.10

# Hide all CUDA devices from the ML frameworks; this image runs on CPU only.
ENV CUDA_VISIBLE_DEVICES=-1

# Build-time only: ARG values are visible to RUN steps as environment
# variables, so pip skips its download cache in every layer below.
ARG PIP_NO_CACHE_DIR=1

# libssl1.1 is required by PaddlePaddle. Try the distro package first; if the
# base image's release no longer ships it, fall back to the Ubuntu focal .deb.
# NOTE(review): the fallback .deb is fetched over plain HTTP with no checksum
# verification and is amd64-only -- consider pinning a sha256 for it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && ( apt-get install -y --no-install-recommends libssl1.1 \
        || ( wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
            && dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
            && rm libssl1.1_1.1.1f-1ubuntu2_amd64.deb ) ) \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# PyTorch, CPU-only build (wheels come from the PyTorch CPU index).
RUN pip install torch==2.1.1 --index-url https://download.pytorch.org/whl/cpu

# PaddlePaddle 3.0. If the pinned install fails, retry against the Baidu
# mirror. NOTE(review): the mirror fallback is unpinned.
RUN pip install paddlepaddle==3.0.0 \
    || pip install paddlepaddle --index-url https://mirror.baidu.com/pypi/simple

# Base application dependencies.
# NOTE(review): these are unpinned, so rebuilds may pick up newer releases.
RUN pip install transformers gradio Pillow fastapi uvicorn

# Pin NumPy first, then install PaddleOCR. Afterwards swap the GUI OpenCV
# builds for the headless one and force NumPy back to the pinned version in
# case a dependency moved it.
RUN pip install "numpy==1.26.4" \
    && pip install paddleocr==3.0.3 \
    && pip uninstall -y opencv-python opencv-contrib-python \
    && pip install "opencv-python-headless<4.11" "numpy==1.26.4" --force-reinstall

# Extra dependencies for PaddleOCR 3.0 (NumPy pin re-applied once more).
RUN pip install shapely pyclipper "numpy==1.26.4" --force-reinstall

# Create the unprivileged runtime user and switch to it.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
WORKDIR $HOME/app

# Output directory (created relative to WORKDIR).
RUN mkdir -p output

# Copy only the sample images needed for model warm-up first, so that edits
# to app.py do not invalidate the (slow) model-download layers below.
COPY --chown=user table.jpg word_1.jpg $HOME/app/

# Warm up the models with the PaddleOCR 3.0 CLI so they are fetched at build
# time. The OCR warm-up must succeed; the table-structure warm-up is
# best-effort and only logs a warning on failure.
RUN paddleocr ocr -i ./word_1.jpg --lang ch --device cpu
RUN paddleocr table_structure -i ./table.jpg --lang ch --device cpu \
    || echo "WARNING: table structure preload failed; continuing without it" >&2

# Application code goes last: it is the most frequently changing input.
COPY --chown=user app.py $HOME/app/

# Port served by uvicorn (documentation only; does not publish the port).
EXPOSE 7860

# Start command.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]