# Provenance: Dockerfile by kai-sheng, commit 2a71a10 ("Update Dockerfile"), 932 bytes.
# Base image pinned to a specific Debian release (bookworm) so that the
# Tesseract major version -- and therefore the tessdata directory configured
# below -- does not change silently when the floating python:3.9 tag moves.
FROM python:3.9-bookworm

# Directory where Tesseract looks for its *.traineddata language files.
# Debian bookworm packages Tesseract 5.x, which keeps them under .../5/tessdata
# (the older 4.x packages used .../4.00/tessdata).
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/

# Install Tesseract and every language pack from Debian's own repositories.
# The earlier recipe added ppa:alex-p/tesseract-ocr through add-apt-repository,
# but Launchpad PPAs only publish Ubuntu suites and are not usable on this
# Debian-based image, so software-properties-common is no longer required.
# Individual language packages (e.g. tesseract-ocr-chi-sim, tesseract-ocr-msa)
# are pulled in by the tesseract-ocr-all metapackage.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-all \
    && rm -rf /var/lib/apt/lists/*

# No PATH change is needed: apt installs the tesseract executable into
# /usr/bin, which is already on the image's default PATH.
# Application home; WORKDIR creates the directory if it does not exist and
# makes it the working directory for every following instruction.
WORKDIR /code

# Copy the dependency manifest on its own first, so the pip layer below is
# reused from cache until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt

# Install Python dependencies without keeping pip's download cache in the layer.
RUN pip install \
        --upgrade \
        --no-cache-dir \
        --requirement /code/requirements.txt

# Bring in the rest of the build context (application source).
COPY . /code/

# Start gunicorn bound to all interfaces on port 7860, serving the "app"
# object from the "main" module (exec form, so gunicorn receives signals directly).
CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"]