llama-cpp-server-7b / Dockerfile
# Build stage: compile llama.cpp from source and fetch the model
ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build
# Toolchain for the build, plus git/wget to fetch the sources and the model
RUN apt-get update && \
    apt-get install -y build-essential git wget
WORKDIR /app
# Clone llama.cpp (unpinned, so the build tracks the latest commit) and build it;
# this produces the `server` binary that is copied into the runtime stage below
RUN git clone https://github.com/ggerganov/llama.cpp.git
WORKDIR /app/llama.cpp
RUN make
# Download the quantized Saiga2 7B model (q8_0 GGUF) from Hugging Face
WORKDIR /data
RUN wget https://huggingface.co/IlyaGusev/saiga2_7b_gguf/resolve/main/model-q8_0.gguf -nv -O model.gguf
# Runtime stage: carry over only the server binary and the model
FROM ubuntu:$UBUNTU_VERSION AS runtime
WORKDIR /app
COPY --from=build /app/llama.cpp/server /app
COPY --from=build /data/model.gguf /data/model.gguf
# UTF-8 locale for the server's text handling
ENV LC_ALL=C.utf8
EXPOSE 7860
# Serve the model with a 4096-token context, 2 threads, listening on all interfaces
CMD ["/app/server", "-m", "/data/model.gguf", "-c", "4096", "-t", "2", "--port", "7860", "--host", "0.0.0.0"]
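# A minimal usage sketch (the image tag and prompt below are illustrative and not
# part of this file): build the image, run it with the port published, then query
# the llama.cpp server's HTTP completion endpoint.
#
#   docker build -t llama-cpp-server-7b .
#   docker run --rm -p 7860:7860 llama-cpp-server-7b
#   curl http://localhost:7860/completion -d '{"prompt": "Hello", "n_predict": 64}'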