llama-cpp-server-7b / Dockerfile
# Build stage: compile llama.cpp from source and fetch the model
ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build
# Toolchain for the build, plus git/wget to fetch the sources and the model
RUN apt-get update && \
    apt-get install -y build-essential git wget
WORKDIR /app
# Clone llama.cpp (unpinned, so the build tracks the latest commit) and build it;
# this produces the `server` binary that is copied into the runtime stage below
RUN git clone https://github.com/ggerganov/llama.cpp.git
WORKDIR /app/llama.cpp
RUN make
# Download the quantized Saiga2 7B model (q8_0 GGUF) from Hugging Face
WORKDIR /data
RUN wget https://huggingface.co/IlyaGusev/saiga2_7b_gguf/resolve/main/model-q8_0.gguf -nv -O model.gguf
# Runtime stage: carry over only the server binary and the model
FROM ubuntu:$UBUNTU_VERSION AS runtime
WORKDIR /app
COPY --from=build /app/llama.cpp/server /app
COPY --from=build /data/model.gguf /data/model.gguf
# UTF-8 locale for the server's text handling
ENV LC_ALL=C.utf8
EXPOSE 7860
# Serve the model with a 4096-token context, 2 threads, listening on all interfaces
CMD ["/app/server", "-m", "/data/model.gguf", "-c", "4096", "-t", "2", "--port", "7860", "--host", "0.0.0.0"]
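# A minimal usage sketch (the image tag and prompt below are illustrative and not
# part of this file): build the image, run it with the port published, then query
# the llama.cpp server's HTTP completion endpoint.
#
#   docker build -t llama-cpp-server-7b .
#   docker run --rm -p 7860:7860 llama-cpp-server-7b
#   curl http://localhost:7860/completion -d '{"prompt": "Hello", "n_predict": 64}'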