Spaces:

rbantog
/

llama-cpp-server

Running

llama-cpp-server / Dockerfile

initial commit

4f0cbe4 5 months ago

1.29 kB

	# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
	# you will also find guides on how best to write your Dockerfile

	FROM ubuntu:20.04

	# URL of the GGUF model file downloaded during the build. Pass it as a
	# build argument to override; when the argument is unset or empty, the
	# default after ":-" is used. ENV persists the resolved value so it is
	# available to later build steps and in the final image's environment.
	ARG MODEL_DOWNLOAD_LINK
	ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK:-https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf?download=true}

	# Keep apt/debconf from prompting during package installation. Declared
	# with ARG instead of ENV: RUN steps in this stage still see the variable,
	# but it is not baked into the environment of the running container.
	ARG DEBIAN_FRONTEND=noninteractive

	# System packages are installed before the build context is copied, so this
	# slow layer stays cached when only the Space's own files change.
	USER root

	# One layer for the whole package setup:
	#  - "apt-get update" and every "apt-get install" share a layer, so a cached
	#    package index can never be paired with a newer install step.
	#  - --no-install-recommends skips optional packages; ca-certificates is
	#    listed explicitly because the HTTPS downloads and the git clone
	#    performed during this build need it.
	#  - The NodeSource setup script is saved to a file and then executed
	#    rather than piped into bash, so a failed download aborts the build.
	#  - The apt lists and the setup script are deleted in the same layer that
	#    created them, so they do not end up in the image.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        cmake \
	        curl \
	        g++ \
	        git \
	        python3 \
	        wget \
	    && curl -fsSL https://deb.nodesource.com/setup_18.x -o /tmp/nodesource_setup.sh \
	    && bash /tmp/nodesource_setup.sh \
	    && apt-get install -y --no-install-recommends nodejs \
	    && rm -rf /tmp/nodesource_setup.sh /var/lib/apt/lists/*

	# Unprivileged account (UID 1000) with a home directory.
	RUN useradd --create-home --uid 1000 user
	USER user
	ENV PATH="/home/user/.local/bin:$PATH"

	# WORKDIR is declared after USER, in the same order as before this change.
	WORKDIR /app

	# Copy the build context last, owned by the unprivileged user, so edits to
	# it only invalidate the layers that follow.
	COPY --chown=user . /app

	# Everything from here on runs as the unprivileged user.
	USER user

	# Project-local script shipped in the build context; run from /app before
	# the llama.cpp sources are fetched.
	RUN python3 replace_hw.py

	# NOTE(review): this clones the upstream default branch at build time, so
	# helloworld.patch and the directory layout used below depend on whatever
	# upstream ships on the day of the build -- consider pinning a tag or
	# commit for reproducible builds.
	RUN git clone https://github.com/ggerganov/llama.cpp.git

	WORKDIR /app/llama.cpp
	RUN git apply ../helloworld.patch

	# Build the server web UI. "npm ci" installs exactly the versions recorded
	# in the lockfile instead of re-resolving them.
	# NOTE(review): requires a package-lock.json in this directory -- confirm.
	WORKDIR /app/llama.cpp/examples/server/webui
	RUN npm ci && npm run build

	# Configure and compile llama.cpp with static libraries.
	# --config only affects multi-config generators, so the build type is also
	# set at configure time. BUILD_JOBS sets the number of parallel compile
	# jobs; its default matches the previously hard-coded value.
	WORKDIR /app/llama.cpp
	ARG BUILD_JOBS=8
	RUN cmake -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF \
	    && cmake --build build --config Release -j "${BUILD_JOBS}"

	# Download the model next to the server binary. The URL is quoted so that
	# characters such as "?" are never treated as shell glob patterns.
	WORKDIR /app/llama.cpp/build/bin
	RUN wget -nv -O local_model.gguf "${MODEL_DOWNLOAD_LINK}"

	# Documents the port the server listens on (see --port below).
	EXPOSE 8080

	# Exec form, so llama-server runs directly as the container's main process.
	# The model path is absolute so it does not depend on the working directory.
	CMD ["/app/llama.cpp/build/bin/llama-server", "--host", "0.0.0.0", "--port", "8080", "-c", "2048", "-m", "/app/llama.cpp/build/bin/local_model.gguf", "--cache-type-k", "q8_0"]