FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
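# Suppress interactive prompts from apt during the image build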
ENV DEBIAN_FRONTEND=noninteractive
# Add NVIDIA CUDA Repository
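# The pin file gives NVIDIA's repo priority over Ubuntu's own CUDA packages,
# and the fetched key lets apt verify the repo's package signatures.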
RUN apt update && apt install -y wget gnupg software-properties-common && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
    mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \
    add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
# Install Specific libcublas Version
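# Pinning an exact version keeps apt from pulling a cuBLAS build that does not
# match the CUDA 12.2 toolkit already in the base image.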
RUN apt update && \
    apt install -y libcublas-12-2=12.2.5.6 libcublas-dev-12-2=12.2.5.6 && \
    apt clean && rm -rf /var/lib/apt/lists/*
# Install Remaining Packages
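# build-essential and cmake compile the server; python3/pip, curl, and git are
# fetch/tooling helpers; zlib1g-dev and libblas-dev cover optional build deps.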
RUN apt update && \
    apt install --no-install-recommends -y cuda-libraries-12-2 cuda-libraries-dev-12-2 \
        build-essential python3 python3-pip curl git cmake zlib1g-dev libblas-dev && \
    apt clean && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Download ggml and mmproj models from HuggingFace
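# ggml-model-q4_k.gguf holds the 4-bit-quantized LLaVA v1.5 13B weights;
# mmproj-model-f16.gguf is the CLIP projector LLaVA needs for image input.
# Use resolve/main (not raw/main) so wget downloads the binary weights rather
# than a Git LFS pointer file.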
RUN wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q4_k.gguf && \
    wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf
# Clone and build llava-server with CUDA support
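# LLAMA_CUBLAS=1 builds the bundled llama.cpp against cuBLAS so inference can
# offload work to the GPU.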
RUN git clone https://github.com/matthoffner/llava-cpp-server.git && \
    cd llava-cpp-server && \
    git submodule init && \
    git submodule update && \
    LLAMA_CUBLAS=1 make
# Create a non-root user for security reasons
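# The models are copied into the user's home so they remain readable after
# USER drops root privileges below.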
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/app && \
    cp /app/ggml-model-q4_k.gguf /home/user/app && \
    cp /app/mmproj-model-f16.gguf /home/user/app
USER user
ENV HOME=/home/user
WORKDIR $HOME/app
# Expose the port
EXPOSE 8080
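# EXPOSE is documentation only; publish the port at run time with -p 8080:8080.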
# Start the llava-server with models
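# Binding to 0.0.0.0 makes the server reachable through the published port.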
CMD ["/app/llava-cpp-server/bin/llava-server", "-m", "/home/user/app/ggml-model-q4_k.gguf", "--mmproj", "/home/user/app/mmproj-model-f16.gguf", "--host", "0.0.0.0"]
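# Example usage (a sketch, assuming the host has the NVIDIA Container Toolkit):
#   docker build -t llava-server .
#   docker run --gpus all -p 8080:8080 llava-server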