FROM nvidia/cuda:12.2.0-devel-ubuntu20.04

# Suppress interactive prompts (e.g., tzdata's timezone dialog) during apt installs
ENV DEBIAN_FRONTEND=noninteractive

# Add NVIDIA CUDA Repository
# (the pin file raises the repo's apt priority; software-properties-common
# provides add-apt-repository, which is not installed by default)
RUN apt update && apt install -y wget gnupg software-properties-common && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
    mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \
    add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"

# Install a specific libcublas version (pinned for a reproducible build)
RUN apt update && \
    apt install -y libcublas-12-2=12.2.5.6 libcublas-dev-12-2=12.2.5.6 && \
    apt clean && rm -rf /var/lib/apt/lists/*

# Install Remaining Packages
RUN apt update && \
    apt install --no-install-recommends -y cuda-libraries-12-2 cuda-libraries-dev-12-2 build-essential python3 python3-pip curl git cmake zlib1g-dev libblas-dev && \
    apt clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Download ggml and mmproj models from HuggingFace
# (resolve/, not raw/: raw/ returns the Git LFS pointer file, not the actual weights)
RUN wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q4_k.gguf && \
    wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf

# Clone and build llava-server with CUDA support
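# (LLAMA_CUBLAS=1 tells the bundled llama.cpp Makefile to compile with cuBLAS,
# so inference can be offloaded to the GPU)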
RUN git clone https://github.com/matthoffner/llava-cpp-server.git && \
    cd llava-cpp-server && \
    git submodule init && \
    git submodule update && \
    LLAMA_CUBLAS=1 make

# Create a non-root user for security reasons
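# (the weights are copied into the user's home to match the WORKDIR below;
# note the originals also remain under /app, so they are stored twice in the image)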
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/app && \
    cp /app/ggml-model-q4_k.gguf /home/user/app && \
    cp /app/mmproj-model-f16.gguf /home/user/app

USER user
ENV HOME=/home/user

WORKDIR $HOME/app

# Expose the port
EXPOSE 8080
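# (no --port flag is passed to the server below, so 8080 is assumed to match
# its default)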

# Debug: list the working directory contents at build time
RUN ls -al

# Start the llava-server with models
CMD ["/app/llava-cpp-server/bin/llava-server", "-m", "/home/user/app/ggml-model-q4_k.gguf", "--mmproj", "/home/user/app/mmproj-model-f16.gguf", "--host", "0.0.0