# Base image
FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
# Update and install necessary dependencies
RUN apt update && \
    apt install --no-install-recommends -y \
        build-essential \
        nvidia-cuda-toolkit \
        python3 \
        python3-pip \
        wget \
        curl \
        git \
        cmake \
        zlib1g-dev \
        libblas-dev && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*
# Set CUDA environment variables (the official nvidia/cuda image already provides these, but being explicit doesn't hurt)
ENV PATH="/usr/local/cuda/bin:$PATH" \
    LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH" \
    CUDA_HOME="/usr/local/cuda"
WORKDIR /app
# Download ggml and mmproj models from HuggingFace
RUN wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf && \
    wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf
# Clone and build llava-server with CUDA support
RUN git clone https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    git submodule init && \
    git submodule update && \
    make LLAMA_CUBLAS=1
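# The make invocation above builds the llama.cpp binaries with cuBLAS GPU support,
# including the `server` binary at /app/llama.cpp/server that the CMD below runs.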
# Create a non-root user for security reasons
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/app && \
    cp /app/ggml-model-q4_k.gguf /home/user/app && \
    cp /app/mmproj-model-f16.gguf /home/user/app
RUN chown user:user /home/user/app/ggml-model-q4_k.gguf && \
    chown user:user /home/user/app/mmproj-model-f16.gguf
USER user
ENV HOME=/home/user
WORKDIR $HOME/app
# Expose the port
EXPOSE 8080
# Start the llava-server with models
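# --host 0.0.0.0 makes the server reachable on the exposed port;
# -ngl 30 offloads 30 model layers to the GPU; -ts 100,0 places all offloaded layers on the first GPU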
CMD ["/app/llama.cpp/server", "--model", "ggml-model-q4_k.gguf", "--mmproj", "mmproj-model-f16.gguf", "--host", "0.0.0.0", "--threads", "4", "-ngl", "30", "-ts", "100,0"]
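# Example usage (image name is illustrative, not part of this Space):
#   docker build -t bakllava-server .
#   docker run --gpus all -p 8080:8080 bakllava-server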