File size: 1,428 Bytes
707c746
3e50743
 
7131f6c
7ff382b
6cba908
7ff382b
 
 
 
 
 
 
 
 
00bd4d6
7ff382b
 
998196e
636bd97
 
6520f0e
477cb33
 
6db6156
2298350
3db094b
 
6520f0e
2514e86
707c746
6db6156
6520f0e
7131f6c
 
823ee94
7131f6c
6db6156
823ee94
79d7c2e
 
6db6156
6520f0e
6db6156
 
 
6520f0e
 
6db6156
6520f0e
2c4789e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so it is visible to the RUN steps below but is not
# left behind in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Install the build toolchain and the download/clone utilities used by the
# later steps. Notes:
#   - apt-get is used instead of apt: its command-line interface is the one
#     intended for scripts.
#   - ca-certificates is listed explicitly because the HTTPS downloads and the
#     git clone below need it, and --no-install-recommends is in effect.
#   - update + install + cleanup stay in one RUN so the package index is never
#     stale and the apt lists do not end up in the layer.
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libblas-dev \
        python3 \
        python3-pip \
        wget \
        zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Everything that follows is downloaded and built under /app.
WORKDIR /app

# Download the ggml model (q4_k) and the mmproj model (f16) from HuggingFace
# into the current directory (/app).
# --progress=dot:giga keeps the build log compact instead of printing a
# progress bar update for every chunk of these downloads.
# A single wget call fetches both URLs and exits non-zero if either fails.
RUN wget --progress=dot:giga \
        https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf \
        https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf

# Upstream llama.cpp branch or tag to build. The default follows the moving
# `master` branch (same as an unqualified clone); override it with
#   docker build --build-arg LLAMA_CPP_REF=<branch-or-tag> .
# to pin a revision.
# NOTE(review): the start command expects the build to produce
# /app/llama.cpp/server via `make` - confirm the chosen revision still does.
ARG LLAMA_CPP_REF=master

# Clone llama.cpp together with its submodules and build it with the default
# make target. The clone is shallow (--depth 1) so the repository history is
# not stored in the image. No CUDA toolkit is installed in this image and no
# CUDA option is passed to make, so this is a CPU build.
# `make -C` builds inside the checkout without changing the working directory.
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        --recurse-submodules --shallow-submodules \
        https://github.com/ggerganov/llama.cpp.git && \
    make -C llama.cpp -j"$(nproc)"

# Create the unprivileged runtime account (UID 1000, with a home directory) and
# give it its own copies of the two model files under /home/user/app.
# `install` copies the files and sets owner, group and mode in one step, so the
# copy and the ownership change land in the same layer; a chown in a separate
# RUN would store the model files in the image one more time.
# The files downloaded to /app are left in place.
RUN useradd --create-home --uid 1000 user && \
    mkdir -p /home/user/app && \
    install --owner=user --group=user --mode=0644 \
        /app/ggml-model-q4_k.gguf \
        /app/mmproj-model-f16.gguf \
        /home/user/app/

# Drop root privileges: every later instruction, and the container's main
# process, runs as the unprivileged "user" account with its home as HOME.
ENV HOME=/home/user
USER user

# Relative paths in the start command resolve against this directory.
WORKDIR /home/user/app

# Document the service port. EXPOSE does not publish anything by itself; map
# the port with `docker run -p` when starting the container.
# NOTE(review): no --port flag is passed below, so this presumably matches the
# server binary's default listening port - confirm.
EXPOSE 8080

# Default command (exec form, so the server is started directly rather than
# through a shell): run the llama.cpp server with the model and mmproj files
# from the working directory, bound to all interfaces, using 10 threads.
CMD [ \
    "/app/llama.cpp/server", \
    "--model", "ggml-model-q4_k.gguf", \
    "--mmproj", "mmproj-model-f16.gguf", \
    "--host", "0.0.0.0", \
    "--threads", "10" \
]