FROM python:3.11.9-slim-bullseye

USER root

RUN adduser --quiet --disabled-password \
    --home /home/conv_user \
    --shell /bin/bash conv_user
RUN adduser conv_user sudo

WORKDIR /srv
ENV PYTHONPATH="/srv"

# System build and runtime dependencies; the CUDA toolkit is installed below,
# after the NVIDIA repository has been added.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    wget \
    build-essential \
    python3-dev \
    libglib2.0-0 \
    libgl1-mesa-glx \
    libpq-dev \
    make \
    && rm -rf /var/lib/apt/lists/*

# Add the NVIDIA CUDA repository and install the CUDA 12 toolkit
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb \
    && dpkg -i cuda-keyring_1.1-1_all.deb \
    && rm cuda-keyring_1.1-1_all.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends cuda-toolkit-12-* \
    && rm -rf /var/lib/apt/lists/*

# Set CUDA environment variables
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Install vLLM into a uv-managed virtual environment

RUN pip install uv
RUN uv venv --python 3.11 --python-preference managed
# Each RUN starts a fresh shell, so sourcing .venv/bin/activate here would not
# persist across layers; uv pip targets the .venv in the working directory,
# and the ENTRYPOINT activates it explicitly at runtime.
RUN uv pip install setuptools

RUN uv pip install vllm==0.7.2

# Install transformers from source; this specific revision is required for
# Qwen2.5, since there are breaking changes on master.
RUN uv pip install --upgrade git+https://github.com/huggingface/transformers.git@336dc69d63d56f232a183a3e7f52790429b871ef

RUN uv pip install ray[adag]
RUN uv pip install cupy-cuda12x

ENTRYPOINT ["/bin/bash", "-c", ". .venv/bin/activate && export VLLM_USE_V1=1 && python -m vllm.entrypoints.openai.api_server --model convergence-ai/proxy-lite --trust-remote-code --tokenizer-pool-size 20 --limit-mm-per-prompt image=1 --enable-auto-tool-choice --tool-call-parser hermes --port 7860"]
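
# A minimal build/run sketch (the image tag, GPU flag, and Hugging Face cache
# mount are assumptions, not defined by this file):
#
#   docker build -t proxy-lite-vllm .
#   docker run --gpus all -p 7860:7860 \
#       -v ~/.cache/huggingface:/root/.cache/huggingface \
#       proxy-lite-vllm
#
# The server then exposes an OpenAI-compatible API on port 7860, matching the
# --port flag in the ENTRYPOINT above.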