Spaces:

convergence-ai
/

demo-api

Running on A100

App Files Community

XanderJC committed on Feb 18

Commit

4e0631e

1 Parent(s): e8211bd

reset

Browse files

Files changed (1) hide show

Dockerfile +4 -35

Dockerfile CHANGED Viewed

@@ -19,43 +19,12 @@ RUN apt-get install -y --no-install-recommends \
     libglib2.0-0 \
     libgl1-mesa-glx \
     libpq-dev \
-    make \
-    cuda
-# Add NVIDIA CUDA repository
-RUN apt-get update && apt-get install -y wget
-RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
-RUN dpkg -i cuda-keyring_1.1-1_all.deb
-RUN apt-get update
-RUN apt-get install -y --no-install-recommends \
-    build-essential \
-    python3-dev \
-    libglib2.0-0 \
-    libgl1-mesa-glx \
-    libpq-dev \
-    make \
-    cuda-toolkit-12-2
-# Set CUDA environment variables
-ENV CUDA_HOME=/usr/local/cuda
-ENV PATH=${CUDA_HOME}/bin:${PATH}
-ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
 # install latest vllm
-RUN pip install uv
-RUN uv venv --python 3.11 --python-preference managed
-RUN . .venv/bin/activate
-RUN uv pip install setuptools
-RUN uv pip install vllm==0.7.2
 # Install transformers from source, need specific revision for Qwen2.5 as
 # breaking changes in master
-RUN uv pip install --upgrade git+https://github.com/huggingface/transformers.git@336dc69d63d56f232a183a3e7f52790429b871ef
-RUN uv pip install ray[adag]
-RUN uv pip install cupy-cuda12x
-ENTRYPOINT ["/bin/bash", "-c", ". .venv/bin/activate && export VLLM_USE_V1=1 && python -m vllm.entrypoints.openai.api_server --model convergence-ai/proxy-lite --trust-remote-code --tokenizer-pool-size 20 --limit-mm-per-prompt image=1 --enable-auto-tool-choice --tool-call-parser hermes --port 7860"]

     libglib2.0-0 \
     libgl1-mesa-glx \
     libpq-dev \
+    make
 # install latest vllm
+RUN pip install vllm==0.7.2
 # Install transformers from source, need specific revision for Qwen2.5 as
 # breaking changes in master
+RUN pip install --upgrade git+https://github.com/huggingface/transformers.git@336dc69d63d56f232a183a3e7f52790429b871ef
+ENTRYPOINT ["sh", "-c", "python -m vllm.entrypoints.openai.api_server --model convergence-ai/proxy-lite --trust-remote-code --tokenizer-pool-size 10 --limit-mm-per-prompt image=2 --enable-auto-tool-choice --tool-call-parser hermes --port 7860"]