Commit: 8fc9285
Author: Fedir Zadniprovskyi
Parent: 4005fd1

feat: reorganize docker files
Changed files:
- .github/workflows/docker-build-and-push.yaml  +7 -4
- Dockerfile.cuda → Dockerfile  +17 -11
- Dockerfile.cpu  +0 -22
- README.md  +17 -9
- Taskfile.yaml  +0 -12
- compose.cpu.yaml  +17 -0
- compose.cuda-cdi.yaml  +24 -0
- compose.cuda.yaml  +22 -0
- observability-compose.yaml → compose.observability.yaml  +0 -0
- compose.yaml  +3 -37
.github/workflows/docker-build-and-push.yaml
CHANGED

```diff
@@ -13,11 +13,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
+        # https://hub.docker.com/r/nvidia/cuda/tags
+        base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
         include:
-          - dockerfile: Dockerfile.cuda
+          - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
             tag-suffix: -cuda
-          - dockerfile: Dockerfile.cpu
+          - base-image: ubuntu:24.04
             tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +46,9 @@ jobs:
        uses: docker/build-push-action@v6
        with:
          context: .
-         file: ${{ matrix.dockerfile }}
+         file: Dockerfile
+         build-args: |
+           BASE_IMAGE=${{ matrix.base-image }}
          push: true
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
```
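Both matrix legs now build the same `Dockerfile` and differ only in the `BASE_IMAGE` build argument. A local sketch of what each leg does (the tags are illustrative, not the CI-generated `${{ steps.meta.outputs.tags }}`):

```bash
# CUDA leg (tag-suffix: -cuda)
docker build --file Dockerfile \
  --build-arg BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04 \
  --tag faster-whisper-server:dev-cuda .

# CPU leg (tag-suffix: -cpu)
docker build --file Dockerfile \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --tag faster-whisper-server:dev-cpu .
```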
Dockerfile.cuda → Dockerfile
RENAMED

```diff
@@ -1,22 +1,28 @@
-FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+FROM ${BASE_IMAGE}
 LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
 # `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
 # hadolint ignore=DL3008
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
+USER ubuntu
+ENV HOME=/home/ubuntu \
+    PATH=/home/ubuntu/.local/bin:$PATH
+WORKDIR $HOME/faster-whisper-server
+COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
 # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
+# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=uv.lock,target=uv.lock \
     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
+    uv sync --frozen --compile-bytecode --no-install-project
+COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui --extra opentelemetry
+    uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
+EXPOSE 8000
 CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
```
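A minimal smoke test of an image built from the sketch above (the container name is arbitrary, and `/docs` assumes the FastAPI app serves its default docs page):

```bash
docker run --rm --detach --publish 8000:8000 --name fws-smoke faster-whisper-server:dev-cpu
# UVICORN_HOST/UVICORN_PORT above mean the server listens on 0.0.0.0:8000
curl http://localhost:8000/docs
docker stop fws-smoke
```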
Dockerfile.cpu
DELETED

```dockerfile
FROM ubuntu:24.04
LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
# `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
# hadolint ignore=DL3008
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
WORKDIR /root/faster-whisper-server
# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project
COPY ./src ./pyproject.toml ./uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --extra ui
ENV WHISPER__MODEL=Systran/faster-whisper-small
ENV UVICORN_HOST=0.0.0.0
ENV UVICORN_PORT=8000
CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
```
README.md
CHANGED

````diff
@@ -25,18 +25,26 @@ See [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio)
 
 
 
-Using Docker
+Using Docker Compose (Recommended)
+NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
+
 ```bash
-…
-…
-…
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+
+# for GPU support
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+docker compose --file compose.cuda.yaml up --detach
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+docker compose --file compose.cpu.yaml up --detach
 ```
-…
+
+Using Docker
 ```bash
-…
-docker …
-# …
-docker …
+# for GPU support
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
````
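Once either variant is up, the OpenAI-compatible audio endpoint this README section opens with can be exercised directly. A sketch (`audio.wav` is a placeholder file; the model name mirrors the CPU default above):

```bash
curl http://localhost:8000/v1/audio/transcriptions \
  --form "file=@audio.wav" \
  --form "model=Systran/faster-whisper-small"
```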
Taskfile.yaml
CHANGED

```diff
@@ -11,19 +11,7 @@ tasks:
       - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
     sources:
       - src/**/*.py
-  build:
-    cmds:
-      - docker compose build
-    sources:
-      - Dockerfile.*
-      - src/**/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
-  docker-build:
-    cmds:
-      - docker compose build --builder main {{.CLI_ARGS}}
-    sources:
-      - Dockerfile.*
-      - src/faster_whisper_server/*.py
   cii:
     cmds:
       - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
```
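With the compose-driven `build` and `docker-build` tasks removed, multi-arch images can still be produced through the builder that the remaining task creates. A sketch (the tag is illustrative):

```bash
task create-multi-arch-builder
docker buildx build --builder main \
  --platform linux/amd64,linux/arm64 \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --tag faster-whisper-server:dev-cpu .
```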
compose.cpu.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
services:
  faster-whisper-server:
    extends:
      file: compose.yaml
      service: faster-whisper-server
    image: fedirz/faster-whisper-server:latest-cpu
    build:
      args:
        BASE_IMAGE: ubuntu:24.04
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-small
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
volumes:
  hugging_face_cache:
```
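Because the service is assembled with `extends` on top of compose.yaml, the merged definition can be inspected without starting anything:

```bash
# print the fully merged configuration that `extends` produces
docker compose --file compose.cpu.yaml config
```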
compose.cuda-cdi.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
# This file is for those who have the CDI Docker feature enabled
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
services:
  faster-whisper-server:
    extends:
      file: compose.cuda.yaml
      service: faster-whisper-server
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          # WARN: requires Docker Compose 2.24.2
          # https://docs.docker.com/reference/compose-file/merge/#replace-value
          devices: !override
            - capabilities: ["gpu"]
              driver: cdi
              device_ids:
                - nvidia.com/gpu=all
volumes:
  hugging_face_cache:
```
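`!override` replaces, rather than merges with, the `devices` reservation inherited from compose.cuda.yaml. The `nvidia.com/gpu=all` device name must exist in the host's CDI registry; per the NVIDIA Container Toolkit docs linked above, a sketch of generating and verifying it:

```bash
# generate a CDI spec for the installed NVIDIA driver, then list the
# device names it defines (expect nvidia.com/gpu=... entries)
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
nvidia-ctk cdi list
```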
compose.cuda.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
services:
  faster-whisper-server:
    extends:
      file: compose.yaml
      service: faster-whisper-server
    image: fedirz/faster-whisper-server:latest-cuda
    build:
      args:
        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-large-v3
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
volumes:
  hugging_face_cache:
```
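A quick check that the GPU reservation actually reaches the container (assumes the NVIDIA Container Toolkit is installed; `nvidia-smi` is made available inside GPU containers by the NVIDIA runtime):

```bash
docker compose --file compose.cuda.yaml up --detach
docker compose --file compose.cuda.yaml exec faster-whisper-server nvidia-smi
```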
observability-compose.yaml → compose.observability.yaml
RENAMED

File without changes
compose.yaml
CHANGED

```diff
@@ -1,11 +1,9 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
-include:
-  - observability-compose.yaml
 services:
-  faster-whisper-server-cuda:
-    image: fedirz/faster-whisper-server:latest-cuda
+  faster-whisper-server:
+    container_name: faster-whisper-server
     build:
-      dockerfile: Dockerfile.cuda
+      dockerfile: Dockerfile
       context: .
       platforms:
         - linux/amd64
@@ -13,39 +11,7 @@ services:
     restart: unless-stopped
     ports:
       - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
     develop:
       watch:
         - path: faster_whisper_server
           action: rebuild
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: ["gpu"]
-            # If you have CDI feature enabled use the following instead
-            # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
-            # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
-            # - driver: cdi
-            #   device_ids:
-            #     - nvidia.com/gpu=all
-  faster-whisper-server-cpu:
-    image: fedirz/faster-whisper-server:latest-cpu
-    build:
-      dockerfile: Dockerfile.cpu
-      context: .
-      platforms:
-        - linux/amd64
-        - linux/arm64
-    restart: unless-stopped
-    ports:
-      - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
-    develop:
-      watch:
-        - path: faster_whisper_server
-          action: rebuild
-volumes:
-  hugging_face_cache:
```
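The `develop.watch` section pairs with Compose's file-watch mode (the TODO at the top of the file points at the uv guide for configuring it). A minimal sketch:

```bash
# rebuild the image and restart the service whenever files under
# faster_whisper_server/ change
docker compose watch
```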