Commit: 8fc9285
Author: Fedir Zadniprovskyi
Parent: 4005fd1

feat: reorganize docker files
Changed files:
- .github/workflows/docker-build-and-push.yaml  +7 -4
- Dockerfile.cuda → Dockerfile  +17 -11
- Dockerfile.cpu  +0 -22
- README.md  +17 -9
- Taskfile.yaml  +0 -12
- compose.cpu.yaml  +17 -0
- compose.cuda-cdi.yaml  +24 -0
- compose.cuda.yaml  +22 -0
- observability-compose.yaml → compose.observability.yaml  +0 -0
- compose.yaml  +3 -37
.github/workflows/docker-build-and-push.yaml
CHANGED

```diff
@@ -13,11 +13,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
+        # https://hub.docker.com/r/nvidia/cuda/tags
+        base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
         include:
-          - dockerfile: Dockerfile.cuda
+          - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
             tag-suffix: -cuda
-          - dockerfile: Dockerfile.cpu
+          - base-image: ubuntu:24.04
             tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +46,9 @@ jobs:
        uses: docker/build-push-action@v6
        with:
          context: .
-         file: ${{ matrix.dockerfile }}
+         file: Dockerfile
+         build-args: |
+           BASE_IMAGE=${{ matrix.base-image }}
          push: true
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
```
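Both matrix legs now build the same `Dockerfile` and differ only in the `BASE_IMAGE` build argument. A local sketch of what each leg does (the tags are illustrative, not the CI-generated `${{ steps.meta.outputs.tags }}`):

```bash
# CUDA leg (tag-suffix: -cuda)
docker build --file Dockerfile \
  --build-arg BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04 \
  --tag faster-whisper-server:dev-cuda .

# CPU leg (tag-suffix: -cpu)
docker build --file Dockerfile \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --tag faster-whisper-server:dev-cpu .
```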
Dockerfile.cuda → Dockerfile
RENAMED

```diff
@@ -1,22 +1,28 @@
-FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+FROM ${BASE_IMAGE}
 LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
 # `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
 # hadolint ignore=DL3008
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
+USER ubuntu
+ENV HOME=/home/ubuntu \
+    PATH=/home/ubuntu/.local/bin:$PATH
+WORKDIR $HOME/faster-whisper-server
+COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
 # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
+# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=uv.lock,target=uv.lock \
     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
+    uv sync --frozen --compile-bytecode --no-install-project
+COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui --extra opentelemetry
+    uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
+EXPOSE 8000
 CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
```
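A minimal smoke test of an image built from the sketch above (the container name is arbitrary, and `/docs` assumes the FastAPI app serves its default docs page):

```bash
docker run --rm --detach --publish 8000:8000 --name fws-smoke faster-whisper-server:dev-cpu
# UVICORN_HOST/UVICORN_PORT above mean the server listens on 0.0.0.0:8000
curl http://localhost:8000/docs
docker stop fws-smoke
```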
Dockerfile.cpu
DELETED

```dockerfile
FROM ubuntu:24.04
LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
# `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
# hadolint ignore=DL3008
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
WORKDIR /root/faster-whisper-server
# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project
COPY ./src ./pyproject.toml ./uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --extra ui
ENV WHISPER__MODEL=Systran/faster-whisper-small
ENV UVICORN_HOST=0.0.0.0
ENV UVICORN_PORT=8000
CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
```
README.md
CHANGED

````diff
@@ -25,18 +25,26 @@ See [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio)
 
 
 
-Using Docker
+Using Docker Compose (Recommended)
+NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
+
 ```bash
-…
-…
-…
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+
+# for GPU support
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+docker compose --file compose.cuda.yaml up --detach
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+docker compose --file compose.cpu.yaml up --detach
 ```
-…
+
+Using Docker
 ```bash
-…
-docker …
-# …
-docker …
+# for GPU support
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
````
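Once either variant is up, the OpenAI-compatible audio endpoint this README section opens with can be exercised directly. A sketch (`audio.wav` is a placeholder file; the model name mirrors the CPU default above):

```bash
curl http://localhost:8000/v1/audio/transcriptions \
  --form "file=@audio.wav" \
  --form "model=Systran/faster-whisper-small"
```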
Taskfile.yaml
CHANGED

```diff
@@ -11,19 +11,7 @@ tasks:
       - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
     sources:
       - src/**/*.py
-  build:
-    cmds:
-      - docker compose build
-    sources:
-      - Dockerfile.*
-      - src/**/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
-  docker-build:
-    cmds:
-      - docker compose build --builder main {{.CLI_ARGS}}
-    sources:
-      - Dockerfile.*
-      - src/faster_whisper_server/*.py
   cii:
     cmds:
       - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
```
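With the compose-driven `build` and `docker-build` tasks removed, multi-arch images can still be produced through the builder that the remaining task creates. A sketch (the tag is illustrative):

```bash
task create-multi-arch-builder
docker buildx build --builder main \
  --platform linux/amd64,linux/arm64 \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --tag faster-whisper-server:dev-cpu .
```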
compose.cpu.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
services:
  faster-whisper-server:
    extends:
      file: compose.yaml
      service: faster-whisper-server
    image: fedirz/faster-whisper-server:latest-cpu
    build:
      args:
        BASE_IMAGE: ubuntu:24.04
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-small
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
volumes:
  hugging_face_cache:
```
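Because the service is assembled with `extends` on top of compose.yaml, the merged definition can be inspected without starting anything:

```bash
# print the fully merged configuration that `extends` produces
docker compose --file compose.cpu.yaml config
```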
compose.cuda-cdi.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
# This file is for those who have the CDI Docker feature enabled
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
services:
  faster-whisper-server:
    extends:
      file: compose.cuda.yaml
      service: faster-whisper-server
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          # WARN: requires Docker Compose 2.24.2
          # https://docs.docker.com/reference/compose-file/merge/#replace-value
          devices: !override
            - capabilities: ["gpu"]
              driver: cdi
              device_ids:
                - nvidia.com/gpu=all
volumes:
  hugging_face_cache:
```
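`!override` replaces, rather than merges with, the `devices` reservation inherited from compose.cuda.yaml. The `nvidia.com/gpu=all` device name must exist in the host's CDI registry; per the NVIDIA Container Toolkit docs linked above, a sketch of generating and verifying it:

```bash
# generate a CDI spec for the installed NVIDIA driver, then list the
# device names it defines (expect nvidia.com/gpu=... entries)
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
nvidia-ctk cdi list
```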
compose.cuda.yaml
ADDED

```yaml
# include:
#   - compose.observability.yaml
services:
  faster-whisper-server:
    extends:
      file: compose.yaml
      service: faster-whisper-server
    image: fedirz/faster-whisper-server:latest-cuda
    build:
      args:
        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-large-v3
    volumes:
      - hugging_face_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
volumes:
  hugging_face_cache:
```
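A quick check that the GPU reservation actually reaches the container (assumes the NVIDIA Container Toolkit is installed; `nvidia-smi` is made available inside GPU containers by the NVIDIA runtime):

```bash
docker compose --file compose.cuda.yaml up --detach
docker compose --file compose.cuda.yaml exec faster-whisper-server nvidia-smi
```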
observability-compose.yaml → compose.observability.yaml
RENAMED

File without changes
compose.yaml
CHANGED

```diff
@@ -1,11 +1,9 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
-include:
-  - observability-compose.yaml
 services:
-  faster-whisper-server-cuda:
-    image: fedirz/faster-whisper-server:latest-cuda
+  faster-whisper-server:
+    container_name: faster-whisper-server
     build:
-      dockerfile: Dockerfile.cuda
+      dockerfile: Dockerfile
       context: .
       platforms:
         - linux/amd64
@@ -13,39 +11,7 @@ services:
     restart: unless-stopped
     ports:
       - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
     develop:
       watch:
         - path: faster_whisper_server
           action: rebuild
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: ["gpu"]
-            # If you have CDI feature enabled use the following instead
-            # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
-            # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
-            # - driver: cdi
-            #   device_ids:
-            #     - nvidia.com/gpu=all
-  faster-whisper-server-cpu:
-    image: fedirz/faster-whisper-server:latest-cpu
-    build:
-      dockerfile: Dockerfile.cpu
-      context: .
-      platforms:
-        - linux/amd64
-        - linux/arm64
-    restart: unless-stopped
-    ports:
-      - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
-    develop:
-      watch:
-        - path: faster_whisper_server
-          action: rebuild
-volumes:
-  hugging_face_cache:
```
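The `develop.watch` section pairs with Compose's file-watch mode (the TODO at the top of the file points at the uv guide for configuring it). A minimal sketch:

```bash
# rebuild the image and restart the service whenever files under
# faster_whisper_server/ change
docker compose watch
```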