Upload folder using huggingface_hub

Files changed:
- docker-compose.yml (+47, -0)
- inference.py (+3, -2)
docker-compose.yml
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
services:
|
2 |
+
miner_test_model:
|
3 |
+
build:
|
4 |
+
# context: /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general
|
5 |
+
dockerfile: Dockerfile
|
6 |
+
container_name: miner_test_model
|
7 |
+
deploy:
|
8 |
+
resources:
|
9 |
+
limits:
|
10 |
+
cpus: '4.0'
|
11 |
+
memory: 8G
|
12 |
+
reservations:
|
13 |
+
devices:
|
14 |
+
- capabilities:
|
15 |
+
- gpu
|
16 |
+
- utility
|
17 |
+
- compute
|
18 |
+
count: all
|
19 |
+
driver: nvidia
|
20 |
+
environment:
|
21 |
+
- NVIDIA_VISIBLE_DEVICES=all
|
22 |
+
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
23 |
+
- PYTHONUNBUFFERED=1
|
24 |
+
- MODEL_ID=tezuesh/moshi_general
|
25 |
+
- REPO_ID=tezuesh/moshi_general
|
26 |
+
- CUDA_VISIBLE_DEVICES=0
|
27 |
+
healthcheck:
|
28 |
+
interval: 10s
|
29 |
+
retries: 3
|
30 |
+
start_period: 20s
|
31 |
+
test:
|
32 |
+
- CMD
|
33 |
+
- curl
|
34 |
+
- -f
|
35 |
+
- http://localhost:8000/api/v1/health
|
36 |
+
timeout: 5s
|
37 |
+
image: miner_test_model:latest
|
38 |
+
ports:
|
39 |
+
- 8000:8000
|
40 |
+
restart: unless-stopped
|
41 |
+
shm_size: 2gb
|
42 |
+
ulimits:
|
43 |
+
memlock: -1
|
44 |
+
stack: 67108864
|
45 |
+
volumes:
|
46 |
+
- /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general:/app/src:ro
|
47 |
+
version: '3'
|
inference.py
CHANGED
@@ -144,7 +144,8 @@ class InferenceRecipe:
|
|
144 |
tokens = self.lm_gen.step(codes[:, :, 0:1])
|
145 |
if tokens is not None:
|
146 |
_ = self.mimi.decode(tokens[:, 1:])
|
147 |
-
|
|
|
148 |
logger.info("Warmup pass completed")
|
149 |
|
150 |
except Exception as e:
|
@@ -198,7 +199,7 @@ class InferenceRecipe:
|
|
198 |
dict: Contains generated audio array and optional transcribed text
|
199 |
"""
|
200 |
try:
|
201 |
-
logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate}Hz")
|
202 |
|
203 |
# Load and preprocess audio
|
204 |
wav = self._load_audio(audio_array, sample_rate)
|
|
|
144 |
tokens = self.lm_gen.step(codes[:, :, 0:1])
|
145 |
if tokens is not None:
|
146 |
_ = self.mimi.decode(tokens[:, 1:])
|
147 |
+
|
148 |
+
torch.cuda.synchronize()
|
149 |
logger.info("Warmup pass completed")
|
150 |
|
151 |
except Exception as e:
|
|
|
199 |
dict: Contains generated audio array and optional transcribed text
|
200 |
"""
|
201 |
try:
|
202 |
+
logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate} Hz, self device: {self.device}")
|
203 |
|
204 |
# Load and preprocess audio
|
205 |
wav = self._load_audio(audio_array, sample_rate)
|