tezuesh committed on
Commit
378e3c8
·
verified ·
1 Parent(s): 353f03e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. docker-compose.yml +47 -0
  2. inference.py +3 -2
docker-compose.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ miner_test_model:
3
+ build:
4
+ # context: /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general
5
+ dockerfile: Dockerfile
6
+ container_name: miner_test_model
7
+ deploy:
8
+ resources:
9
+ limits:
10
+ cpus: '4.0'
11
+ memory: 8G
12
+ reservations:
13
+ devices:
14
+ - capabilities:
15
+ - gpu
16
+ - utility
17
+ - compute
18
+ count: all
19
+ driver: nvidia
20
+ environment:
21
+ - NVIDIA_VISIBLE_DEVICES=all
22
+ - NVIDIA_DRIVER_CAPABILITIES=compute,utility
23
+ - PYTHONUNBUFFERED=1
24
+ - MODEL_ID=tezuesh/moshi_general
25
+ - REPO_ID=tezuesh/moshi_general
26
+ - CUDA_VISIBLE_DEVICES=0
27
+ healthcheck:
28
+ interval: 10s
29
+ retries: 3
30
+ start_period: 20s
31
+ test:
32
+ - CMD
33
+ - curl
34
+ - -f
35
+ - http://localhost:8000/api/v1/health
36
+ timeout: 5s
37
+ image: miner_test_model:latest
38
+ ports:
39
+ - 8000:8000
40
+ restart: unless-stopped
41
+ shm_size: 2gb
42
+ ulimits:
43
+ memlock: -1
44
+ stack: 67108864
45
+ volumes:
46
+ - /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general:/app/src:ro
47
+ version: '3'
inference.py CHANGED
@@ -144,7 +144,8 @@ class InferenceRecipe:
144
  tokens = self.lm_gen.step(codes[:, :, 0:1])
145
  if tokens is not None:
146
  _ = self.mimi.decode(tokens[:, 1:])
147
-
 
148
  logger.info("Warmup pass completed")
149
 
150
  except Exception as e:
@@ -198,7 +199,7 @@ class InferenceRecipe:
198
  dict: Contains generated audio array and optional transcribed text
199
  """
200
  try:
201
- logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate}Hz")
202
 
203
  # Load and preprocess audio
204
  wav = self._load_audio(audio_array, sample_rate)
 
144
  tokens = self.lm_gen.step(codes[:, :, 0:1])
145
  if tokens is not None:
146
  _ = self.mimi.decode(tokens[:, 1:])
147
+
148
+ torch.cuda.synchronize()
149
  logger.info("Warmup pass completed")
150
 
151
  except Exception as e:
 
199
  dict: Contains generated audio array and optional transcribed text
200
  """
201
  try:
202
+ logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate} Hz, self device: {self.device}")
203
 
204
  # Load and preprocess audio
205
  wav = self._load_audio(audio_array, sample_rate)