tezuesh committed on
Commit
81a9e0f
·
verified ·
1 Parent(s): 18eda2e

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +8 -18
server.py CHANGED
@@ -8,20 +8,17 @@ import logging
8
  from pathlib import Path
9
  from inference import InferenceRecipe
10
  from fastapi.middleware.cors import CORSMiddleware
 
11
 
12
- # Add these imports and configurations at the top
13
- import torch._inductor
14
- import torch._dynamo
15
-
16
- # Configure Inductor/Triton cache and fallback behavior
17
- os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
18
- os.environ["TORCH_INDUCTOR_CACHE_DIR"] = "/tmp/torch_cache"
19
- torch._inductor.config.suppress_errors = True
20
  torch._dynamo.config.suppress_errors = True
21
 
22
- # Create cache directories with correct permissions
23
- os.makedirs("/tmp/triton_cache", exist_ok=True)
24
- os.makedirs("/tmp/torch_cache", exist_ok=True)
 
 
 
25
 
26
  logging.basicConfig(level=logging.INFO)
27
  logger = logging.getLogger(__name__)
@@ -61,14 +58,12 @@ def initialize_model():
61
  device = "cuda" if torch.cuda.is_available() else "cpu"
62
  logger.info(f"Initializing model on device: {device}")
63
 
64
- # Critical: Use absolute path for model loading
65
  model_path = os.path.abspath(os.path.join('/app/src', 'models'))
66
  logger.info(f"Loading models from: {model_path}")
67
 
68
  if not os.path.exists(model_path):
69
  raise RuntimeError(f"Model path {model_path} does not exist")
70
 
71
- # Log available model files for debugging
72
  model_files = os.listdir(model_path)
73
  logger.info(f"Available model files: {model_files}")
74
 
@@ -115,23 +110,18 @@ async def inference(request: AudioRequest) -> AudioResponse:
115
  )
116
 
117
  try:
118
- # Log input validation
119
  logger.info(f"Received inference request with sample rate: {request.sample_rate}")
120
 
121
- # Decode audio
122
  audio_bytes = base64.b64decode(request.audio_data)
123
  audio_array = np.load(io.BytesIO(audio_bytes))
124
  logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
125
 
126
- # Validate input format
127
  if len(audio_array.shape) != 2:
128
  raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
129
 
130
- # Run inference
131
  result = model.inference(audio_array, request.sample_rate)
132
  logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
133
 
134
- # Encode output
135
  buffer = io.BytesIO()
136
  np.save(buffer, result['audio'])
137
  audio_b64 = base64.b64encode(buffer.getvalue()).decode()
 
8
  from pathlib import Path
9
  from inference import InferenceRecipe
10
  from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import BaseModel
12
 
13
+ # Configure PyTorch behavior - only use supported configs
 
 
 
 
 
 
 
14
  torch._dynamo.config.suppress_errors = True
15
 
16
+ # Enable verbose Dynamo/compile debug output and set flags intended to disable compile-time optimizations (via environment variables)
17
+ os.environ["TORCH_LOGS"] = "+dynamo"
18
+ os.environ["TORCHDYNAMO_VERBOSE"] = "1"
19
+ os.environ["TORCH_COMPILE_DEBUG"] = "1"
20
+ os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
21
+ os.environ["TORCH_COMPILE"] = "0" # Disable torch.compile
22
 
23
  logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)
 
58
  device = "cuda" if torch.cuda.is_available() else "cpu"
59
  logger.info(f"Initializing model on device: {device}")
60
 
 
61
  model_path = os.path.abspath(os.path.join('/app/src', 'models'))
62
  logger.info(f"Loading models from: {model_path}")
63
 
64
  if not os.path.exists(model_path):
65
  raise RuntimeError(f"Model path {model_path} does not exist")
66
 
 
67
  model_files = os.listdir(model_path)
68
  logger.info(f"Available model files: {model_files}")
69
 
 
110
  )
111
 
112
  try:
 
113
  logger.info(f"Received inference request with sample rate: {request.sample_rate}")
114
 
 
115
  audio_bytes = base64.b64decode(request.audio_data)
116
  audio_array = np.load(io.BytesIO(audio_bytes))
117
  logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
118
 
 
119
  if len(audio_array.shape) != 2:
120
  raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
121
 
 
122
  result = model.inference(audio_array, request.sample_rate)
123
  logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
124
 
 
125
  buffer = io.BytesIO()
126
  np.save(buffer, result['audio'])
127
  audio_b64 = base64.b64encode(buffer.getvalue()).decode()