Update server.py
server.py CHANGED
@@ -8,20 +8,17 @@ import logging
 from pathlib import Path
 from inference import InferenceRecipe
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
 
-#
-import torch._inductor
-import torch._dynamo
-
-# Configure Inductor/Triton cache and fallback behavior
-os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
-os.environ["TORCH_INDUCTOR_CACHE_DIR"] = "/tmp/torch_cache"
-torch._inductor.config.suppress_errors = True
+# Configure PyTorch behavior - only use supported configs
 torch._dynamo.config.suppress_errors = True
 
-#
-os.
-os.
+# Disable optimizations via environment variables
+os.environ["TORCH_LOGS"] = "+dynamo"
+os.environ["TORCHDYNAMO_VERBOSE"] = "1"
+os.environ["TORCH_COMPILE_DEBUG"] = "1"
+os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
+os.environ["TORCH_COMPILE"] = "0"  # Disable torch.compile
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
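The new pydantic import supports the request/response models used by the inference endpoint. Their exact schema is not shown in this diff; a minimal sketch consistent with the fields the handler below references (audio_data, sample_rate, and a base64 payload in the response):

from pydantic import BaseModel

# Hypothetical reconstruction: only audio_data and sample_rate are
# confirmed by the handler below; any other fields or defaults are guesses.
class AudioRequest(BaseModel):
    audio_data: str   # base64-encoded .npy buffer holding a [C, T] array
    sample_rate: int

class AudioResponse(BaseModel):
    audio_data: str   # base64-encoded .npy buffer with the model output
    sample_rate: int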
@@ -61,14 +58,12 @@ def initialize_model():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Initializing model on device: {device}")
 
-    # Critical: Use absolute path for model loading
     model_path = os.path.abspath(os.path.join('/app/src', 'models'))
     logger.info(f"Loading models from: {model_path}")
 
     if not os.path.exists(model_path):
        raise RuntimeError(f"Model path {model_path} does not exist")
 
-    # Log available model files for debugging
     model_files = os.listdir(model_path)
     logger.info(f"Available model files: {model_files}")
 
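The block above fails fast when the model directory is missing, then logs its contents. The same check as a standalone helper, sketched with the pathlib import already at the top of the file (the path is the one hard-coded above):

from pathlib import Path

def check_model_dir(model_path: str = "/app/src/models") -> list[str]:
    # Fail fast: a missing directory should abort startup rather than
    # surface later as a confusing model-load error.
    path = Path(model_path)
    if not path.exists():
        raise RuntimeError(f"Model path {path} does not exist")
    return sorted(p.name for p in path.iterdir())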
@@ -115,23 +110,18 @@ async def inference(request: AudioRequest) -> AudioResponse:
     )
 
     try:
-        # Log input validation
         logger.info(f"Received inference request with sample rate: {request.sample_rate}")
 
-        # Decode audio
         audio_bytes = base64.b64decode(request.audio_data)
         audio_array = np.load(io.BytesIO(audio_bytes))
         logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
 
-        # Validate input format
         if len(audio_array.shape) != 2:
             raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
 
-        # Run inference
         result = model.inference(audio_array, request.sample_rate)
         logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
 
-        # Encode output
         buffer = io.BytesIO()
         np.save(buffer, result['audio'])
         audio_b64 = base64.b64encode(buffer.getvalue()).decode()
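The handler implies a simple wire format: a NumPy array serialized with np.save and base64-encoded in both directions. A minimal client-side sketch of that round trip (helper names are illustrative; audio_data and sample_rate match the handler, and the endpoint path is an assumption):

import base64
import io

import numpy as np

def encode_audio(audio: np.ndarray) -> str:
    # Serialize a [C, T] array to .npy bytes, then base64 for JSON transport.
    buf = io.BytesIO()
    np.save(buf, audio)
    return base64.b64encode(buf.getvalue()).decode()

def decode_audio(audio_b64: str) -> np.ndarray:
    # Reverse the encoding: base64 -> .npy bytes -> ndarray.
    return np.load(io.BytesIO(base64.b64decode(audio_b64)))

# Example request body for POST /inference (path assumed, not shown in the diff):
payload = {
    "audio_data": encode_audio(np.zeros((1, 16000), dtype=np.float32)),
    "sample_rate": 16000,
}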