Update server.py
server.py CHANGED
@@ -1,7 +1,6 @@
 from fastapi import FastAPI, HTTPException
 import numpy as np
 import torch
-from pydantic import BaseModel
 import base64
 import io
 import os
@@ -10,6 +9,20 @@ from pathlib import Path
 from inference import InferenceRecipe
 from fastapi.middleware.cors import CORSMiddleware
 
+# Add these imports and configurations at the top
+import torch._inductor
+import torch._dynamo
+
+# Configure Inductor/Triton cache and fallback behavior
+os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
+os.environ["TORCH_INDUCTOR_CACHE_DIR"] = "/tmp/torch_cache"
+torch._inductor.config.suppress_errors = True
+torch._dynamo.config.suppress_errors = True
+
+# Create cache directories with correct permissions
+os.makedirs("/tmp/triton_cache", exist_ok=True)
+os.makedirs("/tmp/torch_cache", exist_ok=True)
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -41,7 +54,6 @@ INITIALIZATION_STATUS = {
 # Global model instance
 model = None
 
-
 def initialize_model():
     """Initialize the model with correct path resolution"""
     global model, INITIALIZATION_STATUS
@@ -93,35 +105,6 @@ def health_check():
 
     return status
 
-# @app.post("/api/v1/inference")
-# async def inference(request: AudioRequest) -> AudioResponse:
-#     """Run inference on audio input"""
-#     if not INITIALIZATION_STATUS["model_loaded"]:
-#         raise HTTPException(
-#             status_code=503,
-#             detail=f"Model not ready. Status: {INITIALIZATION_STATUS}"
-#         )
-
-#     try:
-#         # Decode audio from base64
-#         audio_bytes = base64.b64decode(request.audio_data)
-#         audio_array = np.load(io.BytesIO(audio_bytes))
-
-#         # Run inference
-#         result = model.inference(audio_array, request.sample_rate)
-
-#         # Encode output audio
-#         buffer = io.BytesIO()
-#         np.save(buffer, result['audio'])
-#         audio_b64 = base64.b64encode(buffer.getvalue()).decode()
-
-#         return AudioResponse(
-#             audio_data=audio_b64,
-#             text=result.get("text", "")
-#         )
-#     except Exception as e:
-#         logger.error(f"Inference failed: {str(e)}")
-#         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/api/v1/inference")
 async def inference(request: AudioRequest) -> AudioResponse:
     """Run inference with enhanced error handling and logging"""
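
Assuming the new handler keeps the AudioRequest/AudioResponse fields used by the old commented-out version (base64-wrapped np.save bytes in audio_data plus a sample_rate), a client call would look roughly like the sketch below; the URL and the 16 kHz sample rate are illustrative assumptions, not part of the commit.

import base64
import io
import numpy as np
import requests

# Hypothetical client for POST /api/v1/inference; host, port, and sample rate are assumptions.
audio = np.zeros(16000, dtype=np.float32)   # one second of silence at 16 kHz
buffer = io.BytesIO()
np.save(buffer, audio)                      # same encoding the server reads back with np.load
payload = {
    "audio_data": base64.b64encode(buffer.getvalue()).decode(),
    "sample_rate": 16000,
}

resp = requests.post("http://localhost:8000/api/v1/inference", json=payload, timeout=120)
resp.raise_for_status()
result = resp.json()

# The response mirrors the old AudioResponse: base64-encoded np.save bytes plus optional text.
output_audio = np.load(io.BytesIO(base64.b64decode(result["audio_data"])))
print(output_audio.shape, result.get("text", ""))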