Spaces:

slabstech
/

dhwani-internal-api-server

Paused

App Files Files Community

sachin committed 14 days ago

Commit

0fb44d7

1 Parent(s): 7fbf9f0

test

Browse files

Files changed (1) hide show

src/server/main.py +23 -20

src/server/main.py CHANGED Viewed

@@ -14,7 +14,7 @@ from pydantic_settings import BaseSettings
 from slowapi import Limiter
 from slowapi.util import get_remote_address
 import torch
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoProcessor, BitsAndBytesConfig, AutoModel, Gemma3ForConditionalGeneration
 from IndicTransToolkit import IndicProcessor
 import json
 import asyncio
@@ -91,9 +91,10 @@ class LLMManager:
     async def load(self):
         if not self.is_loaded:
             try:
                 self.model = await asyncio.to_thread(
                     Gemma3ForConditionalGeneration.from_pretrained,
-                    self.model_name,
                     device_map="auto",
                     quantization_config=quantization_config,
                     torch_dtype=self.torch_dtype
@@ -101,10 +102,10 @@ class LLMManager:
                 self.model.eval()
                 self.processor = await asyncio.to_thread(
                     AutoProcessor.from_pretrained,
-                    self.model_name
                 )
                 self.is_loaded = True
-                logger.info(f"LLM {self.model_name} loaded asynchronously on {self.device}")
             except Exception as e:
                 logger.error(f"Failed to load LLM: {str(e)}")
                 raise
@@ -268,14 +269,15 @@ class TTSManager:
     async def load(self):
         if not self.model:
-            logger.info("Loading TTS model IndicF5 asynchronously...")
             self.model = await asyncio.to_thread(
                 AutoModel.from_pretrained,
-                self.repo_id,
                 trust_remote_code=True
             )
             self.model = self.model.to(self.device_type)
-            logger.info("TTS model IndicF5 loaded asynchronously")
     def synthesize(self, text, ref_audio_path, ref_text):
         if not self.model:
@@ -362,29 +364,29 @@ class TranslateManager:
     async def load(self):
         if not self.tokenizer or not self.model:
             if self.src_lang.startswith("eng") and not self.tgt_lang.startswith("eng"):
-                model_name = "ai4bharat/indictrans2-en-indic-dist-200M" if self.use_distilled else "ai4bharat/indictrans2-en-indic-1B"
             elif not self.src_lang.startswith("eng") and self.tgt_lang.startswith("eng"):
-                model_name = "ai4bharat/indictrans2-indic-en-dist-200M" if self.use_distilled else "ai4bharat/indictrans2-indic-en-1B"
             elif not self.src_lang.startswith("eng") and not self.tgt_lang.startswith("eng"):
-                model_name = "ai4bharat/indictrans2-indic-indic-dist-320M" if self.use_distilled else "ai4bharat/indictrans2-indic-indic-1B"
             else:
                 raise ValueError("Invalid language combination")
             self.tokenizer = await asyncio.to_thread(
                 AutoTokenizer.from_pretrained,
-                model_name,
                 trust_remote_code=True
             )
             self.model = await asyncio.to_thread(
                 AutoModelForSeq2SeqLM.from_pretrained,
-                model_name,
                 trust_remote_code=True,
                 torch_dtype=torch.float16,
                 attn_implementation="flash_attention_2"
             )
             self.model = self.model.to(self.device_type)
             self.model = torch.compile(self.model, mode="reduce-overhead")
-            logger.info(f"Translation model {model_name} loaded asynchronously")
 class ModelManager:
     def __init__(self, device_type=device, use_distilled=True, is_lazy_loading=False):
@@ -394,11 +396,11 @@ class ModelManager:
         self.is_lazy_loading = is_lazy_loading
     async def load_model(self, src_lang, tgt_lang, key):
-        logger.info(f"Loading translation model for {src_lang} -> {tgt_lang} asynchronously")
         translate_manager = TranslateManager(src_lang, tgt_lang, self.device_type, self.use_distilled)
         await translate_manager.load()
         self.models[key] = translate_manager
-        logger.info(f"Loaded translation model for {key} asynchronously")
     def get_model(self, src_lang, tgt_lang):
         key = self._get_model_key(src_lang, tgt_lang)
@@ -427,14 +429,15 @@ class ASRModelManager:
     async def load(self):
         if not self.model:
-            logger.info("Loading ASR model asynchronously...")
             self.model = await asyncio.to_thread(
                 AutoModel.from_pretrained,
-                "ai4bharat/indic-conformer-600m-multilingual",
                 trust_remote_code=True
             )
             self.model = self.model.to(self.device_type)
-            logger.info("ASR model loaded asynchronously")
 # Global Managers
 llm_manager = LLMManager(settings.llm_model_name)
@@ -505,12 +508,12 @@ async def lifespan(app: FastAPI):
                 translation_tasks.append(model_manager.load_model(src_lang, tgt_lang, key))
             await asyncio.gather(*tasks, *translation_tasks)
-            logger.info("All models loaded successfully asynchronously")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    logger.info("Starting asynchronous model loading...")
     await load_all_models()
     yield
     llm_manager.unload()

 from slowapi import Limiter
 from slowapi.util import get_remote_address
 import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoProcessor, AutoModel, BitsAndBytesConfig, Gemma3ForConditionalGeneration
 from IndicTransToolkit import IndicProcessor
 import json
 import asyncio
     async def load(self):
         if not self.is_loaded:
             try:
+                local_path = "/app/models/llm_model"
                 self.model = await asyncio.to_thread(
                     Gemma3ForConditionalGeneration.from_pretrained,
+                    local_path,
                     device_map="auto",
                     quantization_config=quantization_config,
                     torch_dtype=self.torch_dtype
                 self.model.eval()
                 self.processor = await asyncio.to_thread(
                     AutoProcessor.from_pretrained,
+                    local_path
                 )
                 self.is_loaded = True
+                logger.info(f"LLM loaded from {local_path} on {self.device}")
             except Exception as e:
                 logger.error(f"Failed to load LLM: {str(e)}")
                 raise
     async def load(self):
         if not self.model:
+            logger.info("Loading TTS model from local path asynchronously...")
+            local_path = "/app/models/tts_model"
             self.model = await asyncio.to_thread(
                 AutoModel.from_pretrained,
+                local_path,
                 trust_remote_code=True
             )
             self.model = self.model.to(self.device_type)
+            logger.info("TTS model loaded from local path asynchronously")
     def synthesize(self, text, ref_audio_path, ref_text):
         if not self.model:
     async def load(self):
         if not self.tokenizer or not self.model:
             if self.src_lang.startswith("eng") and not self.tgt_lang.startswith("eng"):
+                local_path = "/app/models/trans_en_indic"
             elif not self.src_lang.startswith("eng") and self.tgt_lang.startswith("eng"):
+                local_path = "/app/models/trans_indic_en"
             elif not self.src_lang.startswith("eng") and not self.tgt_lang.startswith("eng"):
+                local_path = "/app/models/trans_indic_indic"
             else:
                 raise ValueError("Invalid language combination")
             self.tokenizer = await asyncio.to_thread(
                 AutoTokenizer.from_pretrained,
+                local_path,
                 trust_remote_code=True
             )
             self.model = await asyncio.to_thread(
                 AutoModelForSeq2SeqLM.from_pretrained,
+                local_path,
                 trust_remote_code=True,
                 torch_dtype=torch.float16,
                 attn_implementation="flash_attention_2"
             )
             self.model = self.model.to(self.device_type)
             self.model = torch.compile(self.model, mode="reduce-overhead")
+            logger.info(f"Translation model loaded from {local_path} asynchronously")
 class ModelManager:
     def __init__(self, device_type=device, use_distilled=True, is_lazy_loading=False):
         self.is_lazy_loading = is_lazy_loading
     async def load_model(self, src_lang, tgt_lang, key):
+        logger.info(f"Loading translation model for {src_lang} -> {tgt_lang} from local path")
         translate_manager = TranslateManager(src_lang, tgt_lang, self.device_type, self.use_distilled)
         await translate_manager.load()
         self.models[key] = translate_manager
+        logger.info(f"Loaded translation model for {key} from local path")
     def get_model(self, src_lang, tgt_lang):
         key = self._get_model_key(src_lang, tgt_lang)
     async def load(self):
         if not self.model:
+            logger.info("Loading ASR model from local path asynchronously...")
+            local_path = "/app/models/asr_model"
             self.model = await asyncio.to_thread(
                 AutoModel.from_pretrained,
+                local_path,
                 trust_remote_code=True
             )
             self.model = self.model.to(self.device_type)
+            logger.info("ASR model loaded from local path asynchronously")
 # Global Managers
 llm_manager = LLMManager(settings.llm_model_name)
                 translation_tasks.append(model_manager.load_model(src_lang, tgt_lang, key))
             await asyncio.gather(*tasks, *translation_tasks)
+            logger.info("All models loaded successfully from local paths")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
+    logger.info("Starting asynchronous model loading from local paths...")
     await load_all_models()
     yield
     llm_manager.unload()