wakeupmh committed
Commit 46f7de3 · 1 parent: 2dd9d2c
Files changed (2)
  1. .gitignore +2 -1
  2. services/model_handler.py +132 -9
.gitignore CHANGED
@@ -1 +1,2 @@
-__pycache__
+__pycache__
+model_cache/
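
The new `model_cache/` entry keeps locally downloaded weights out of version control, matching the `cache_dir="./model_cache"` argument added to the `from_pretrained` calls in `services/model_handler.py` below. A minimal sketch of what that parameter does (the model name here is only an example):

```python
from transformers import AutoTokenizer

# Files are downloaded into ./model_cache (and reused from there on later runs)
# instead of the default ~/.cache/huggingface directory.
tokenizer = AutoTokenizer.from_pretrained("t5-small", cache_dir="./model_cache")
print(tokenizer("hello")["input_ids"])
```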
services/model_handler.py CHANGED
@@ -185,6 +185,64 @@ class LocalHuggingFaceModel(Model):
             error_message = str(e)
             return f"Error during generation: {error_message}"
 
+class DummyModel(Model):
+    def __init__(self):
+        super().__init__(id="dummy-model")
+
+    async def ainvoke(self, prompt: str, **kwargs) -> str:
+        """Async invoke method"""
+        return self.invoke(prompt=prompt, **kwargs)
+
+    async def ainvoke_stream(self, prompt: str, **kwargs):
+        """Async streaming invoke method"""
+        result = self.invoke(prompt=prompt, **kwargs)
+        yield result
+
+    def invoke(self, prompt: str, **kwargs) -> str:
+        """Synchronous invoke method"""
+        return "Sorry, the model is not available. Please try again later."
+
+    def invoke_stream(self, prompt: str, **kwargs):
+        """Synchronous streaming invoke method"""
+        result = self.invoke(prompt=prompt, **kwargs)
+        yield result
+
+    def parse_provider_response(self, response: str) -> str:
+        """Parse the provider response"""
+        return response
+
+    def parse_provider_response_delta(self, delta: str) -> str:
+        """Parse the provider response delta for streaming"""
+        return delta
+
+    async def aresponse(self, prompt=None, **kwargs):
+        """Async response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        content = await self.ainvoke(prompt=prompt, **kwargs)
+        return Response(content)
+
+    async def aresponse_stream(self, prompt=None, **kwargs):
+        """Async streaming response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        async for chunk in self.ainvoke_stream(prompt=prompt, **kwargs):
+            yield Response(chunk)
+
+    def response(self, prompt=None, **kwargs):
+        """Synchronous response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        content = self.invoke(prompt=prompt, **kwargs)
+        return Response(content)
+
+    def response_stream(self, prompt=None, **kwargs):
+        """Synchronous streaming response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        for chunk in self.invoke_stream(prompt=prompt, **kwargs):
+            yield Response(chunk)
+
 class ModelHandler:
     def __init__(self):
         """Initialize the model handler"""
 
@@ -306,22 +364,65 @@ Output:"""
     @staticmethod
     @st.cache_resource
     def _load_model():
+        """Load the model and tokenizer with retry logic"""
+        # Define retry decorator for model loading
+        @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
+        def load_with_retry(model_path):
+            try:
+                logging.info(f"Attempting to load model from {model_path}")
+                tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir="./model_cache")
+                model = AutoModelForSeq2SeqLM.from_pretrained(
+                    model_path,
+                    device_map="cpu",
+                    low_cpu_mem_usage=True,
+                    cache_dir="./model_cache"
+                )
+                logging.info(f"Successfully loaded model from {model_path}")
+                return model, tokenizer
+            except Exception as e:
+                logging.error(f"Error loading model from {model_path}: {str(e)}")
+                raise e
+
+        # Try primary model first
         try:
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-            model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH, device_map="cpu", low_cpu_mem_usage=True)
-            return model, tokenizer
-        except Exception as e:
-            st.error(f"Error loading model: {str(e)}")
-            logging.error(f"Error loading model: {str(e)}")
-            return None, None
+            return load_with_retry(MODEL_PATH)
+        except Exception as primary_error:
+            logging.error(f"Failed to load primary model ({MODEL_PATH}): {str(primary_error)}")
+
+            # Try fallback models
+            fallback_models = [
+                "google/flan-t5-base",
+                "google/flan-t5-small",
+                "facebook/bart-base",
+                "t5-small"
+            ]
+
+            for fallback_model in fallback_models:
+                if fallback_model != MODEL_PATH:  # Skip if it's the same as the primary model
+                    try:
+                        logging.info(f"Trying fallback model: {fallback_model}")
+                        return load_with_retry(fallback_model)
+                    except Exception as fallback_error:
+                        logging.error(f"Failed to load fallback model ({fallback_model}): {str(fallback_error)}")
+
+            # If all models fail, try a final tiny model
+            try:
+                logging.info("Trying final fallback to t5-small")
+                return load_with_retry("t5-small")
+            except Exception as final_error:
+                logging.error(f"All model loading attempts failed. Final error: {str(final_error)}")
+                st.error("Failed to load any model. Please check your internet connection and try again.")
+                return None, None
 
     def _initialize_local_model(self):
         """Initialize local model as fallback"""
         if self.model is None or self.tokenizer is None:
             self.model, self.tokenizer = self._load_model()
-
+
         if self.model is None or self.tokenizer is None:
-            raise ValueError("Failed to load model and tokenizer")
+            # Create a dummy model that returns a helpful message
+            logging.error("Failed to load any model. Creating a dummy model.")
+            return DummyModel()
 
         # Create a LocalHuggingFaceModel instance compatible with Agno
         return LocalHuggingFaceModel(self.model, self.tokenizer, max_length=512)
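
The `@retry` decorator and its helpers presumably come from the tenacity library, so the file would also need `from tenacity import retry, stop_after_attempt, wait_exponential`. A standalone sketch of the same policy on a deliberately flaky function:

```python
import logging
from tenacity import retry, stop_after_attempt, wait_exponential

logging.basicConfig(level=logging.INFO)
calls = {"n": 0}

# Same policy as in _load_model: at most 3 attempts, exponential backoff
# with waits clamped between 4 and 10 seconds.
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def flaky_download():
    calls["n"] += 1
    logging.info(f"attempt {calls['n']}")
    if calls["n"] < 3:
        raise ConnectionError("transient network failure")
    return "weights"

print(flaky_download())  # fails twice, backs off between tries, succeeds on attempt 3
```

If all three attempts fail, tenacity raises a `RetryError`, which the surrounding `except Exception` blocks in `_load_model` catch before moving on to the next fallback model.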
 
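Because `_load_model` is wrapped in `@st.cache_resource`, Streamlit caches the returned `(model, tokenizer)` tuple for the lifetime of the process, so the retry/fallback chain runs at most once rather than on every script rerun. A minimal illustration with a toy resource (run it with `streamlit run`):

```python
import streamlit as st

@st.cache_resource
def get_resource() -> dict:
    print("loading...")  # printed only the first time the function runs
    return {"loaded": True}

a = get_resource()
b = get_resource()
st.write(a is b)  # True: later calls return the same cached object
```
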
@@ -335,6 +436,28 @@ Output:"""
             logging.error("Empty query provided")
             return "Error: Please provide a non-empty query"
 
+        # Check if models are available
+        if isinstance(self.translator, DummyModel) or isinstance(self.researcher, DummyModel) or \
+           isinstance(self.summarizer, DummyModel) or isinstance(self.presenter, DummyModel):
+            logging.error("One or more models are not available")
+            return """
+            # 🚨 Service Temporarily Unavailable 🚨
+
+            Sorry, we are experiencing connection problems with our language model services.
+
+            ## Possible causes:
+            - Internet connection problems
+            - Hugging Face servers may be overloaded or temporarily unavailable
+            - System resource limitations
+
+            ## What you can do:
+            - Try again later
+            - Check your internet connection
+            - Contact support if the problem persists
+
+            Thank you for your understanding!
+            """
+
         # Format translation prompt
         translation_prompt = self._format_prompt(
             role="Translate the following text to English",