fix
- .gitignore +2 -1
- services/model_handler.py +132 -9

.gitignore
CHANGED
@@ -1 +1,2 @@
-__pycache__
+__pycache__
+model_cache/
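(The new `model_cache/` entry matches the `cache_dir="./model_cache"` that `_load_model` passes to `from_pretrained` in the diff below, keeping downloaded weights out of version control alongside the existing `__pycache__` ignore.)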
services/model_handler.py
CHANGED
@@ -185,6 +185,64 @@ class LocalHuggingFaceModel(Model):
             error_message = str(e)
             return f"Error during generation: {error_message}"
 
+class DummyModel(Model):
+    def __init__(self):
+        super().__init__(id="dummy-model")
+
+    async def ainvoke(self, prompt: str, **kwargs) -> str:
+        """Async invoke method"""
+        return self.invoke(prompt=prompt, **kwargs)
+
+    async def ainvoke_stream(self, prompt: str, **kwargs):
+        """Async streaming invoke method"""
+        result = self.invoke(prompt=prompt, **kwargs)
+        yield result
+
+    def invoke(self, prompt: str, **kwargs) -> str:
+        """Synchronous invoke method"""
+        return "Sorry, the model is not available. Please try again later."
+
+    def invoke_stream(self, prompt: str, **kwargs):
+        """Synchronous streaming invoke method"""
+        result = self.invoke(prompt=prompt, **kwargs)
+        yield result
+
+    def parse_provider_response(self, response: str) -> str:
+        """Parse the provider response"""
+        return response
+
+    def parse_provider_response_delta(self, delta: str) -> str:
+        """Parse the provider response delta for streaming"""
+        return delta
+
+    async def aresponse(self, prompt=None, **kwargs):
+        """Async response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        content = await self.ainvoke(prompt=prompt, **kwargs)
+        return Response(content)
+
+    async def aresponse_stream(self, prompt=None, **kwargs):
+        """Async streaming response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        async for chunk in self.ainvoke_stream(prompt=prompt, **kwargs):
+            yield Response(chunk)
+
+    def response(self, prompt=None, **kwargs):
+        """Synchronous response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        content = self.invoke(prompt=prompt, **kwargs)
+        return Response(content)
+
+    def response_stream(self, prompt=None, **kwargs):
+        """Synchronous streaming response method - required abstract method"""
+        if prompt is None:
+            prompt = kwargs.get('input', '')
+        for chunk in self.invoke_stream(prompt=prompt, **kwargs):
+            yield Response(chunk)
+
 class ModelHandler:
     def __init__(self):
         """Initialize the model handler"""
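A quick smoke test of this fallback, as a sketch: the import path and `Response` wrapper are taken from this diff, but the surrounding agno `Model` interface is an assumption, not a documented API.

```python
# Hypothetical smoke test for DummyModel; import path and behavior are
# assumptions based on this diff, not a confirmed public API.
import asyncio

from services.model_handler import DummyModel

async def main() -> None:
    model = DummyModel()
    # Sync path: every entry point funnels into invoke(), so the static
    # "not available" message is consistent across call styles.
    print(model.invoke(prompt="hello"))
    # Async streaming path: yields a single Response-wrapped chunk.
    async for chunk in model.aresponse_stream(prompt="hello"):
        print(chunk)

asyncio.run(main())
```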
@@ -306,22 +364,65 @@ Output:"""
     @staticmethod
     @st.cache_resource
     def _load_model():
+        """Load the model and tokenizer with retry logic"""
+        # Define retry decorator for model loading
+        @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
+        def load_with_retry(model_path):
+            try:
+                logging.info(f"Attempting to load model from {model_path}")
+                tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir="./model_cache")
+                model = AutoModelForSeq2SeqLM.from_pretrained(
+                    model_path,
+                    device_map="cpu",
+                    low_cpu_mem_usage=True,
+                    cache_dir="./model_cache"
+                )
+                logging.info(f"Successfully loaded model from {model_path}")
+                return model, tokenizer
+            except Exception as e:
+                logging.error(f"Error loading model from {model_path}: {str(e)}")
+                raise e
+
+        # Try primary model first
         try:
-
-
-
-
-
-
-
+            return load_with_retry(MODEL_PATH)
+        except Exception as primary_error:
+            logging.error(f"Failed to load primary model ({MODEL_PATH}): {str(primary_error)}")
+
+            # Try fallback models
+            fallback_models = [
+                "google/flan-t5-base",
+                "google/flan-t5-small",
+                "facebook/bart-base",
+                "t5-small"
+            ]
+
+            for fallback_model in fallback_models:
+                if fallback_model != MODEL_PATH:  # Skip if it's the same as the primary model
+                    try:
+                        logging.info(f"Trying fallback model: {fallback_model}")
+                        return load_with_retry(fallback_model)
+                    except Exception as fallback_error:
+                        logging.error(f"Failed to load fallback model ({fallback_model}): {str(fallback_error)}")
+
+            # If all models fail, try a final tiny model
+            try:
+                logging.info("Trying final fallback to t5-small")
+                return load_with_retry("t5-small")
+            except Exception as final_error:
+                logging.error(f"All model loading attempts failed. Final error: {str(final_error)}")
+                st.error("Failed to load any model. Please check your internet connection and try again.")
+                return None, None
 
     def _initialize_local_model(self):
         """Initialize local model as fallback"""
         if self.model is None or self.tokenizer is None:
             self.model, self.tokenizer = self._load_model()
-
+
         if self.model is None or self.tokenizer is None:
-
+            # Create a dummy model that returns a helpful message
+            logging.error("Failed to load any model. Creating a dummy model.")
+            return DummyModel()
 
         # Create a LocalHuggingFaceModel instance compatible with Agno
         return LocalHuggingFaceModel(self.model, self.tokenizer, max_length=512)
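The `@retry` decorator above is tenacity's. A minimal, self-contained sketch of the same policy on a toy loader (`flaky_load` is hypothetical, purely for illustration):

```python
# Same policy as load_with_retry: up to 3 attempts, with exponential backoff
# clamped to roughly 4-10 seconds between tries.
from tenacity import retry, stop_after_attempt, wait_exponential

attempts = 0

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def flaky_load() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise ConnectionError("transient network failure")  # triggers a retry
    return "loaded"

print(flaky_load())  # succeeds on the third attempt
```

Since `_load_model` is also wrapped in `st.cache_resource`, a successful load is cached across Streamlit reruns, so this retry-and-fallback cascade only runs when the cache is cold.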
@@ -335,6 +436,28 @@ Output:"""
             logging.error("Empty query provided")
             return "Error: Please provide a non-empty query"
 
+        # Check if models are available
+        if isinstance(self.translator, DummyModel) or isinstance(self.researcher, DummyModel) or \
+           isinstance(self.summarizer, DummyModel) or isinstance(self.presenter, DummyModel):
+            logging.error("One or more models are not available")
+            return """
+# 🚨 Serviço Temporariamente Indisponível 🚨
+
+Desculpe, estamos enfrentando problemas de conexão com nossos serviços de modelo de linguagem.
+
+## Possíveis causas:
+- Problemas de conexão com a internet
+- Servidores do Hugging Face podem estar sobrecarregados ou temporariamente indisponíveis
+- Limitações de recursos do sistema
+
+## O que você pode fazer:
+- Tente novamente mais tarde
+- Verifique sua conexão com a internet
+- Entre em contato com o suporte se o problema persistir
+
+Agradecemos sua compreensão!
+"""
+
         # Format translation prompt
         translation_prompt = self._format_prompt(
             role="Translate the following text to English",
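The chained `isinstance` checks work, but the same guard can be expressed once over all four agents. A hypothetical helper (the attribute names come from this diff; the function itself is not part of the commit):

```python
from services.model_handler import DummyModel, ModelHandler

def models_available(handler: ModelHandler) -> bool:
    """True when no agent has fallen back to DummyModel."""
    agents = (handler.translator, handler.researcher,
              handler.summarizer, handler.presenter)
    return not any(isinstance(agent, DummyModel) for agent in agents)
```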
|