Spaces:

wakeupmh
/

alem-do-espectro

Sleeping

App Files Community

wakeupmh committed on Feb 26

Commit

2269229

1 Parent(s): e3b3253

revert

Browse files

Files changed (7) hide show

README.md +1 -1
app.py +6 -23
requirements.txt +3 -6
services/__pycache__/__init__.cpython-311.pyc +0 -0
services/__pycache__/model_handler.cpython-311.pyc +0 -0
services/__pycache__/research_fetcher.cpython-311.pyc +0 -0
services/model_handler.py +85 -801

README.md CHANGED Viewed

@@ -8,7 +8,7 @@ sdk_version: 1.42.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: '"explorando a riqueza das neurodivergências"'
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 app_file: app.py
 pinned: false
 license: mit
+short_description: 'Explorando a riqueza das neurodivergências'
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import streamlit as st
 import logging
-import asyncio
 from services.model_handler import ModelHandler
 # Configure logging
@@ -23,45 +22,29 @@ class AutismResearchApp:
             Pergunte o que quiser e eu vou analisar os últimos artigos científicos e fornecer uma resposta baseada em evidências.
         """)
-    async def run(self):
         """Run the main application loop"""
         self._setup_streamlit()
         # Initialize session state for papers
         if 'papers' not in st.session_state:
             st.session_state.papers = []
-        # Carregar modelos assincronamente
-        with st.status("Carregando modelos...") as status:
-            status.write("🔄 Inicializando modelos de linguagem...")
-            await self.model_handler._load_models_async()
-            status.write("✅ Modelos carregados com sucesso!")
         # Get user query
         col1, col2 = st.columns(2, vertical_alignment="bottom", gap="small")
         query = col1.text_input("O que você precisa saber?")
         if col2.button("Enviar"):
-            if not query:
-                st.error("Por favor, digite uma pergunta.")
-                return
             # Show status while processing
             with st.status("Processando sua Pergunta...") as status:
-                status.write("🔍 Buscando informações relevantes...")
-                status.write("📚 Analisando dados...")
                 status.write("✍️ Gerando resposta...")
-                # Sempre usar o modelo, nunca a resposta padrão
-                self.model_handler.force_default_response = False
-                answer = await self.model_handler.generate_answer_async(query)
                 status.write("✨ Resposta gerada! Exibindo resultados...")
-            st.success("✅ Resposta gerada com sucesso!")
             st.markdown("### Resposta")
@@ -69,7 +52,7 @@ class AutismResearchApp:
 def main():
     app = AutismResearchApp()
-    asyncio.run(app.run())
 if __name__ == "__main__":
     main()

 import streamlit as st
 import logging
 from services.model_handler import ModelHandler
 # Configure logging
             Pergunte o que quiser e eu vou analisar os últimos artigos científicos e fornecer uma resposta baseada em evidências.
         """)
+    def run(self):
         """Run the main application loop"""
         self._setup_streamlit()
         # Initialize session state for papers
         if 'papers' not in st.session_state:
             st.session_state.papers = []
         # Get user query
         col1, col2 = st.columns(2, vertical_alignment="bottom", gap="small")
         query = col1.text_input("O que você precisa saber?")
         if col2.button("Enviar"):
             # Show status while processing
             with st.status("Processando sua Pergunta...") as status:
+                status.write("🔍 Buscando papers de pesquisa relevantes...")
+                status.write("📚 Analisando papers de pesquisa...")
                 status.write("✍️ Gerando resposta...")
+                answer = self.model_handler.generate_answer(query)
                 status.write("✨ Resposta gerada! Exibindo resultados...")
+            st.success("✅ Resposta gerada com base nos artigos de pesquisa encontrados.")
             st.markdown("### Resposta")
 def main():
     app = AutismResearchApp()
+    app.run()
 if __name__ == "__main__":
     main()

requirements.txt CHANGED Viewed

@@ -1,13 +1,10 @@
 transformers>=4.36.2
 streamlit>=1.29.0
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch>=2.1.0
 accelerate>=0.26.0
 arxiv>=1.4.7
 python-dotenv>=1.0.0
-agno==1.1.5
 pypdf>=3.11.1
-watchdog>=2.3.1
-sentencepiece>=0.1.99
-tenacity>=8.2.2
-asyncio

 transformers>=4.36.2
 streamlit>=1.29.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate>=0.26.0
 arxiv>=1.4.7
 python-dotenv>=1.0.0
+agno==1.0.6
+ollama>=0.4.7
 pypdf>=3.11.1
+watchdog>=2.3.1

services/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (179 Bytes). View file

services/__pycache__/model_handler.cpython-311.pyc ADDED Viewed

Binary file (6.53 kB). View file

services/__pycache__/research_fetcher.cpython-311.pyc ADDED Viewed

Binary file (17.9 kB). View file

services/model_handler.py CHANGED Viewed

@@ -1,844 +1,128 @@
 import logging
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import streamlit as st
 from agno.agent import Agent
 from agno.tools.arxiv import ArxivTools
 from agno.tools.pubmed import PubmedTools
-from agno.models.base import Model
-from tenacity import retry, stop_after_attempt, wait_exponential
-import time
-import datetime
-import os
-from typing import Tuple, Optional, Dict, Any, List
-# Configuração de logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-# Configurações dos modelos
-MODEL_CONFIG = {
-    "translator": {
-        "primary": "facebook/nllb-200-distilled-600M",
-        "fallback": "google/flan-t5-base"
-    },
-    "researcher": {
-        "primary": "google/flan-t5-large",
-        "fallback": "google/flan-t5-base"
-    },
-    "presenter": {
-        "primary": "google/flan-t5-base",
-        "fallback": "google/flan-t5-small"
-    }
-}
-# Simple Response class to wrap the model output
-class Response:
-    def __init__(self, content):
-        # Ensure content is a string and not empty
-        if content is None:
-            content = ""
-        if not isinstance(content, str):
-            content = str(content)
-        # Store the content
-        self.content = content
-        # Add tool_calls attribute with default empty list
-        self.tool_calls = []
-        # Add other attributes that might be needed
-        self.audio = None
-        self.images = []
-        self.citations = []
-        self.metadata = {}
-        self.finish_reason = "stop"
-        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-        # Add timestamp attributes
-        current_time = time.time()
-        self.created_at = int(current_time)  # Convert to integer
-        self.created = int(current_time)
-        self.timestamp = datetime.datetime.now().isoformat()
-        # Add model info attributes
-        self.id = "local-model-response"
-        self.model = "local-huggingface"
-        self.object = "chat.completion"
-        self.choices = [{"index": 0, "message": {"role": "assistant", "content": content}, "finish_reason": "stop"}]
-        # Add additional attributes that might be needed
-        self.system_fingerprint = ""
-        self.is_truncated = False
-        self.role = "assistant"
-    def __str__(self):
-        return self.content if self.content else ""
-    def __repr__(self):
-        return f"Response(content='{self.content[:50]}{'...' if len(self.content) > 50 else ''}')"
-# Personalizada classe para modelos locais
-class LocalHuggingFaceModel(Model):
-    def __init__(self, model, tokenizer, model_id="local-huggingface", max_length=512):
-        super().__init__(id=model_id)
-        self.model = model
-        self.tokenizer = tokenizer
-        self.max_length = max_length
-        self.model_name = model_id
-    async def ainvoke(self, prompt: str, **kwargs) -> str:
-        """Async invoke method"""
-        try:
-            logging.info(f"[{self.model_name}] ainvoke called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            # Não usar await com o método invoke que é síncrono
-            return self.invoke(prompt, **kwargs)
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in ainvoke: {str(e)}")
-            return Response(f"Error in ainvoke: {str(e)}")
-    async def ainvoke_stream(self, prompt: str, **kwargs):
-        """Async streaming invoke method"""
-        try:
-            logging.info(f"[{self.model_name}] ainvoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            result = self.invoke(prompt, **kwargs)
-            yield result
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in ainvoke_stream: {str(e)}")
-            yield Response(f"Error in ainvoke_stream: {str(e)}")
-    async def aresponse_stream(self, prompt: str, **kwargs):
-        """
-        Método abstrato necessário para implementar a interface Model da biblioteca agno.
-        Este método deve retornar um gerador assíncrono de objetos Response.
-        """
-        try:
-            logging.info(f"[{self.model_name}] aresponse_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            result = self.invoke(prompt, **kwargs)
-            yield result
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in aresponse_stream: {str(e)}")
-            yield Response(f"Error in aresponse_stream: {str(e)}")
-    def invoke(self, prompt: str, **kwargs) -> str:
-        """Synchronous invoke method"""
-        try:
-            logging.info(f"[{self.model_name}] Invoking model with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            # Check if prompt is None or empty
-            if prompt is None:
-                logging.warning(f"[{self.model_name}] None prompt provided to invoke method")
-                return Response("No input provided. Please provide a valid prompt.")
-            if not isinstance(prompt, str):
-                logging.warning(f"[{self.model_name}] Non-string prompt provided: {type(prompt)}")
-                try:
-                    prompt = str(prompt)
-                    logging.info(f"[{self.model_name}] Converted prompt to string: {prompt[:100]}...")
-                except:
-                    return Response("Invalid input type. Please provide a string prompt.")
-            if not prompt.strip():
-                logging.warning(f"[{self.model_name}] Empty prompt provided to invoke method")
-                return Response("No input provided. Please provide a non-empty prompt.")
-            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
-            #  Configure generation parameters
-            generation_config = {
-                "max_length": self.max_length,
-                "num_return_sequences": 1,
-                "do_sample": kwargs.get("do_sample", False),
-                "temperature": kwargs.get("temperature", 1.0),
-                "top_p": kwargs.get("top_p", 1.0),
-            }
-            # Generate the answer
-            outputs = self.model.generate(**inputs, **generation_config)
-            decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Check if output is empty
-            if not decoded_output or not decoded_output.strip():
-                logging.warning(f"[{self.model_name}] Model generated empty output")
-                return Response("The model did not generate any output. Please try with a different prompt.")
-            logging.info(f"[{self.model_name}] Model generated output: {decoded_output[:100]}...")
-            return Response(decoded_output)
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in local model generation: {str(e)}")
-            if hasattr(e, 'args') and len(e.args) > 0:
-                error_message = e.args[0]
-            else:
-                error_message = str(e)
-            return Response(f"Error during generation: {error_message}")
-    def invoke_stream(self, prompt: str, **kwargs):
-        """Synchronous streaming invoke method"""
-        try:
-            logging.info(f"[{self.model_name}] invoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            result = self.invoke(prompt, **kwargs)
-            yield result
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in invoke_stream: {str(e)}")
-            yield Response(f"Error in invoke_stream: {str(e)}")
-    def parse_provider_response(self, response: str) -> str:
-        """Parse the provider response"""
-        return response
-    def parse_provider_response_delta(self, delta: str) -> str:
-        """Parse the provider response delta for streaming"""
-        return delta
-    async def aresponse(self, prompt=None, **kwargs):
-        """Async response method - required abstract method"""
-        try:
-            # Log detalhado de todos os argumentos
-            logging.info(f"[{self.model_name}] aresponse args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
-            # Extrair o prompt das mensagens se estiverem disponíveis
-            if prompt is None and 'messages' in kwargs and kwargs['messages']:
-                messages = kwargs['messages']
-                # Procurar pela mensagem do usuário
-                for message in messages:
-                    if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
-                        prompt = message.content
-                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-                        break
-            # Verificar se o prompt está em kwargs['input']
-            if prompt is None:
-                if 'input' in kwargs:
-                    prompt = kwargs.get('input')
-                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-            logging.info(f"[{self.model_name}] aresponse called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning(f"[{self.model_name}] Empty or invalid prompt in aresponse")
-                return Response("No input provided. Please provide a valid prompt.")
-            content = await self.ainvoke(prompt, **kwargs)
-            return content if isinstance(content, Response) else Response(content)
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in aresponse: {str(e)}")
-            return Response(f"Error in aresponse: {str(e)}")
-    def response(self, prompt=None, **kwargs):
-        """Synchronous response method - required abstract method"""
-        try:
-            # Log detalhado de todos os argumentos
-            logging.info(f"[{self.model_name}] response args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
-            # Extrair o prompt das mensagens se estiverem disponíveis
-            if prompt is None and 'messages' in kwargs and kwargs['messages']:
-                messages = kwargs['messages']
-                # Procurar pela mensagem do usuário
-                for message in messages:
-                    if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
-                        prompt = message.content
-                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-                        break
-            # Verificar se o prompt está em kwargs['input']
-            if prompt is None:
-                if 'input' in kwargs:
-                    prompt = kwargs.get('input')
-                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-            logging.info(f"[{self.model_name}] response called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning(f"[{self.model_name}] Empty or invalid prompt in response")
-                return Response("No input provided. Please provide a valid prompt.")
-            content = self.invoke(prompt, **kwargs)
-            return content if isinstance(content, Response) else Response(content)
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in response: {str(e)}")
-            return Response(f"Error in response: {str(e)}")
-    def response_stream(self, prompt=None, **kwargs):
-        """Synchronous streaming response method - required abstract method"""
-        try:
-            # Log detalhado de todos os argumentos
-            logging.info(f"[{self.model_name}] response_stream args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
-            # Extrair o prompt das mensagens se estiverem disponíveis
-            if prompt is None and 'messages' in kwargs and kwargs['messages']:
-                messages = kwargs['messages']
-                # Procurar pela mensagem do usuário
-                for message in messages:
-                    if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
-                        prompt = message.content
-                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-                        break
-            # Verificar se o prompt está em kwargs['input']
-            if prompt is None:
-                if 'input' in kwargs:
-                    prompt = kwargs.get('input')
-                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
-            logging.info(f"[{self.model_name}] response_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
-            if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning(f"[{self.model_name}] Empty or invalid prompt in response_stream")
-                yield Response("No input provided. Please provide a valid prompt.")
-                return
-            for chunk in self.invoke_stream(prompt, **kwargs):
-                yield chunk if isinstance(chunk, Response) else Response(chunk)
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in response_stream: {str(e)}")
-            yield Response(f"Error in response_stream: {str(e)}")
-    def generate(self, prompt: str, **kwargs):
-        try:
-            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
-            #  Configure generation parameters
-            generation_config = {
-                "max_length": self.max_length,
-                "num_return_sequences": 1,
-                "do_sample": kwargs.get("do_sample", False),
-                "temperature": kwargs.get("temperature", 1.0),
-                "top_p": kwargs.get("top_p", 1.0),
-            }
-            # Generate the answer
-            outputs = self.model.generate(**inputs, **generation_config)
-            decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            return decoded_output
-        except Exception as e:
-            logging.error(f"[{self.model_name}] Error in generate method: {str(e)}")
-            if hasattr(e, 'args') and len(e.args) > 0:
-                error_message = e.args[0]
-            else:
-                error_message = str(e)
-            return f"Error during generation: {error_message}"
 class ModelHandler:
-    """
-    Classe para gerenciar múltiplos modelos e gerar respostas.
-    """
     def __init__(self):
-        """
-        Inicializa o ModelHandler com múltiplos modelos.
-        """
         self.translator = None
         self.researcher = None
         self.presenter = None
-        self.force_default_response = False
-        self.models = {}
-        # Os modelos serão carregados posteriormente de forma assíncrona
-        logging.info("ModelHandler initialized. Models will be loaded asynchronously.")
-    def _extract_content(self, result):
-        """
-        Extrai o conteúdo de uma resposta do modelo.
-        Args:
-            result: A resposta do modelo, que pode ser um objeto RunResponse ou uma string
-        Returns:
-            O conteúdo da resposta como string
-        """
-        try:
-            if result is None:
-                return ""
-            if hasattr(result, 'content'):
-                return result.content
-            return str(result)
-        except Exception as e:
-            logging.error(f"Error extracting content: {str(e)}")
-            return ""
-    async def _load_models_async(self):
-        """
-        Carrega os modelos de forma assíncrona.
-        """
-        logging.info("Loading models asynchronously...")
-        self._load_models()
-        logging.info("Models loaded asynchronously")
-    def _format_prompt(self, prompt_type, content):
-        """
-        Formata o prompt de acordo com o tipo.
-        Args:
-            prompt_type: O tipo de prompt (translation, research, presentation)
-            content: O conteúdo a ser incluído no prompt
-        Returns:
-            O prompt formatado
-        """
-        if not content or not content.strip():
-            logging.warning(f"Empty content provided to _format_prompt for {prompt_type}")
-            return "No input provided."
-        if prompt_type == "translation":
-            return f"""Task: Translate the following text to English
-Instructions:
-Provide a direct English translation of the input text.
-Input: {content}
-Output:"""
-        elif prompt_type == "research":
-            return f"""Task: Research Assistant
-Instructions:
-You are a research assistant tasked with providing comprehensive information.
-Please provide a detailed explanation about the topic, including:
-- Definition and key characteristics
-- Causes or origins if applicable
-- Current scientific understanding
-- Important facts and statistics
-- Recent developments or research
-- Real-world implications and applications
-Search for relevant academic papers and medical resources using the provided tools.
-Make sure to include findings from recent research in your response.
-Use ArxivTools and PubmedTools to find the most relevant and up-to-date information.
-Aim to write at least 4-5 paragraphs with detailed information.
-Be thorough and informative, covering all important aspects of the topic.
-Use clear and accessible language suitable for a general audience.
-Input: {content}
-Output:"""
-        elif prompt_type == "presentation":
-            return f"""Task: Presentation Assistant
-Instructions:
-You are presenting research findings to a general audience.
-Please format the information in a clear, engaging, and accessible way.
-Include:
-- A clear introduction to the topic with a compelling title
-- Key points organized with headings or bullet points
-- Simple explanations of complex concepts
-- A brief conclusion or summary
-- Translate the entire response to Portuguese
-- Add appropriate emojis to make the presentation more engaging
-- Format the text using markdown for better readability
-Input: {content}
-Output:"""
-        else:
-            logging.error(f"Unknown prompt type: {prompt_type}")
-            return f"Unknown prompt type: {prompt_type}"
-    @staticmethod
-    def _load_specific_model(model_name: str, purpose: str) -> Tuple[Optional[Any], Optional[Any]]:
-        """
-        Load a specific model with retry logic
-        Args:
-            model_name: The name of the model to load
-            purpose: What the model will be used for (logging purposes)
-        Returns:
-            A tuple of (model, tokenizer) or (None, None) if loading fails
-        """
-        try:
-            logging.info(f"Attempting to load {purpose} model: {model_name}")
-            # Criar diretório de cache se não existir
-            cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "model_cache")
-            os.makedirs(cache_dir, exist_ok=True)
-            # Carregar modelo e tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)
-            logging.info(f"Successfully loaded {purpose} model: {model_name}")
-            return model, tokenizer
-        except Exception as e:
-            logging.error(f"Error loading {purpose} model {model_name}: {str(e)}")
-            return None, None
-    @staticmethod
-    @st.cache_resource
-    def _load_fallback_model():
-        """Load a fallback model"""
-        # Define retry decorator for model loading
-        @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
-        def load_with_retry(model_name):
-            try:
-                logging.info(f"Attempting to load fallback model from {model_name}")
-                # Criar diretório de cache se não existir
-                cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "model_cache")
-                os.makedirs(cache_dir, exist_ok=True)
-                # Carregar modelo e tokenizer
-                tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-                model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)
-                logging.info(f"Successfully loaded fallback model from {model_name}")
-                return model, tokenizer
-            except Exception as e:
-                logging.error(f"Error loading fallback model {model_name}: {str(e)}")
-                raise
-        # Lista de modelos para tentar, em ordem de preferência
-        model_names = ["google/flan-t5-small", "google/flan-t5-base"]
-        # Tentar carregar cada modelo na lista
-        for model_name in model_names:
-            try:
-                return load_with_retry(model_name)
-            except Exception as e:
-                logging.error(f"Failed to load fallback model {model_name}: {str(e)}")
-                continue
-        # Se todos os modelos falharem, retornar None
-        logging.error("All fallback models failed to load")
-        return None, None
-    def _get_default_research_content(self, topic):
-        """
-        Gera conteúdo de pesquisa padrão quando não for possível gerar com o modelo.
-        Args:
-            topic: O tópico da pesquisa
-        Returns:
-            Conteúdo de pesquisa padrão
-        """
-        return f"""
-# Research on {topic}
-## Definition and Key Characteristics
-{topic} is a subject of significant interest in various fields. While detailed information is currently limited in our system, we understand that it encompasses several key characteristics and has important implications.
-## Current Understanding
-Research on {topic} continues to evolve, with new findings emerging regularly. The current understanding suggests multiple dimensions to consider when approaching this topic.
-## Applications and Implications
-The study of {topic} has several real-world applications and implications that affect various sectors including healthcare, education, and social services.
-## Conclusion
-While our current information on {topic} is limited, it represents an important area for continued research and understanding. For more detailed information, consulting specialized literature and experts is recommended.
-        """
-    def _get_default_presentation_content(self):
-        """
-        Gera conteúdo de apresentação padrão quando não for possível gerar com o modelo.
-        Returns:
-            Conteúdo de apresentação padrão
-        """
-        return """
-🧠 **Entendendo o Tópico** 🧠
-## O que é?
-Este é um tópico complexo com múltiplas dimensões e implicações. Embora as informações detalhadas sejam limitadas no momento, podemos fornecer uma visão geral.
-## Características Principais:
-- 🔍 Possui características distintas que o definem
-- 📊 Apresenta variações significativas entre diferentes casos
-- 🔬 É objeto de pesquisa contínua em diversos campos
-## Aplicações e Implicações:
-- 🏥 Impacto em áreas como saúde e bem-estar
-- 🎓 Relevância para educação e desenvolvimento
-- 👪 Influência em dinâmicas sociais e familiares
-## Conclusão:
-Para informações mais detalhadas e específicas, recomendamos consultar literatura especializada e profissionais da área. A compreensão deste tópico continua a evoluir com novas pesquisas.
-*Fonte: Análise de pesquisas científicas atuais*
-        """
-    def _load_models(self):
-        """Carrega múltiplos modelos para diferentes propósitos"""
-        # Carregar modelo de tradução
-        translator_model, translator_tokenizer = self._load_specific_model(
-            MODEL_CONFIG["translator"]["primary"], "translator"
-        )
-        # Carregar modelo de pesquisa
-        researcher_model, researcher_tokenizer = self._load_specific_model(
-            MODEL_CONFIG["researcher"]["primary"], "researcher"
-        )
-        # Carregar modelo de apresentação
-        presenter_model, presenter_tokenizer = self._load_specific_model(
-            MODEL_CONFIG["presenter"]["primary"], "presenter"
-        )
-        # Carregar modelo de fallback
-        fallback_model, fallback_tokenizer = self._load_fallback_model()
-        # Criar modelos locais
-        if translator_model and translator_tokenizer:
-            self.models["translator"] = LocalHuggingFaceModel(
-                translator_model,
-                translator_tokenizer,
-                model_id=MODEL_CONFIG["translator"]["primary"]
-            )
-        else:
-            # Tentar carregar o modelo fallback para tradutor
-            fallback_translator, fallback_translator_tokenizer = self._load_specific_model(
-                MODEL_CONFIG["translator"]["fallback"], "translator fallback"
-            )
-            if fallback_translator and fallback_translator_tokenizer:
-                self.models["translator"] = LocalHuggingFaceModel(
-                    fallback_translator,
-                    fallback_translator_tokenizer,
-                    model_id=MODEL_CONFIG["translator"]["fallback"]
-                )
-            else:
-                self.models["translator"] = LocalHuggingFaceModel(
-                    fallback_model,
-                    fallback_tokenizer,
-                    model_id="fallback-model"
-                )
-        if researcher_model and researcher_tokenizer:
-            self.models["researcher"] = LocalHuggingFaceModel(
-                researcher_model,
-                researcher_tokenizer,
-                model_id=MODEL_CONFIG["researcher"]["primary"]
-            )
-        else:
-            # Tentar carregar o modelo fallback para pesquisador
-            fallback_researcher, fallback_researcher_tokenizer = self._load_specific_model(
-                MODEL_CONFIG["researcher"]["fallback"], "researcher fallback"
-            )
-            if fallback_researcher and fallback_researcher_tokenizer:
-                self.models["researcher"] = LocalHuggingFaceModel(
-                    fallback_researcher,
-                    fallback_researcher_tokenizer,
-                    model_id=MODEL_CONFIG["researcher"]["fallback"]
-                )
-            else:
-                self.models["researcher"] = LocalHuggingFaceModel(
-                    fallback_model,
-                    fallback_tokenizer,
-                    model_id="fallback-model"
-                )
-        if presenter_model and presenter_tokenizer:
-            self.models["presenter"] = LocalHuggingFaceModel(
-                presenter_model,
-                presenter_tokenizer,
-                model_id=MODEL_CONFIG["presenter"]["primary"]
-            )
-        else:
-            # Tentar carregar o modelo fallback para apresentador
-            fallback_presenter, fallback_presenter_tokenizer = self._load_specific_model(
-                MODEL_CONFIG["presenter"]["fallback"], "presenter fallback"
-            )
-            if fallback_presenter and fallback_presenter_tokenizer:
-                self.models["presenter"] = LocalHuggingFaceModel(
-                    fallback_presenter,
-                    fallback_presenter_tokenizer,
-                    model_id=MODEL_CONFIG["presenter"]["fallback"]
-                )
-            else:
-                self.models["presenter"] = LocalHuggingFaceModel(
-                    fallback_model,
-                    fallback_tokenizer,
-                    model_id="fallback-model"
-                )
-        # Configurar agentes com seus respectivos modelos
         self.translator = Agent(
             name="Translator",
             role="You will translate the query to English",
-            model=self.models["translator"],
             goal="Translate to English",
             instructions=[
-                "Translate the query to English",
-                "Preserve all key information from the original query",
-                "Return only the translated text without additional comments"
             ]
         )
-        # Configurar o agente de pesquisa com as ferramentas ArxivTools e PubmedTools
         self.researcher = Agent(
             name="Researcher",
             role="You are a research scholar who specializes in autism research.",
-            model=self.models["researcher"],
             instructions=[
-                "You need to understand the context of the question to provide the best answer.",
-                "Be precise and provide detailed information.",
-                "You must create an accessible explanation.",
                 "The content must be for people without autism knowledge.",
-                "Focus on providing comprehensive information about the topic.",
-                "Include definition, characteristics, causes, and current understanding.",
-                "ALWAYS use the provided tools (ArxivTools and PubmedTools) to search for relevant information.",
-                "Cite specific papers and studies in your response when appropriate.",
-                "When using tools, specify the search query clearly in your thoughts before making the call."
             ],
-            tools=[
-                ArxivTools(),  # Usar ferramentas ArxivTools
-                PubmedTools()  # Usar ferramentas PubmedTools
             ],
         )
         self.presenter = Agent(
             name="Presenter",
             role="You are a professional researcher who presents the results of the research.",
-            model=self.models["presenter"],
             instructions=[
                 "You are multilingual",
-                "You must present the results in a clear and engaging manner.",
-                "Format the information with headings and bullet points.",
-                "Provide simple explanations of complex concepts.",
-                "Include a brief conclusion or summary.",
-                "Add emojis to make the presentation more interactive.",
-                "Translate the answer to Portuguese.",
-                "Maintain any citations or references from the research in your presentation.",
-                "Do not add fictional information not present in the research."
-            ]
         )
-        logging.info("Models and agents loaded successfully.")
-    async def _run_with_tools(self, agent, prompt, max_steps=5):
-        """
-        Run an agent that has tool support and manage the execution.
-
-        Args:
-            agent: The agent to run.
-            prompt: The prompt sent to the agent.
-            max_steps: Maximum number of steps, forwarded to ``agent.arun``.
-
-        Returns:
-            The value returned by ``agent.arun``; if that call raises, an
-            error-message string naming the agent is returned instead.
-        """
         try:
-            logging.info(f"Running agent {agent.name} with tools")
-            # arun returns a coroutine, so it must be awaited
-            result = await agent.arun(prompt, max_steps=max_steps)
-            logging.info(f"Agent {agent.name} execution complete")
-            return result
         except Exception as e:
-            logging.error(f"Error during agent {agent.name} execution: {str(e)}")
-            return f"Error during {agent.name} execution: {str(e)}"
-    async def generate_answer_async(self, query: str) -> str:
-        """
-        Generate an answer to the user's query using asynchronous execution.
-
-        The query is translated, researched (with one retry using a more
-        specific prompt when the result is shorter than 150 characters) and
-        then passed to the presenter.
-
-        Args:
-            query: The user's query.
-
-        Returns:
-            The presenter's content, or a Portuguese error message when the
-            query is blank, an agent is missing, the translation is empty, or
-            an exception is raised.
-        """
         try:
-            if not query or not query.strip():
-                logging.error("Empty query provided")
-                return "Erro: Por favor, forneça uma consulta não vazia."
-            logging.info(f"Generating answer for query: {query}")
-            # Check that all three agents are available
-            if not self.translator or not self.researcher or not self.presenter:
-                logging.error("Models not available")
-                return "Desculpe, o serviço está temporariamente indisponível. Por favor, tente novamente mais tarde."
-            # Translate the query to English
-            translation_prompt = self._format_prompt("translation", query)
-            logging.info(f"Translation prompt: {translation_prompt}")
-            try:
-                # arun returns a coroutine, so it must be awaited
-                result = await self.translator.arun(translation_prompt)
-                logging.info(f"Translation result type: {type(result)}")
-                # Extract the content of the response
-                translation_content = self._extract_content(result)
-                logging.info(f"Translation content: {translation_content}")
-                if not translation_content or not translation_content.strip():
-                    logging.error("Empty translation result")
-                    return "Desculpe, não foi possível processar sua consulta. Por favor, tente novamente com uma pergunta diferente."
-                # Run the research step with tools
-                research_prompt = self._format_prompt("research", translation_content)
-                logging.info(f"Research prompt: {research_prompt}")
-                research_result = await self._run_with_tools(self.researcher, research_prompt)
-                logging.info(f"Research result type: {type(research_result)}")
-                # Extract the content of the research
-                research_content = self._extract_content(research_result)
-                logging.info(f"Research content: {research_content}")
-                # Check whether the research answer is too short
-                research_length = len(research_content.strip()) if research_content and isinstance(research_content, str) else 0
-                logging.info(f"Research content length: {research_length} characters")
-                if not research_content or not research_content.strip() or research_length < 150:
-                    logging.warning(f"Research result too short ({research_length} chars), trying with a more specific prompt")
-                    # Try again with a more specific prompt
-                    enhanced_prompt = f"""Task: Detailed Research
-    Instructions:
-    Provide a comprehensive explanation about '{translation_content}'.
-    Include definition, characteristics, causes, and current understanding.
-    Write at least 4-5 paragraphs with detailed information.
-    Be thorough and informative, covering all important aspects of the topic.
-    Use clear and accessible language suitable for a general audience.
-    Output:"""
-                    logging.info(f"Enhanced research prompt: {enhanced_prompt}")
-                    research_result = await self._run_with_tools(self.researcher, enhanced_prompt)
-                    research_content = self._extract_content(research_result)
-                    research_length = len(research_content.strip()) if research_content and isinstance(research_content, str) else 0
-                    logging.info(f"Enhanced research content: {research_content}")
-                    logging.info(f"Enhanced research content length: {research_length} characters")
-                    if not research_content or not research_content.strip() or research_length < 150:
-                        logging.warning(f"Research result still too short ({research_length} chars), using default response")
-                        # Use the default response
-                        logging.info("Using default research content")
-                        research_content = self._get_default_research_content(translation_content)
-                # Generate the presentation
-                presentation_prompt = self._format_prompt("presentation", research_content)
-                logging.info(f"Presentation prompt: {presentation_prompt}")
-                # arun returns a coroutine, so it must be awaited
-                result = await self.presenter.arun(presentation_prompt)
-                logging.info(f"Presentation type: {type(result)}")
-                presentation_content = self._extract_content(result)
-                logging.info(f"Presentation content: {presentation_content}")
-                presentation_length = len(presentation_content.strip()) if presentation_content and isinstance(presentation_content, str) else 0
-                logging.info(f"Presentation content length: {presentation_length} characters")
-                if not presentation_content or not presentation_content.strip() or presentation_length < 150:
-                    logging.warning(f"Presentation result too short ({presentation_length} chars), using default presentation")
-                    # NOTE(review): nothing is assigned in this branch, so the short content is still returned below.
-                logging.info("Answer generated successfully")
-                return presentation_content
-            except Exception as e:
-                logging.error(f"Error during answer generation: {str(e)}")
-                return f"Desculpe, ocorreu um erro ao processar sua consulta: {str(e)}. Por favor, tente novamente mais tarde."
         except Exception as e:
-            logging.error(f"Unexpected error in generate_answer_async: {str(e)}")
-            return "Desculpe, ocorreu um erro inesperado. Por favor, tente novamente mais tarde."

 import logging
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import streamlit as st
 from agno.agent import Agent
+from agno.models.ollama import Ollama
 from agno.tools.arxiv import ArxivTools
 from agno.tools.pubmed import PubmedTools
+# Identifier passed to `from_pretrained` for both the tokenizer and the model.
+MODEL_PATH = "meta-llama/Llama-3.2-1B"
 class ModelHandler:
+    """Answer autism-research questions with a chain of four agno agents.
+
+    ``generate_answer`` feeds the user's query through Translator ->
+    Researcher -> Summarizer -> Presenter; each agent receives the
+    ``content`` of the previous agent's run.
+    """
+
+    # Ollama model tag used by every agent built in _initialize_model().
+    _OLLAMA_MODEL_ID = "llama3.2:1b"
+
     def __init__(self):
+        """Declare the model/agent attributes, then build them."""
+        self.model = None
+        self.tokenizer = None
         self.translator = None
         self.researcher = None
+        self.summarizer = None
         self.presenter = None
+        self._initialize_model()
+
+    def _initialize_model(self):
+        """Load the transformers model and create the four agents.
+
+        NOTE(review): ``self.model`` / ``self.tokenizer`` are only stored
+        here; every agent below is backed by Ollama instead. Confirm whether
+        the transformers load is still needed by code outside this class.
+        """
+        self.model, self.tokenizer = self._load_model()
         self.translator = Agent(
             name="Translator",
             role="You will translate the query to English",
+            model=Ollama(id=self._OLLAMA_MODEL_ID),
             goal="Translate to English",
             instructions=[
+                "Translate the query to English",
             ]
         )
         self.researcher = Agent(
             name="Researcher",
             role="You are a research scholar who specializes in autism research.",
+            model=Ollama(id=self._OLLAMA_MODEL_ID),
+            tools=[ArxivTools(), PubmedTools()],
             instructions=[
+                # Every entry ends with a comma: adjacent string literals
+                # without one are concatenated into a single instruction.
+                "You need to understand the context of the question to provide the best answer based on your tools.",
+                "Be precise and provide just enough information to be useful",
+                "You must cite the sources used in your answer.",
+                "You must create an accessible summary.",
                 "The content must be for people without autism knowledge.",
+                "Focus in the main findings of the paper taking in consideration the question.",
+                "The answer must be brief.",
             ],
+            show_tool_calls=True,
+        )
+        self.summarizer = Agent(
+            name="Summarizer",
+            role="You are a specialist in summarizing research papers for people without autism knowledge.",
+            model=Ollama(id=self._OLLAMA_MODEL_ID),
+            instructions=[
+                "You must provide just enough information to be useful",
+                "You must cite the sources used in your answer.",
+                "You must be clear and concise.",
+                "You must create an accessible summary.",
+                "The content must be for people without autism knowledge.",
+                "Focus in the main findings of the paper taking in consideration the question.",
+                "The answer must be brief.",
+                "Remove everything related to the run itself like: 'Running: transfer_', just use plain text",
+                "You must use the language provided by the user to present the results.",
+                "Add references to the sources used in the answer.",
+                "Add emojis to make the presentation more interactive.",
+                "Translate the answer to Portuguese.",
             ],
+            show_tool_calls=True,
+            markdown=True,
+            add_references=True,
         )
         self.presenter = Agent(
             name="Presenter",
             role="You are a professional researcher who presents the results of the research.",
+            model=Ollama(id=self._OLLAMA_MODEL_ID),
             instructions=[
                 "You are multilingual",
+                "You must present the results in a clear and concise manner.",
+                "Clean up the presentation to make it more readable.",
+                "Remove unnecessary information.",
+                "Remove everything related to the run itself like: 'Running: transfer_', just use plain text",
+                "You must use the language provided by the user to present the results.",
+                "Add references to the sources used in the answer.",
+                "Add emojis to make the presentation more interactive.",
+                "Translate the answer to Portuguese.",
+            ],
+            add_references=True,
         )
+
+    @staticmethod
+    @st.cache_resource
+    def _load_model():
+        """Load the tokenizer and causal LM named by ``MODEL_PATH``.
+
+        Only ``st.cache_resource`` is applied: the loaded model and tokenizer
+        are shared objects, not data to be serialized, so ``st.cache_data``
+        must not wrap this function.
+
+        NOTE(review): the ``(None, None)`` failure result is a normal return
+        value, so it is presumably cached as well; confirm whether a failed
+        load should be retried on the next call.
+
+        Returns:
+            ``(model, tokenizer)`` on success, ``(None, None)`` if loading
+            raised.
+        """
         try:
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+            model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
+            return model, tokenizer
         except Exception as e:
+            # logging.exception keeps the traceback alongside the message.
+            logging.exception("Error loading model: %s", e)
+            return None, None
+
+    def generate_answer(self, query: str) -> str:
+        """Run ``query`` through the agent pipeline and return the final text.
+
+        Args:
+            query: The user's question.
+
+        Returns:
+            The Presenter's content, or the fallback message when the query
+            is blank, the Presenter returns nothing, or any stage raises.
+        """
+        # Skip four model calls when there is nothing to answer.
+        if not query or not query.strip():
+            logging.warning("Empty query provided")
+            return self._get_fallback_response()
         try:
+            translation = self.translator.run(query, stream=False)
+            logging.info("Translated query")
+            research = self.researcher.run(translation.content, stream=False)
+            logging.info("Generated research")
+            summary = self.summarizer.run(research.content, stream=False)
+            logging.info("Generated summary")
+            presentation = self.presenter.run(summary.content, stream=False)
+            logging.info("Generated presentation")
+            if not presentation.content:
+                return self._get_fallback_response()
+            return presentation.content
         except Exception as e:
+            logging.exception("Error generating answer: %s", e)
+            return self._get_fallback_response()
+
+    @staticmethod
+    def _get_fallback_response() -> str:
+        """Return the Portuguese message shown when no answer is available."""
+        # Built from plain literals so the source layout's indentation and
+        # newlines do not end up in the text shown to the user.
+        return (
+            "Peço desculpas, mas encontrei um erro ao gerar a resposta. "
+            "Tente novamente ou refaça a sua pergunta."
+        )