Spaces:

wakeupmh
/

alem-do-espectro

Sleeping

App Files Files Community

wakeupmh committed on Feb 20

Commit

9b12849

1 Parent(s): dfb96be

refactor: light model

Browse files

Files changed (2) hide show

requirements.txt +4 -2
services/model_handler.py +13 -8

requirements.txt CHANGED Viewed

@@ -1,10 +1,12 @@
 transformers>=4.36.2
 streamlit>=1.29.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate>=0.26.0
 arxiv>=1.4.7
 python-dotenv>=1.0.0
 agno==1.0.6
-ollama>=0.4.7
 pypdf>=3.11.1
-watchdog>=2.3.1

 transformers>=4.36.2
 streamlit>=1.29.0
 --extra-index-url https://download.pytorch.org/whl/cpu
+torch>=2.1.0
 accelerate>=0.26.0
 arxiv>=1.4.7
 python-dotenv>=1.0.0
 agno==1.0.6
 pypdf>=3.11.1
+watchdog>=2.3.1
+bitsandbytes>=0.41.0
+sentencepiece>=0.1.99

services/model_handler.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import logging
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import streamlit as st
 from agno.agent import Agent
@@ -6,7 +7,7 @@ from agno.models.ollama import Ollama
 from agno.tools.arxiv import ArxivTools
 from agno.tools.pubmed import PubmedTools
-MODEL_PATH = "meta-llama/Llama-3.2-1B"
 class ModelHandler:
     def __init__(self):
@@ -22,10 +23,14 @@ class ModelHandler:
     def _initialize_model(self):
         """Initialize model and tokenizer"""
         self.model, self.tokenizer = self._load_model()
         self.translator = Agent(
             name="Translator",
             role="You will translate the query to English",
-            model=Ollama(id="llama3.2:1b"),
             goal="Translate to English",
             instructions=[
                 "Translate the query to English"
@@ -35,7 +40,7 @@ class ModelHandler:
         self.researcher = Agent(
             name="Researcher",
             role="You are a research scholar who specializes in autism research.",
-            model=Ollama(id="llama3.2:1b"),
             tools=[ArxivTools(), PubmedTools()],
             instructions=[
                 "You need to understand the context of the question to provide the best answer based on your tools."
@@ -48,10 +53,11 @@ class ModelHandler:
             ],
             show_tool_calls=True,
         )
         self.summarizer = Agent(
             name="Summarizer",
             role="You are a specialist in summarizing research papers for people without autism knowledge.",
-            model=Ollama(id="llama3.2:1b"),
             instructions=[
                 "You must provide just enough information to be useful",
                 "You must cite the sources used in your answer.",
@@ -74,7 +80,7 @@ class ModelHandler:
         self.presenter = Agent(
             name="Presenter",
             role="You are a professional researcher who presents the results of the research.",
-            model=Ollama(id="llama3.2:1b"),
             instructions=[
                 "You are multilingual",
                 "You must present the results in a clear and concise manner.",
@@ -89,16 +95,15 @@ class ModelHandler:
             add_references=True,
         )
     @staticmethod
     @st.cache_resource
-    @st.cache_data
     def _load_model():
         try:
             tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-            model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
             return model, tokenizer
         except Exception as e:
             logging.error(f"Error loading model: {str(e)}")
             return None, None

 import logging
+from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import streamlit as st
 from agno.agent import Agent
 from agno.tools.arxiv import ArxivTools
 from agno.tools.pubmed import PubmedTools
+MODEL_PATH = "facebook/opt-125m"  # Modelo muito mais leve
 class ModelHandler:
     def __init__(self):
     def _initialize_model(self):
         """Initialize model and tokenizer"""
         self.model, self.tokenizer = self._load_model()
+        # Usando pipeline para text-generation em vez do Ollama
+        text_generation = pipeline("text-generation", model=MODEL_PATH, device="cpu")
         self.translator = Agent(
             name="Translator",
             role="You will translate the query to English",
+            model=text_generation,
             goal="Translate to English",
             instructions=[
                 "Translate the query to English"
         self.researcher = Agent(
             name="Researcher",
             role="You are a research scholar who specializes in autism research.",
+            model=text_generation,
             tools=[ArxivTools(), PubmedTools()],
             instructions=[
                 "You need to understand the context of the question to provide the best answer based on your tools."
             ],
             show_tool_calls=True,
         )
         self.summarizer = Agent(
             name="Summarizer",
             role="You are a specialist in summarizing research papers for people without autism knowledge.",
+            model=text_generation,
             instructions=[
                 "You must provide just enough information to be useful",
                 "You must cite the sources used in your answer.",
         self.presenter = Agent(
             name="Presenter",
             role="You are a professional researcher who presents the results of the research.",
+            model=text_generation,
             instructions=[
                 "You are multilingual",
                 "You must present the results in a clear and concise manner.",
             add_references=True,
         )
     @staticmethod
     @st.cache_resource
     def _load_model():
         try:
             tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+            model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", load_in_8bit=True)
             return model, tokenizer
         except Exception as e:
+            st.error(f"Error loading model: {str(e)}")
             logging.error(f"Error loading model: {str(e)}")
             return None, None