wakeupmh committed on
Commit
9b12849
·
1 Parent(s): dfb96be

refactor: light model

Browse files
Files changed (2) hide show
  1. requirements.txt +4 -2
  2. services/model_handler.py +13 -8
requirements.txt CHANGED
@@ -1,10 +1,12 @@
1
  transformers>=4.36.2
2
  streamlit>=1.29.0
3
  --extra-index-url https://download.pytorch.org/whl/cpu
 
4
  accelerate>=0.26.0
5
  arxiv>=1.4.7
6
  python-dotenv>=1.0.0
7
  agno==1.0.6
8
- ollama>=0.4.7
9
  pypdf>=3.11.1
10
- watchdog>=2.3.1
 
 
 
1
  transformers>=4.36.2
2
  streamlit>=1.29.0
3
  --extra-index-url https://download.pytorch.org/whl/cpu
4
+ torch>=2.1.0
5
  accelerate>=0.26.0
6
  arxiv>=1.4.7
7
  python-dotenv>=1.0.0
8
  agno==1.0.6
 
9
  pypdf>=3.11.1
10
+ watchdog>=2.3.1
11
+ bitsandbytes>=0.41.0
12
+ sentencepiece>=0.1.99
services/model_handler.py CHANGED
@@ -1,4 +1,5 @@
1
  import logging
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import streamlit as st
4
  from agno.agent import Agent
@@ -6,7 +7,7 @@ from agno.models.ollama import Ollama
6
  from agno.tools.arxiv import ArxivTools
7
  from agno.tools.pubmed import PubmedTools
8
 
9
- MODEL_PATH = "meta-llama/Llama-3.2-1B"
10
 
11
  class ModelHandler:
12
  def __init__(self):
@@ -22,10 +23,14 @@ class ModelHandler:
22
  def _initialize_model(self):
23
  """Initialize model and tokenizer"""
24
  self.model, self.tokenizer = self._load_model()
 
 
 
 
25
  self.translator = Agent(
26
  name="Translator",
27
  role="You will translate the query to English",
28
- model=Ollama(id="llama3.2:1b"),
29
  goal="Translate to English",
30
  instructions=[
31
  "Translate the query to English"
@@ -35,7 +40,7 @@ class ModelHandler:
35
  self.researcher = Agent(
36
  name="Researcher",
37
  role="You are a research scholar who specializes in autism research.",
38
- model=Ollama(id="llama3.2:1b"),
39
  tools=[ArxivTools(), PubmedTools()],
40
  instructions=[
41
  "You need to understand the context of the question to provide the best answer based on your tools."
@@ -48,10 +53,11 @@ class ModelHandler:
48
  ],
49
  show_tool_calls=True,
50
  )
 
51
  self.summarizer = Agent(
52
  name="Summarizer",
53
  role="You are a specialist in summarizing research papers for people without autism knowledge.",
54
- model=Ollama(id="llama3.2:1b"),
55
  instructions=[
56
  "You must provide just enough information to be useful",
57
  "You must cite the sources used in your answer.",
@@ -74,7 +80,7 @@ class ModelHandler:
74
  self.presenter = Agent(
75
  name="Presenter",
76
  role="You are a professional researcher who presents the results of the research.",
77
- model=Ollama(id="llama3.2:1b"),
78
  instructions=[
79
  "You are multilingual",
80
  "You must present the results in a clear and concise manner.",
@@ -89,16 +95,15 @@ class ModelHandler:
89
  add_references=True,
90
  )
91
 
92
-
93
  @staticmethod
94
  @st.cache_resource
95
- @st.cache_data
96
  def _load_model():
97
  try:
98
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
99
- model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
100
  return model, tokenizer
101
  except Exception as e:
 
102
  logging.error(f"Error loading model: {str(e)}")
103
  return None, None
104
 
 
1
  import logging
2
+ from transformers import pipeline
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import streamlit as st
5
  from agno.agent import Agent
 
7
  from agno.tools.arxiv import ArxivTools
8
  from agno.tools.pubmed import PubmedTools
9
 
10
+ MODEL_PATH = "facebook/opt-125m" # Modelo muito mais leve
11
 
12
  class ModelHandler:
13
  def __init__(self):
 
23
  def _initialize_model(self):
24
  """Initialize model and tokenizer"""
25
  self.model, self.tokenizer = self._load_model()
26
+
27
+ # Usando pipeline para text-generation em vez do Ollama
28
+ text_generation = pipeline("text-generation", model=MODEL_PATH, device="cpu")
29
+
30
  self.translator = Agent(
31
  name="Translator",
32
  role="You will translate the query to English",
33
+ model=text_generation,
34
  goal="Translate to English",
35
  instructions=[
36
  "Translate the query to English"
 
40
  self.researcher = Agent(
41
  name="Researcher",
42
  role="You are a research scholar who specializes in autism research.",
43
+ model=text_generation,
44
  tools=[ArxivTools(), PubmedTools()],
45
  instructions=[
46
  "You need to understand the context of the question to provide the best answer based on your tools."
 
53
  ],
54
  show_tool_calls=True,
55
  )
56
+
57
  self.summarizer = Agent(
58
  name="Summarizer",
59
  role="You are a specialist in summarizing research papers for people without autism knowledge.",
60
+ model=text_generation,
61
  instructions=[
62
  "You must provide just enough information to be useful",
63
  "You must cite the sources used in your answer.",
 
80
  self.presenter = Agent(
81
  name="Presenter",
82
  role="You are a professional researcher who presents the results of the research.",
83
+ model=text_generation,
84
  instructions=[
85
  "You are multilingual",
86
  "You must present the results in a clear and concise manner.",
 
95
  add_references=True,
96
  )
97
 
 
98
  @staticmethod
99
  @st.cache_resource
 
100
  def _load_model():
101
  try:
102
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
103
+ model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", load_in_8bit=True)
104
  return model, tokenizer
105
  except Exception as e:
106
+ st.error(f"Error loading model: {str(e)}")
107
  logging.error(f"Error loading model: {str(e)}")
108
  return None, None
109