Commit 062179e
Parent: 00b67ca

added support for npu

Files changed:
- app.py +20 -12
- llm/llm.py +1 -5
- repository/intel_npu.py +47 -0
- repository/ollama.py +11 -4
- repository/repository.py +29 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,8 +1,10 @@
 from pathlib import Path
 
 from prompts.prompts_manager import PromptsManager
+from repository.intel_npu import IntelNpuRepository
 from repository.ollama import OllamaRepository
-from llm.llm import
+from llm.llm import Model
+from repository.repository import ModelRoles
 from form.form import work_categories, build_form_data_from_answers, write_pdf_form
 
 
@@ -21,21 +23,27 @@ if __name__ == '__main__':
     user_prompt = input(f"Please describe what you need to do. To get the best results "
                         f"try to answer all the following questions:\n{'\n'.join(prompts_manager.questions)}\n\n>")
 
-
-
-
-
-
-
-
-
-
-
+    # repository = OllamaRepository(Model("llama3.1",
+    #                                     ModelRoles("system", "user", "assistant")),
+    #                               prompts_manager.system_prompt,
+    #                               )
+    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
+    repository.init()
+    repository.send_prompt(f"Ingest the following information: {user_prompt}")
+    answers = {x:None for x in range(0,11)}
+    answer = repository.send_prompt(f"Answer the following questions, if the answer is not present just answer null. Put the answers between curly braces, separate each answer with a comma, keep the answer brief and maintain the order in which the questions are asked. Do not add any preamble: {"\n".join(prompts_manager.verification_prompt)}")
+    for idx, a in enumerate(answer['content'].split(",")):
+        answers[idx] = None if 'null' in a else a
+
+    # for idx, q in enumerate(prompts_manager.verification_prompt):
+    #     answer = repository.send_prompt(
+    #         f"Answer the following questions, if the answer is not present just answer null. Keep the answer brief and separate each answer with a comma and maintain the order in which the questions are asked: {q}")
+    #     answers[idx] = None if 'null' in answer["content"].lower() else answer['content']
     missing_answers = check_for_missing_answers(answers)
     while missing_answers:
         ask_again(missing_answers, prompts_manager.questions, answers)
         missing_answers = check_for_missing_answers(answers)
-    answer =
+    answer = repository.send_prompt(
         f"The work to do is {answers[1]}. Given the following categories {work_categories.values()} which ones are the most relevant? Only return one categories, separated by a semicolon")
     categories = []
     for category in answer["content"].split(";"):
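For reference, a minimal sketch of what the comma-split parsing in the new app.py block does. The reply string below is hypothetical; in the commit the content comes from repository.send_prompt(...):

    reply = {"content": "{John Smith, null, replace broken roof tiles}"}  # hypothetical model output
    answers = {x: None for x in range(0, 11)}
    for idx, a in enumerate(reply["content"].split(",")):
        # as in the diff: any chunk containing 'null' is treated as a missing answer
        answers[idx] = None if 'null' in a else a
    # answers -> {0: '{John Smith', 1: None, 2: ' replace broken roof tiles}', 3: None, ...}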
llm/llm.py
CHANGED
@@ -1,8 +1,4 @@
-
-    def __init__(self, system_role: str, user_role: str, ai_role: str):
-        self.system_role: str = system_role
-        self.user_role: str = user_role
-        self.ai_role: str = ai_role
+from repository.repository import ModelRoles
 
 
 class Model:
repository/intel_npu.py
ADDED
@@ -0,0 +1,47 @@
+from intel_npu_acceleration_library import NPUModelForCausalLM, int4, int8
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from transformers import AutoTokenizer
+
+from repository.repository import Repository, ModelRoles
+
+
+class IntelNpuRepository(Repository):
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+        self.message_history: list[dict[str, str]] = []
+        self.roles = ModelRoles("system", "user", "assistant")
+        self.model = None
+        self.tokenizer = None
+        self.terminators = None
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.roles
+
+    def get_model_name(self) -> str:
+        return self.model_name
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        return self.message_history
+
+    def set_message_for_role(self, message: str, role: str):
+        self.get_message_history().append({"role": role, "content": message})
+
+    def init(self):
+        compiler_conf = CompilerConfig(dtype=int4)
+        self.model = NPUModelForCausalLM.from_pretrained(self.get_model_name(), use_cache=True, config=compiler_conf, export=True, temperature=0).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.get_model_name())
+        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+        self.get_message_history().append({"role": self.get_model_roles().user_role, "content": prompt})
+        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True, return_tensors="pt")
+                     .to(self.model.device))
+        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000)
+        generated_token_array = outputs[0][len(input_ids[0]):]
+        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        answer = {"role": self.get_model_roles().ai_role, "content": generated_tokens}
+        if add_to_history:
+            self.message_history.append(answer)
+        else:
+            self.message_history.pop()
+        return answer
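A minimal usage sketch of the new class, assuming intel-npu-acceleration-library, transformers and an Intel NPU are available and the Meta-Llama-3 weights can be downloaded; the model name is the one app.py passes in, and the example prompt is made up:

    from repository.intel_npu import IntelNpuRepository

    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
    repository.init()  # compiles the model for the NPU with int4 weights and loads the tokenizer
    answer = repository.send_prompt("Ingest the following information: the kitchen ceiling needs repainting")
    print(answer["content"])  # assistant reply; the exchange is also kept in repository.get_message_history()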
repository/ollama.py
CHANGED
@@ -1,16 +1,17 @@
 import ollama
 from ollama import Options
 
-from llm.llm import
+from llm.llm import Model
+from repository.repository import Repository, ModelRoles
 
 
-class OllamaRepository:
-    def __init__(self, model:Model, system_msg):
+class OllamaRepository(Repository):
+    def __init__(self, model: Model, system_msg):
         self.model: Model = model
         self.system_msg: str = system_msg
         self.message_history: list[dict[str, str]] = [{"role": self.model.roles.system_role, "content": system_msg}]
 
-    def send_prompt(self, prompt:str, add_to_history:bool = True) -> dict[str, str]:
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
         options: Options = Options(temperature=0)
         self.message_history.append({"role": self.model.roles.user_role, "content":prompt})
         response = ollama.chat(self.model.name, self.message_history, options=options)
@@ -20,3 +21,9 @@ class OllamaRepository:
         else:
             self.message_history.pop()
         return answer
+
+    def get_model_name(self) -> str:
+        return self.model.name
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.model.roles
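For comparison, the Ollama-backed path that the commented-out lines in app.py correspond to. A sketch only: it assumes a local Ollama server with the llama3.1 model pulled, that Model takes (name, roles) as the commented-out call suggests, and the system message is a placeholder for prompts_manager.system_prompt:

    from llm.llm import Model
    from repository.ollama import OllamaRepository
    from repository.repository import ModelRoles

    repository = OllamaRepository(Model("llama3.1", ModelRoles("system", "user", "assistant")),
                                  "You help users fill in a work-request form.")  # placeholder system prompt
    answer = repository.send_prompt("Ingest the following information: the kitchen ceiling needs repainting")
    print(answer["content"])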
repository/repository.py
ADDED
@@ -0,0 +1,29 @@
+import abc
+
+
+class ModelRoles:
+    def __init__(self, system_role: str, user_role: str, ai_role: str):
+        self.system_role: str = system_role
+        self.user_role: str = user_role
+        self.ai_role: str = ai_role
+
+
+class Repository(abc.ABC):
+
+    def get_model_name(self) -> str:
+        pass
+
+    def get_model_roles(self) -> ModelRoles:
+        pass
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        pass
+
+    def send_prompt(self, prompt: str, add_to_history: bool) -> dict[str, str]:
+        pass
+
+    def set_message_for_role(self, message: str, role: ModelRoles):
+        pass
+
+    def init(self):
+        pass
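Since both backends now subclass Repository, calling code can stay backend-agnostic. A small illustrative sketch (the helper function is not part of the commit):

    from repository.repository import Repository

    def ingest(repo: Repository, user_prompt: str) -> str:
        # works with either OllamaRepository or IntelNpuRepository;
        # init() is a no-op for backends that do not override it
        repo.init()
        return repo.send_prompt(f"Ingest the following information: {user_prompt}")["content"]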
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
 PyPDFForm
-ollama
+ollama
+intel-npu-acceleration-library