Commit 062179e
Parent(s): 00b67ca

added support for npu

Files changed:
- app.py +20 -12
- llm/llm.py +1 -5
- repository/intel_npu.py +47 -0
- repository/ollama.py +11 -4
- repository/repository.py +29 -0
- requirements.txt +2 -1
app.py CHANGED

@@ -1,8 +1,10 @@
 from pathlib import Path
 
 from prompts.prompts_manager import PromptsManager
+from repository.intel_npu import IntelNpuRepository
 from repository.ollama import OllamaRepository
-from llm.llm import …
+from llm.llm import Model
+from repository.repository import ModelRoles
 from form.form import work_categories, build_form_data_from_answers, write_pdf_form
 
 
@@ -21,21 +23,27 @@ if __name__ == '__main__':
     user_prompt = input(f"Please describe what you need to do. To get the best results "
                         f"try to answer all the following questions:\n{'\n'.join(prompts_manager.questions)}\n\n>")
 
-    … (10 removed lines not captured in this view)
+    # repository = OllamaRepository(Model("llama3.1",
+    #                                     ModelRoles("system", "user", "assistant")),
+    #                               prompts_manager.system_prompt,
+    #                               )
+    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
+    repository.init()
+    repository.send_prompt(f"Ingest the following information: {user_prompt}")
+    answers = {x:None for x in range(0,11)}
+    answer = repository.send_prompt(f"Answer the following questions, if the answer is not present just answer null. Put the answers between curly braces, separate each answer with a comma, keep the answer brief and maintain the order in which the questions are asked. Do not add any preamble: {"\n".join(prompts_manager.verification_prompt)}")
+    for idx, a in enumerate(answer['content'].split(",")):
+        answers[idx] = None if 'null' in a else a
+
+    # for idx, q in enumerate(prompts_manager.verification_prompt):
+    #     answer = repository.send_prompt(
+    #         f"Answer the following questions, if the answer is not present just answer null. Keep the answer brief and separate each answer with a comma and maintain the order in which the questions are asked: {q}")
+    #     answers[idx] = None if 'null' in answer["content"].lower() else answer['content']
     missing_answers = check_for_missing_answers(answers)
     while missing_answers:
         ask_again(missing_answers, prompts_manager.questions, answers)
         missing_answers = check_for_missing_answers(answers)
-    answer = …
+    answer = repository.send_prompt(
         f"The work to do is {answers[1]}. Given the following categories {work_categories.values()} which ones are the most relevant? Only return one categories, separated by a semicolon")
     categories = []
     for category in answer["content"].split(";"):
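The rewritten main flow batches every verification question into a single prompt and then maps the comma-separated reply back onto the answers dict. A minimal sketch of that parsing step, run against an invented reply (the names and dates below are placeholders, not real model output):

# Hypothetical assistant reply; real content depends on the model and the questions asked.
reply = {"role": "assistant", "content": "John Doe, null, 2024-05-01, null, repaint the facade"}

answers = {x: None for x in range(0, 11)}
for idx, a in enumerate(reply["content"].split(",")):
    answers[idx] = None if "null" in a else a

# Indices never filled stay None; in app.py, check_for_missing_answers() presumably
# flags exactly those and ask_again() re-prompts the user for them.
print(answers)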
llm/llm.py CHANGED

@@ -1,8 +1,4 @@
-… (removed line not captured in this view)
-    def __init__(self, system_role: str, user_role: str, ai_role: str):
-        self.system_role: str = system_role
-        self.user_role: str = user_role
-        self.ai_role: str = ai_role
+from repository.repository import ModelRoles
 
 
 class Model:
repository/intel_npu.py ADDED

@@ -0,0 +1,47 @@
+from intel_npu_acceleration_library import NPUModelForCausalLM, int4, int8
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from transformers import AutoTokenizer
+
+from repository.repository import Repository, ModelRoles
+
+
+class IntelNpuRepository(Repository):
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+        self.message_history: list[dict[str, str]] = []
+        self.roles = ModelRoles("system", "user", "assistant")
+        self.model = None
+        self.tokenizer = None
+        self.terminators = None
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.roles
+
+    def get_model_name(self) -> str:
+        return self.model_name
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        return self.message_history
+
+    def set_message_for_role(self, message:str, role: str):
+        self.get_message_history().append({"role": role, "content": message})
+
+    def init(self):
+        compiler_conf = CompilerConfig(dtype=int4)
+        self.model = NPUModelForCausalLM.from_pretrained(self.get_model_name(), use_cache=True, config=compiler_conf, export=True, temperature=0).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.get_model_name())
+        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+        self.get_message_history().append({"role":self.get_model_roles().user_role, "content":prompt})
+        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True, return_tensors="pt")
+                     .to(self.model.device))
+        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000)
+        generated_token_array = outputs[0][len(input_ids[0]):]
+        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        answer = {"role": self.get_model_roles().ai_role, "content":generated_tokens}
+        if add_to_history:
+            self.message_history.append(answer)
+        else:
+            self.message_history.pop()
+        return answer
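Together with app.py, the intended use of the new backend looks roughly like the sketch below: construct the repository with a Hugging Face model id, call init() to compile the model for the NPU and load the tokenizer, then exchange messages through send_prompt(). The model id and call sequence come from app.py; the prompt text is a placeholder, and running this needs an Intel NPU plus the intel-npu-acceleration-library and transformers packages.

from repository.intel_npu import IntelNpuRepository

# Same wiring as app.py; the model id is the one used there.
repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
repository.init()  # compiles the model to int4 for the NPU and loads the tokenizer

# send_prompt() appends the user message to the history and returns
# {"role": "assistant", "content": ...}; with add_to_history=False the
# user message is popped again and the reply is not stored.
reply = repository.send_prompt("Ingest the following information: the roof is leaking")
print(reply["content"])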
repository/ollama.py CHANGED

@@ -1,16 +1,17 @@
 import ollama
 from ollama import Options
 
-from llm.llm import …
+from llm.llm import Model
+from repository.repository import Repository, ModelRoles
 
 
-class OllamaRepository:
-    def __init__(self, model:Model, system_msg):
+class OllamaRepository(Repository):
+    def __init__(self, model: Model, system_msg):
         self.model: Model = model
         self.system_msg: str = system_msg
         self.message_history: list[dict[str, str]] = [{"role": self.model.roles.system_role, "content": system_msg}]
 
-    def send_prompt(self, prompt:str, add_to_history:bool = True) -> dict[str, str]:
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
         options: Options = Options(temperature=0)
         self.message_history.append({"role": self.model.roles.user_role, "content":prompt})
         response = ollama.chat(self.model.name, self.message_history, options=options)
@@ -20,3 +21,9 @@ class OllamaRepository:
         else:
             self.message_history.pop()
         return answer
+
+    def get_model_name(self) -> str:
+        return self.model.name
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.model.roles
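OllamaRepository now implements the same Repository interface, so app.py can switch back to it by uncommenting the block kept at the top of the main flow. A sketch of that wiring, with a placeholder system prompt standing in for prompts_manager.system_prompt; the Model(name, roles) signature is assumed from the commented-out call in app.py:

from llm.llm import Model
from repository.ollama import OllamaRepository
from repository.repository import ModelRoles

# Requires a local Ollama server with the llama3.1 model pulled.
# The system prompt below is a placeholder; app.py passes prompts_manager.system_prompt.
model = Model("llama3.1", ModelRoles("system", "user", "assistant"))
repository = OllamaRepository(model, "You help users fill in work-request forms.")

reply = repository.send_prompt("Ingest the following information: the roof is leaking")
print(reply["content"])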
repository/repository.py ADDED

@@ -0,0 +1,29 @@
+import abc
+
+
+class ModelRoles:
+    def __init__(self, system_role: str, user_role: str, ai_role: str):
+        self.system_role: str = system_role
+        self.user_role: str = user_role
+        self.ai_role: str = ai_role
+
+
+class Repository(abc.ABC):
+
+    def get_model_name(self) -> str:
+        pass
+
+    def get_model_roles(self) -> ModelRoles:
+        pass
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        pass
+
+    def send_prompt(self, prompt: str, add_to_history: bool) -> dict[str, str]:
+        pass
+
+    def set_message_for_role(self, message:str, role: ModelRoles):
+        pass
+
+    def init(self):
+        pass
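repository.py is what lets app.py swap backends: ModelRoles carries the role names and Repository declares the methods both OllamaRepository and IntelNpuRepository provide. Note the base class subclasses abc.ABC but marks nothing @abstractmethod, so the contract is by convention rather than enforced. Below is a sketch of a third backend written against that interface; EchoRepository and its behaviour are invented purely to show the shape a new implementation would take, and are not part of this commit:

from repository.repository import Repository, ModelRoles


class EchoRepository(Repository):
    """Toy backend used only to illustrate the Repository contract; not part of the commit."""

    def __init__(self):
        self.roles = ModelRoles("system", "user", "assistant")
        self.history: list[dict[str, str]] = []

    def get_model_name(self) -> str:
        return "echo"

    def get_model_roles(self) -> ModelRoles:
        return self.roles

    def get_message_history(self) -> list[dict[str, str]]:
        return self.history

    def set_message_for_role(self, message: str, role: str):
        self.history.append({"role": role, "content": message})

    def init(self):
        pass  # nothing to load for the toy backend

    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
        # Simply echoes the prompt back in the assistant role.
        answer = {"role": self.roles.ai_role, "content": prompt}
        if add_to_history:
            self.history.append({"role": self.roles.user_role, "content": prompt})
            self.history.append(answer)
        return answer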
requirements.txt CHANGED

@@ -1,2 +1,3 @@
 PyPDFForm
-ollama
+ollama
+intel-npu-acceleration-library