Commit 062179e
Parent: 00b67ca

added support for npu

Files changed:
- app.py +20 -12
- llm/llm.py +1 -5
- repository/intel_npu.py +47 -0
- repository/ollama.py +11 -4
- repository/repository.py +29 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,8 +1,10 @@
 from pathlib import Path
 
 from prompts.prompts_manager import PromptsManager
+from repository.intel_npu import IntelNpuRepository
 from repository.ollama import OllamaRepository
-from llm.llm import
+from llm.llm import Model
+from repository.repository import ModelRoles
 from form.form import work_categories, build_form_data_from_answers, write_pdf_form
 
 
@@ -21,21 +23,27 @@ if __name__ == '__main__':
     user_prompt = input(f"Please describe what you need to do. To get the best results "
                         f"try to answer all the following questions:\n{'\n'.join(prompts_manager.questions)}\n\n>")
 
-
-
-
-
-
-
-
-
-
-
+    # repository = OllamaRepository(Model("llama3.1",
+    #                                     ModelRoles("system", "user", "assistant")),
+    #                               prompts_manager.system_prompt,
+    #                               )
+    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
+    repository.init()
+    repository.send_prompt(f"Ingest the following information: {user_prompt}")
+    answers = {x:None for x in range(0,11)}
+    answer = repository.send_prompt(f"Answer the following questions, if the answer is not present just answer null. Put the answers between curly braces, separate each answer with a comma, keep the answer brief and maintain the order in which the questions are asked. Do not add any preamble: {"\n".join(prompts_manager.verification_prompt)}")
+    for idx, a in enumerate(answer['content'].split(",")):
+        answers[idx] = None if 'null' in a else a
+
+    # for idx, q in enumerate(prompts_manager.verification_prompt):
+    #     answer = repository.send_prompt(
+    #         f"Answer the following questions, if the answer is not present just answer null. Keep the answer brief and separate each answer with a comma and maintain the order in which the questions are asked: {q}")
+    #     answers[idx] = None if 'null' in answer["content"].lower() else answer['content']
     missing_answers = check_for_missing_answers(answers)
     while missing_answers:
         ask_again(missing_answers, prompts_manager.questions, answers)
         missing_answers = check_for_missing_answers(answers)
-    answer =
+    answer = repository.send_prompt(
         f"The work to do is {answers[1]}. Given the following categories {work_categories.values()} which ones are the most relevant? Only return one categories, separated by a semicolon")
     categories = []
     for category in answer["content"].split(";"):
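For reference, a minimal sketch of what the comma-split parsing in the new app.py block does. The reply string below is hypothetical; in the commit the content comes from repository.send_prompt(...):

    reply = {"content": "{John Smith, null, replace broken roof tiles}"}  # hypothetical model output
    answers = {x: None for x in range(0, 11)}
    for idx, a in enumerate(reply["content"].split(",")):
        # as in the diff: any chunk containing 'null' is treated as a missing answer
        answers[idx] = None if 'null' in a else a
    # answers -> {0: '{John Smith', 1: None, 2: ' replace broken roof tiles}', 3: None, ...}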
llm/llm.py
CHANGED
@@ -1,8 +1,4 @@
-
-    def __init__(self, system_role: str, user_role: str, ai_role: str):
-        self.system_role: str = system_role
-        self.user_role: str = user_role
-        self.ai_role: str = ai_role
+from repository.repository import ModelRoles
 
 
 class Model:
repository/intel_npu.py
ADDED
@@ -0,0 +1,47 @@
+from intel_npu_acceleration_library import NPUModelForCausalLM, int4, int8
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from transformers import AutoTokenizer
+
+from repository.repository import Repository, ModelRoles
+
+
+class IntelNpuRepository(Repository):
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+        self.message_history: list[dict[str, str]] = []
+        self.roles = ModelRoles("system", "user", "assistant")
+        self.model = None
+        self.tokenizer = None
+        self.terminators = None
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.roles
+
+    def get_model_name(self) -> str:
+        return self.model_name
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        return self.message_history
+
+    def set_message_for_role(self, message: str, role: str):
+        self.get_message_history().append({"role": role, "content": message})
+
+    def init(self):
+        compiler_conf = CompilerConfig(dtype=int4)
+        self.model = NPUModelForCausalLM.from_pretrained(self.get_model_name(), use_cache=True, config=compiler_conf, export=True, temperature=0).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.get_model_name())
+        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+        self.get_message_history().append({"role": self.get_model_roles().user_role, "content": prompt})
+        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True, return_tensors="pt")
+                     .to(self.model.device))
+        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000)
+        generated_token_array = outputs[0][len(input_ids[0]):]
+        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        answer = {"role": self.get_model_roles().ai_role, "content": generated_tokens}
+        if add_to_history:
+            self.message_history.append(answer)
+        else:
+            self.message_history.pop()
+        return answer
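A minimal usage sketch of the new class, assuming intel-npu-acceleration-library, transformers and an Intel NPU are available and the Meta-Llama-3 weights can be downloaded; the model name is the one app.py passes in, and the example prompt is made up:

    from repository.intel_npu import IntelNpuRepository

    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
    repository.init()  # compiles the model for the NPU with int4 weights and loads the tokenizer
    answer = repository.send_prompt("Ingest the following information: the kitchen ceiling needs repainting")
    print(answer["content"])  # assistant reply; the exchange is also kept in repository.get_message_history()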
repository/ollama.py
CHANGED
@@ -1,16 +1,17 @@
 import ollama
 from ollama import Options
 
-from llm.llm import
+from llm.llm import Model
+from repository.repository import Repository, ModelRoles
 
 
-class OllamaRepository:
-    def __init__(self, model:Model, system_msg):
+class OllamaRepository(Repository):
+    def __init__(self, model: Model, system_msg):
         self.model: Model = model
         self.system_msg: str = system_msg
         self.message_history: list[dict[str, str]] = [{"role": self.model.roles.system_role, "content": system_msg}]
 
-    def send_prompt(self, prompt:str, add_to_history:bool = True) -> dict[str, str]:
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
         options: Options = Options(temperature=0)
         self.message_history.append({"role": self.model.roles.user_role, "content":prompt})
         response = ollama.chat(self.model.name, self.message_history, options=options)
@@ -20,3 +21,9 @@ class OllamaRepository:
         else:
             self.message_history.pop()
         return answer
+
+    def get_model_name(self) -> str:
+        return self.model.name
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.model.roles
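For comparison, the Ollama-backed path that the commented-out lines in app.py correspond to. A sketch only: it assumes a local Ollama server with the llama3.1 model pulled, that Model takes (name, roles) as the commented-out call suggests, and the system message is a placeholder for prompts_manager.system_prompt:

    from llm.llm import Model
    from repository.ollama import OllamaRepository
    from repository.repository import ModelRoles

    repository = OllamaRepository(Model("llama3.1", ModelRoles("system", "user", "assistant")),
                                  "You help users fill in a work-request form.")  # placeholder system prompt
    answer = repository.send_prompt("Ingest the following information: the kitchen ceiling needs repainting")
    print(answer["content"])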
repository/repository.py
ADDED
@@ -0,0 +1,29 @@
+import abc
+
+
+class ModelRoles:
+    def __init__(self, system_role: str, user_role: str, ai_role: str):
+        self.system_role: str = system_role
+        self.user_role: str = user_role
+        self.ai_role: str = ai_role
+
+
+class Repository(abc.ABC):
+
+    def get_model_name(self) -> str:
+        pass
+
+    def get_model_roles(self) -> ModelRoles:
+        pass
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        pass
+
+    def send_prompt(self, prompt: str, add_to_history: bool) -> dict[str, str]:
+        pass
+
+    def set_message_for_role(self, message: str, role: ModelRoles):
+        pass
+
+    def init(self):
+        pass
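Since both backends now subclass Repository, calling code can stay backend-agnostic. A small illustrative sketch (the helper function is not part of the commit):

    from repository.repository import Repository

    def ingest(repo: Repository, user_prompt: str) -> str:
        # works with either OllamaRepository or IntelNpuRepository;
        # init() is a no-op for backends that do not override it
        repo.init()
        return repo.send_prompt(f"Ingest the following information: {user_prompt}")["content"]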
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
 PyPDFForm
-ollama
+ollama
+intel-npu-acceleration-library