enricorampazzo committed
Commit 0298edc · Parent: 7dfaba7

now referencing the intel-npu library. Let's see if HF still complains

Files changed (2):
  1. repository/intel_npu.py +58 -58
  2. repository/repository.py +3 -2
repository/intel_npu.py CHANGED
@@ -1,58 +1,58 @@
-# import json
-# from pathlib import Path
-#
-# from intel_npu_acceleration_library import NPUModelForCausalLM, int4
-# from intel_npu_acceleration_library.compiler import CompilerConfig
-# from transformers import AutoTokenizer
-#
-# from repository.repository_abc import Repository, Model
-#
-#
-# class IntelNpuRepository(Repository):
-#     def __init__(self, model_info: Model, system_msg: str = None, log_to_file: Path = None):
-#         self.model_info: Model = model_info
-#         self.message_history: list[dict[str, str]] = []
-#         self.set_message_for_role(self.model_info.roles.system_role, system_msg)
-#         self.model = None
-#         self.tokenizer = None
-#         self.terminators = None
-#         self.log_to_file = log_to_file
-#
-#     def get_model_info(self) -> Model:
-#         return self.model_info
-#
-#     def get_message_history(self) -> list[dict[str, str]]:
-#         return self.message_history
-#
-#     def init(self):
-#         compiler_conf = CompilerConfig(dtype=int4)
-#         self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
-#                                                          export=True, temperature=0).eval()
-#         self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
-#         self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-#
-#     def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
-#         pass
-#         print("prompt to be sent: " + prompt)
-#         user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
-#         if self.log_to_file:
-#             with open(self.log_to_file, "a+") as log_file:
-#                 log_file.write(json.dumps(user_prompt, indent=2))
-#                 log_file.write("\n")
-#         self.get_message_history().append(user_prompt)
-#         input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
-#                                                         return_tensors="pt")
-#                      .to(self.model.device))
-#         outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
-#         generated_token_array = outputs[0][len(input_ids[0]):]
-#         generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
-#         answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
-#         if self.log_to_file:
-#             with open(self.log_to_file, "a+") as log_file:
-#                 log_file.write(json.dumps(answer, indent=2))
-#                 log_file.write("\n")
-#         if add_to_history:
-#             self.message_history.append(answer)
-#         else:
-#             self.message_history.pop()
-#         return answer
+import json
+from pathlib import Path
+
+from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from transformers import AutoTokenizer
+
+from repository.repository_abc import Repository, Model
+
+
+class IntelNpuRepository(Repository):
+    def __init__(self, model_info: Model, system_msg: str = None, log_to_file: Path = None):
+        self.model_info: Model = model_info
+        self.message_history: list[dict[str, str]] = []
+        self.set_message_for_role(self.model_info.roles.system_role, system_msg)
+        self.model = None
+        self.tokenizer = None
+        self.terminators = None
+        self.log_to_file = log_to_file
+
+    def get_model_info(self) -> Model:
+        return self.model_info
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        return self.message_history
+
+    def init(self):
+        compiler_conf = CompilerConfig(dtype=int4)
+        self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
+                                                         export=True, temperature=0).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
+        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+        pass
+        print("prompt to be sent: " + prompt)
+        user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
+        if self.log_to_file:
+            with open(self.log_to_file, "a+") as log_file:
+                log_file.write(json.dumps(user_prompt, indent=2))
+                log_file.write("\n")
+        self.get_message_history().append(user_prompt)
+        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
+                                                        return_tensors="pt")
+                     .to(self.model.device))
+        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
+        generated_token_array = outputs[0][len(input_ids[0]):]
+        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
+        if self.log_to_file:
+            with open(self.log_to_file, "a+") as log_file:
+                log_file.write(json.dumps(answer, indent=2))
+                log_file.write("\n")
+        if add_to_history:
+            self.message_history.append(answer)
+        else:
+            self.message_history.pop()
+        return answer
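For context, send_prompt keeps the whole conversation as a list of role-tagged dicts and passes that list straight to the tokenizer's chat template before generation. A minimal sketch of what get_message_history() could contain after one exchange; the concrete role strings are an assumption, since the roles object is defined in repository_abc, which is not part of this commit:

    # Illustrative only: "system"/"user"/"assistant" are assumed role values.
    message_history = [
        {"role": "system", "content": "You are a helpful assistant"},  # set in __init__ via set_message_for_role
        {"role": "user", "content": "Hello!"},                         # appended by send_prompt
        {"role": "assistant", "content": "Hi! How can I help?"},       # appended when add_to_history=True
    ]
    # apply_chat_template(..., add_generation_prompt=True) turns this list into
    # input_ids, and model.generate() then runs on the NPU-compiled model.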
repository/repository.py CHANGED
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+from repository.intel_npu import IntelNpuRepository
 # from repository.intel_npu import IntelNpuRepository
 from repository.ollama import OllamaRepository
 from repository.ondemand import OndemandRepository
@@ -13,8 +14,8 @@ def get_repository(implementation: str, model: Model, system_msg: str = None, lo
         raise ValueError(f"Unknown implementation {implementation}. Known implementations: {known_implementations}")
     if "ollama" == implementation:
         return OllamaRepository(model, system_msg)
-    # if "intel_npu" == implementation:
-    #     return IntelNpuRepository(model, system_msg, log_to_file)
+    if "intel_npu" == implementation:
+        return IntelNpuRepository(model, system_msg, log_to_file)
     if "ondemand" == implementation:
         return OndemandRepository(model, system_msg, log_to_file)
     if "testing" == implementation: