# import json
# from pathlib import Path
#
# from intel_npu_acceleration_library import NPUModelForCausalLM, int4
# from intel_npu_acceleration_library.compiler import CompilerConfig
# from transformers import AutoTokenizer
#
# from repository.repository_abc import Repository, Model
#
#
# class IntelNpuRepository(Repository):
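#     """Repository implementation that runs a chat model on an Intel NPU via
#     intel_npu_acceleration_library, with the weights compiled to int4."""
#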
#     def __init__(self, model_info: Model, system_msg: str | None = None, log_to_file: Path | None = None):
#         self.model_info: Model = model_info
#         self.message_history: list[dict[str, str]] = []
#         self.set_message_for_role(self.model_info.roles.system_role, system_msg)
#         self.model = None
#         self.tokenizer = None
#         self.terminators = None
#         self.log_to_file = log_to_file
#
#     def get_model_info(self) -> Model:
#         return self.model_info
#
#     def get_message_history(self) -> list[dict[str, str]]:
#         return self.message_history
#
#     def init(self):
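#         # Compile the model to int4 for the NPU, then load the tokenizer and
#         # collect the stop-token ids used to end generation.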
#         compiler_conf = CompilerConfig(dtype=int4)
#         self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
#                                                           export=True, temperature=0).eval()
#         self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
#         self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
#
#     def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
#         print("prompt to be sent: " + prompt)
#         user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
#         if self.log_to_file:
#             with open(self.log_to_file, "a+") as log_file:
#                 log_file.write(json.dumps(user_prompt, indent=2))
#                 log_file.write("\n")
#         self.get_message_history().append(user_prompt)
#         input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
#                                                         return_tensors="pt")
#                      .to(self.model.device))
#         outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
#         generated_token_array = outputs[0][len(input_ids[0]):]
#         generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
#         answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
#         if self.log_to_file:
#             with open(self.log_to_file, "a+") as log_file:
#                 log_file.write(json.dumps(answer, indent=2))
#                 log_file.write("\n")
#         if add_to_history:
#             self.message_history.append(answer)
#         else:
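#             # Discard the user prompt appended above so the history is left unchanged.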
#             self.message_history.pop()
#         return answer
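#
#
# # Minimal usage sketch, kept commented out like the rest of this module. It only
# # illustrates the call sequence (construct, init, send_prompt); how a Model instance
# # is built is not shown in this file, so that part is a placeholder rather than the
# # real constructor call.
# #
# # model_info = ...  # a Model from repository.repository_abc describing the HF model and its roles
# # repo = IntelNpuRepository(model_info,
# #                           system_msg="You are a helpful assistant.",
# #                           log_to_file=Path("npu_chat.log"))
# # repo.init()  # loads and compiles the model for the NPU
# # answer = repo.send_prompt("Say hello from the NPU.")
# # print(answer["content"])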