enricorampazzo committed
Commit a823c3f · 1 Parent(s): e31f9c6

removed dependency on the intel-npu library, otherwise builds on Hugging Face will fail

Files changed (2)
  1. repository/intel_npu.py +32 -30
  2. requirements.txt +1 -1
repository/intel_npu.py CHANGED
@@ -1,8 +1,8 @@
 import json
 from pathlib import Path
 
-from intel_npu_acceleration_library import NPUModelForCausalLM, int4
-from intel_npu_acceleration_library.compiler import CompilerConfig
+# from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+# from intel_npu_acceleration_library.compiler import CompilerConfig
 from transformers import AutoTokenizer
 
 from repository.repository_abc import Repository, Model
@@ -25,33 +25,35 @@ class IntelNpuRepository(Repository):
         return self.message_history
 
     def init(self):
-        compiler_conf = CompilerConfig(dtype=int4)
-        self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
-                                                         export=True, temperature=0).eval()
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
-        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+        pass
+        # compiler_conf = CompilerConfig(dtype=int4)
+        # self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
+        #                                                  export=True, temperature=0).eval()
+        # self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
+        # self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
 
     def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
-        print("prompt to be sent: " + prompt)
-        user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
-        if self.log_to_file:
-            with open(self.log_to_file, "a+") as log_file:
-                log_file.write(json.dumps(user_prompt, indent=2))
-                log_file.write("\n")
-        self.get_message_history().append(user_prompt)
-        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
-                                                        return_tensors="pt")
-                     .to(self.model.device))
-        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
-        generated_token_array = outputs[0][len(input_ids[0]):]
-        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
-        answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
-        if self.log_to_file:
-            with open(self.log_to_file, "a+") as log_file:
-                log_file.write(json.dumps(answer, indent=2))
-                log_file.write("\n")
-        if add_to_history:
-            self.message_history.append(answer)
-        else:
-            self.message_history.pop()
-        return answer
+        pass
+        # print("prompt to be sent: " + prompt)
+        # user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
+        # if self.log_to_file:
+        #     with open(self.log_to_file, "a+") as log_file:
+        #         log_file.write(json.dumps(user_prompt, indent=2))
+        #         log_file.write("\n")
+        # self.get_message_history().append(user_prompt)
+        # input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
+        #                                                 return_tensors="pt")
+        #              .to(self.model.device))
+        # outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
+        # generated_token_array = outputs[0][len(input_ids[0]):]
+        # generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        # answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
+        # if self.log_to_file:
+        #     with open(self.log_to_file, "a+") as log_file:
+        #         log_file.write(json.dumps(answer, indent=2))
+        #         log_file.write("\n")
+        # if add_to_history:
+        #     self.message_history.append(answer)
+        # else:
+        #     self.message_history.pop()
+        # return answer
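
Instead of stubbing init() and send_prompt() out with pass, the same build failure could be avoided by guarding the import, so the NPU path keeps working wherever intel-npu-acceleration-library is installed and degrades gracefully on hosts without it (such as the Hugging Face builders). A minimal sketch against the class above; the NPU_AVAILABLE flag and the RuntimeError are illustrative and not part of this commit:

# Guarded import: a missing library no longer breaks importing the module.
try:
    from intel_npu_acceleration_library import NPUModelForCausalLM, int4
    from intel_npu_acceleration_library.compiler import CompilerConfig
    NPU_AVAILABLE = True
except ImportError:
    NPUModelForCausalLM = int4 = CompilerConfig = None
    NPU_AVAILABLE = False

from transformers import AutoTokenizer

from repository.repository_abc import Repository, Model


class IntelNpuRepository(Repository):
    # ... unchanged attributes and methods ...

    def init(self):
        # Fail only when the NPU backend is actually requested.
        if not NPU_AVAILABLE:
            raise RuntimeError("intel-npu-acceleration-library is not installed")
        compiler_conf = CompilerConfig(dtype=int4)
        self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True,
                                                          config=compiler_conf, export=True,
                                                          temperature=0).eval()
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
        self.terminators = [self.tokenizer.eos_token_id,
                            self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]

With a guard like this, requirements.txt would still need to treat the package as optional, for example by moving it to a separate requirements file installed only on machines with an Intel NPU.
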
requirements.txt CHANGED
@@ -1,4 +1,4 @@
 PyPDFForm
 ollama
 transformers
-intel-npu-acceleration-library
+# intel-npu-acceleration-library