import json
from pathlib import Path

from intel_npu_acceleration_library import NPUModelForCausalLM, float16
from intel_npu_acceleration_library.compiler import CompilerConfig
from transformers import AutoTokenizer

from repository import Model, Repository
class IntelNpuRepository(Repository):
    """Repository implementation that runs a causal LM on an Intel NPU."""

    def __init__(self, model_info: Model, system_msg: str | None = None, log_to_file: Path | None = None):
        self.model_info: Model = model_info
        self.message_history: list[dict[str, str]] = []
        # Seed the conversation with the system message, if one was given.
        self.set_message_for_role(self.model_info.roles.system_role, system_msg)
        self.model = None
        self.tokenizer = None
        self.terminators = None
        self.log_to_file = log_to_file
    def get_model_info(self) -> Model:
        return self.model_info

    def get_message_history(self) -> list[dict[str, str]]:
        return self.message_history

    def init(self):
        self._init_model()
        self._init_tokenizer()
    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
        print(f"prompt to be sent: {prompt}")
        user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
        if self.log_to_file:
            with open(self.log_to_file, "a+") as log_file:
                log_file.write(json.dumps(user_prompt, indent=2))
                log_file.write("\n")
        self.get_message_history().append(user_prompt)
        # Lazily initialize in case init() was never called.
        if self.model is None:
            self._init_model()
        if self.tokenizer is None:
            self._init_tokenizer()
        # Render the whole conversation with the model's chat template and
        # move the token ids to the device the compiled model runs on.
        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(),
                                                        add_generation_prompt=True,
                                                        return_tensors="pt")
                     .to(self.model.device))
        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True,
                                      max_new_tokens=2000, cache_position=None)
        # Drop the prompt tokens; keep only the newly generated ones.
        generated_token_array = outputs[0][len(input_ids[0]):]
        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
        answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
        if self.log_to_file:
            with open(self.log_to_file, "a+") as log_file:
                log_file.write(json.dumps(answer, indent=2))
                log_file.write("\n")
        if add_to_history:
            self.message_history.append(answer)
        else:
            # Leave the history untouched: remove the user prompt appended above.
            self.message_history.pop()
        return answer
    def _init_tokenizer(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
        # Stop on either the generic EOS token or Llama 3's end-of-turn token.
        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]

    def _init_model(self):
        # Compile the model to float16 and export it for the Intel NPU.
        compiler_conf = CompilerConfig(dtype=float16)
        self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True,
                                                         config=compiler_conf, export=True,
                                                         temperature=0.1).eval()
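

# --- Usage sketch (not part of the original listing) ---
# A minimal example of driving the class above. The Roles import and the
# Model/Roles constructor arguments are assumptions: the repository module is
# not shown here, so the exact field names may differ. A Llama 3 checkpoint
# is assumed because _init_tokenizer registers the "<|eot_id|>" terminator.
if __name__ == "__main__":
    from repository import Roles  # hypothetical: assumed to live next to Model

    model_info = Model(
        name="meta-llama/Meta-Llama-3-8B-Instruct",
        roles=Roles(system_role="system", user_role="user", ai_role="assistant"),
    )
    repo = IntelNpuRepository(model_info, system_msg="You are a helpful assistant.")
    repo.init()  # compiles the model for the NPU and loads the tokenizer
    answer = repo.send_prompt("What is an NPU?")
    print(answer["content"])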