from gettext import npgettext | |
from prompt_injection.evaluators.base import PromptEvaluator | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel,GPT2Model | |
class GPT2SequenceLengthPromptEvaluator(PromptEvaluator):
    """Prompt evaluator whose score is the prompt's length in GPT-2 tokens."""

    # Tokenizers keyed by model name and shared by every instance, so the
    # tokenizer is loaded once instead of once per evaluated sample.
    __tokenizers = {}

    def __init__(self) -> None:
        super().__init__()

    @classmethod
    def __get_tokenizer(cls, model_name):
        """Return the tokenizer for *model_name*, loading it on first use."""
        tokenizer = cls.__tokenizers.get(model_name)
        if tokenizer is None:
            tokenizer = GPT2Tokenizer.from_pretrained(model_name)
            cls.__tokenizers[model_name] = tokenizer
        return tokenizer

    def __calculate_sequence_length(self, sentence, model_name='gpt2'):
        """Tokenize *sentence* and return the size of the token dimension.

        Args:
            sentence: Text handed to the tokenizer.
            model_name: Name passed to ``GPT2Tokenizer.from_pretrained``.

        Returns:
            ``inputs['input_ids'].shape[1]`` of the tokenizer output, i.e.
            the number of token ids produced for the input.
        """
        tokenizer = self.__get_tokenizer(model_name)
        # NOTE(review): return_tensors='pt' asks for PyTorch tensors, so this
        # presumably requires torch to be installed -- confirm.
        inputs = tokenizer(sentence, return_tensors='pt')
        return inputs['input_ids'].shape[1]

    def eval_sample(self, sample):
        """Return the token count of *sample*, or NaN if it cannot be computed.

        Evaluation is best-effort: any exception raised while tokenizing is
        printed and NaN is returned instead of propagating the error.
        """
        try:
            return self.__calculate_sequence_length(sample)
        except Exception as err:
            print(err)
            # The previous fallback, `npgettext.nan`, referenced a gettext
            # function that has no `nan` attribute, so the handler itself
            # raised AttributeError. Return a real NaN instead.
            return float('nan')

    def get_name(self):
        """Return the display name of this evaluator."""
        return 'Length'