# File size: 2,193 Bytes
# 243a6fc
import hashlib
from prompt_injection.evaluators.gpt2_perplexity import GPT2PerplexityEvaluator
from prompt_injection.evaluators.gpt2_sequence_length import GPT2SequenceLengthPromptEvaluator
from prompt_injection.evaluators.mini_llm_perplexity import MiniLMEmbeddingPromptEvaluator
from prompt_injection.evaluators.sha256_evaluator import Sha256PromptEvaluator
from prompt_injection.evaluators.utils import evaluate_all
from prompt_injection.mutators.llm_mutator import AttackerLLMBasicPromptMutator
from prompt_injection.mutators.roundtrip_mutator import RoundTripPromptMutator
from prompt_injection.mutators.typo_mutator import TypoPromptMutator
from prompt_injection.mutators.utils import mutate_all
import pandas as pd
def get_sig(sample: str) -> str:
    """Return the SHA-256 hex digest of *sample*.

    The text is encoded as UTF-8 before hashing, and the digest is
    returned as a lowercase hexadecimal string.
    """
    return hashlib.sha256(sample.encode('utf-8')).hexdigest()
# Mutators handed to mutate_all(): three TypoPromptMutator instances built
# with different arguments (presumably typo rates -- confirm against the
# class), followed by an attacker-LLM mutator and a round-trip mutator
# labelled "en->ch->en".
mutators = [TypoPromptMutator(typo_arg) for typo_arg in (0.05, 0.1, 0.2)]
mutators += [
    AttackerLLMBasicPromptMutator(),
    RoundTripPromptMutator(label="en->ch->en"),
]

# Evaluators handed to evaluate_all(), instantiated in this order.
evaluators = [
    evaluator_cls()
    for evaluator_cls in (
        GPT2PerplexityEvaluator,
        GPT2SequenceLengthPromptEvaluator,
        MiniLMEmbeddingPromptEvaluator,
    )
]

# Sample input used by the module-level generate_dataset() call.
texts = ["Hello my friend"]
def generate_dataset(texts, file_name):
    """Mutate *texts*, evaluate every variation and save the result as CSV.

    Each row produced by ``mutate_all`` is flattened into one record per
    prompt variation. The records are scored with ``evaluate_all`` and the
    result is annotated with the source row's ``idx``, its original prompt
    and the SHA-256 signature of that original prompt.

    Args:
        texts: Prompts forwarded to ``mutate_all``.
        file_name: Prefix for every file path used here:
            ``file_name + "tmp_mutate.pkl"`` and
            ``file_name + "tmp_evaluate.pkl"`` are passed to the helpers
            (presumably as cache/checkpoint paths -- confirm against their
            implementations), and the final table is written to
            ``file_name + ".csv"``.

    Returns:
        The object returned by ``evaluate_all`` with the extra columns
        ``idx``, ``Original_Prompt`` and ``sha256`` added.
    """
    data = []
    original = []
    idx_list = []
    sigs = []

    mutated = mutate_all(texts, mutators, file_name + "tmp_mutate.pkl")
    for _, row in mutated.iterrows():
        idx = row['idx']
        original_prompt = row['Prompt']
        # The signature depends only on the original prompt, so compute it
        # once per row instead of once per variation.
        sig = get_sig(original_prompt)
        # Assumes the first two columns are 'idx' and 'Prompt' and every
        # remaining column holds one mutated variation -- verify against
        # mutate_all's output layout.
        for prompt_variation in row.values[2:]:
            idx_list.append(idx)
            data.append(prompt_variation)
            original.append(original_prompt)
            sigs.append(sig)

    results = evaluate_all(data, evaluators, file_name + "tmp_evaluate.pkl")
    # Use the per-record index list. Assigning the loop variable `idx` would
    # broadcast the last row's index to every record, and would raise
    # NameError when there are no rows.
    results['idx'] = idx_list
    results['Original_Prompt'] = original
    results['sha256'] = sigs
    results.to_csv(file_name + ".csv")
    return results
# Run the pipeline on the sample texts; every output path is prefixed "example".
generate_dataset(texts,"example")