|
|
|
import hashlib |
|
from prompt_injection.evaluators.gpt2_perplexity import GPT2PerplexityEvaluator |
|
from prompt_injection.evaluators.gpt2_sequence_length import GPT2SequenceLengthPromptEvaluator |
|
from prompt_injection.evaluators.mini_llm_perplexity import MiniLMEmbeddingPromptEvaluator |
|
from prompt_injection.evaluators.sha256_evaluator import Sha256PromptEvaluator |
|
from prompt_injection.evaluators.utils import evaluate_all |
|
from prompt_injection.mutators.llm_mutator import AttackerLLMBasicPromptMutator |
|
from prompt_injection.mutators.roundtrip_mutator import RoundTripPromptMutator |
|
from prompt_injection.mutators.typo_mutator import TypoPromptMutator |
|
from prompt_injection.mutators.utils import mutate_all |
|
|
|
|
|
import pandas as pd |
|
def get_sig(sample: str) -> str:
    """Return the hex-encoded SHA-256 digest of *sample*.

    The string is UTF-8 encoded before hashing, so the signature is a
    stable 64-character identifier for the prompt text.
    """
    return hashlib.sha256(sample.encode('utf-8')).hexdigest()
|
|
|
# Prompt mutators applied to every input text; each produces one variant
# column in mutate_all's output.  The 0.05/0.1/0.2 arguments are presumably
# per-character typo rates -- TODO confirm against TypoPromptMutator.
# The round-trip mutator is labelled as English -> Chinese -> English.
mutators=[

TypoPromptMutator(0.05),

TypoPromptMutator(0.1),

TypoPromptMutator(0.2),

AttackerLLMBasicPromptMutator(),

RoundTripPromptMutator(label="en->ch->en")

]

# Evaluators scored over every prompt variation by evaluate_all:
# GPT-2 perplexity, GPT-2 token sequence length, and a MiniLM embedding
# metric (exact semantics defined in the evaluator modules -- not visible here).
evaluators=[

GPT2PerplexityEvaluator(),

GPT2SequenceLengthPromptEvaluator(),

MiniLMEmbeddingPromptEvaluator()

]




# Example input prompts for the demo run at the bottom of the file.
texts=["Hello my friend"]
|
|
|
def generate_dataset(texts, file_name):
    """Mutate each prompt, evaluate every variation, and save the results.

    Runs all module-level ``mutators`` over *texts* via ``mutate_all``,
    flattens the per-prompt variation columns into one row per variation,
    scores them with all ``evaluators`` via ``evaluate_all``, and writes the
    combined table to ``<file_name>.csv``.  Intermediate pickle files named
    ``<file_name>tmp_mutate.pkl`` / ``<file_name>tmp_evaluate.pkl`` are
    produced by the helpers.

    Args:
        texts: Iterable of prompt strings to mutate and evaluate.
        file_name: Base name for the intermediate pickles and the CSV output.

    Returns:
        The evaluation DataFrame, augmented with the source-prompt index
        (``idx``), the original prompt text (``Original_Prompt``), and its
        SHA-256 digest (``sha256``) for each variation row.
    """
    data = []
    original = []
    idx_list = []
    sigs = []
    for _, row in mutate_all(texts, mutators, file_name + "tmp_mutate.pkl").iterrows():
        idx = row['idx']
        original_prompt = row['Prompt']
        # Columns after the first two ('idx', 'Prompt') hold the mutated
        # variants -- presumably one column per mutator; TODO confirm against
        # mutate_all's output schema.
        prompts_variations = row.values[2:]
        for prompt_variation in prompts_variations:
            idx_list.append(idx)
            data.append(prompt_variation)
            original.append(original_prompt)
            sigs.append(get_sig(original_prompt))

    results = evaluate_all(data, evaluators, file_name + "tmp_evaluate.pkl")
    # Bug fix: the original assigned the scalar `idx` (the value left over
    # from the LAST loop iteration) to every row, mislabeling the dataset.
    # Use the accumulated per-variation index list instead.
    results['idx'] = idx_list
    results['Original_Prompt'] = original
    results['sha256'] = sigs
    results.to_csv(file_name + ".csv")
    return results
|
|
|
|
|
if __name__ == "__main__":
    # Guard the demo run so importing this module does not kick off the
    # (expensive) mutation/evaluation pipeline as a side effect.
    generate_dataset(texts, "example")