Adapters
khulnasoft committed on
Commit
243a6fc
·
verified ·
1 Parent(s): 2060d42

Create generate_paraphrase.py

Browse files
Files changed (1) hide show
  1. generate_paraphrase.py +70 -0
generate_paraphrase.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import hashlib
3
+ from prompt_injection.evaluators.gpt2_perplexity import GPT2PerplexityEvaluator
4
+ from prompt_injection.evaluators.gpt2_sequence_length import GPT2SequenceLengthPromptEvaluator
5
+ from prompt_injection.evaluators.mini_llm_perplexity import MiniLMEmbeddingPromptEvaluator
6
+ from prompt_injection.evaluators.sha256_evaluator import Sha256PromptEvaluator
7
+ from prompt_injection.evaluators.utils import evaluate_all
8
+ from prompt_injection.mutators.llm_mutator import AttackerLLMBasicPromptMutator
9
+ from prompt_injection.mutators.roundtrip_mutator import RoundTripPromptMutator
10
+ from prompt_injection.mutators.typo_mutator import TypoPromptMutator
11
+ from prompt_injection.mutators.utils import mutate_all
12
+
13
+
14
+ import pandas as pd
15
def get_sig(sample: str) -> str:
    """Return the SHA-256 hex digest of *sample*.

    The text is encoded as UTF-8 before hashing, so the same string always
    yields the same 64-character lowercase hexadecimal signature.

    Args:
        sample: Text to fingerprint.

    Returns:
        The hexadecimal SHA-256 digest of the UTF-8 encoded text.
    """
    return hashlib.sha256(sample.encode('utf-8')).hexdigest()
29
+
30
# Prompt mutators applied to every input text by generate_dataset.
# NOTE(review): the TypoPromptMutator argument is presumably a typo rate —
# confirm against the mutator's definition.
mutators = [TypoPromptMutator(rate) for rate in (0.05, 0.1, 0.2)] + [
    AttackerLLMBasicPromptMutator(),
    RoundTripPromptMutator(label="en->ch->en"),
]

# Evaluators run over every generated prompt variation.
evaluators = [
    GPT2PerplexityEvaluator(),
    GPT2SequenceLengthPromptEvaluator(),
    MiniLMEmbeddingPromptEvaluator(),
]

# Seed prompts used by the example run at the bottom of the script.
texts = ["Hello my friend"]
45
+
46
def generate_dataset(texts, file_name):
    """Mutate *texts*, evaluate every variation, and save the result as CSV.

    Each text is passed through the module-level ``mutators``; every
    resulting variation is then scored by the module-level ``evaluators``.
    The evaluation table is annotated with the source row's ``idx``, the
    original prompt, and the SHA-256 signature of the original prompt, then
    written to ``<file_name>.csv``.

    Args:
        texts: Prompts to mutate.
        file_name: Path prefix used for the intermediate pickle files
            (``<file_name>tmp_mutate.pkl``, ``<file_name>tmp_evaluate.pkl``)
            and for the final ``<file_name>.csv``.

    Returns:
        The annotated evaluation table returned by ``evaluate_all``.
    """
    variations = []
    originals = []
    idx_list = []
    sigs = []

    mutated = mutate_all(texts, mutators, file_name + "tmp_mutate.pkl")
    for _, row in mutated.iterrows():
        idx = row['idx']
        original_prompt = row['Prompt']
        # The signature depends only on the original prompt, so compute it
        # once per row rather than once per variation.
        sig = get_sig(original_prompt)

        # NOTE(review): assumes the first two columns are 'idx' and 'Prompt'
        # and every remaining column holds one mutator's output — confirm
        # against mutate_all.
        for prompt_variation in row.values[2:]:
            idx_list.append(idx)
            variations.append(prompt_variation)
            originals.append(original_prompt)
            sigs.append(sig)

    # NOTE(review): assumes evaluate_all returns one row per input, in input
    # order, so the per-variation lists below line up — confirm.
    results = evaluate_all(variations, evaluators, file_name + "tmp_evaluate.pkl")
    # Use the per-variation index list: assigning the loop variable ``idx``
    # would stamp every row with the last index seen (and would raise
    # NameError when there are no rows at all).
    results['idx'] = idx_list
    results['Original_Prompt'] = originals
    results['sha256'] = sigs
    results.to_csv(file_name + ".csv")
    return results
68
+
69
+
70
# Example run: build the dataset for the seed prompts and write example.csv.
generate_dataset(texts, file_name="example")