File size: 1,973 Bytes
729b0f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import asyncio
import json
import os
from datasets import Dataset, load_dataset
from langchain_openai import ChatOpenAI
from aihack.aihack.data_generation.malicious_instruction_generator import (
JailBreakExample,
MaliciousInstructionGenerator,
)
from aihack.aihack.data_generation.repo import JailBreakExampleRepo
# JSON file that main() reloads at start-up (when it exists) and rewrites
# after every generated batch.
DATA_FILE_NAME: str = "malicious_data.json"

# Forwarded to the generator on every generate_malicious_instruction() call.
MAX_CONCURRENT_REQUESTS: int = 5

# main() stops asking for new batches once this many examples are held.
MAX_EXAMPLES_TO_GENERATE: int = 2600
async def main():
    """Grow the saved set of malicious-instruction examples up to the target.

    Examples already stored in ``DATA_FILE_NAME`` are reloaded when that file
    exists, so a restarted run continues from the saved list. Each loop pass
    awaits one batch from the generator, appends it to the in-memory list and
    writes the whole list back to ``DATA_FILE_NAME``.

    The size check runs only between batches, so the final count can exceed
    ``MAX_EXAMPLES_TO_GENERATE`` by part of the last batch.
    """
    examples = []
    if os.path.exists(DATA_FILE_NAME):
        with open(DATA_FILE_NAME) as saved_file:
            saved_records = json.load(saved_file)
        examples = [JailBreakExample.from_json(record) for record in saved_records]

    # Keep only the rows of the train split whose "type" column is "jailbreak";
    # they are handed to the repo the generator is built on.
    train_split = load_dataset("jackhhao/jailbreak-classification")["train"]
    jailbreak_rows = train_split.filter(lambda row: row["type"] == "jailbreak")
    seed_repo = JailBreakExampleRepo(jailbreak_rows)

    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.9)
    generator = MaliciousInstructionGenerator(llm, seed_repo)

    while len(examples) < MAX_EXAMPLES_TO_GENERATE:
        print("=" * 50)
        print(
            f"Generating malicious data iteration. Current examples count: {len(examples)}. Target examples count: {MAX_EXAMPLES_TO_GENERATE}"
        )
        # NOTE: the keyword spelling below is the one this script has always
        # passed to the generator; it must match the callee's signature.
        malicious_data = await generator.generate_malicious_instruction(
            max_conccurrent_requests=MAX_CONCURRENT_REQUESTS
        )
        examples.extend(malicious_data)
        # Persist after every batch so progress survives an interruption.
        MaliciousInstructionGenerator.save_to_file(examples, DATA_FILE_NAME)
        print(f"Generated {len(malicious_data)} malicious data examples")

    print(f"Generated {len(examples)} examples. Stopping the generation")
if __name__ == "__main__":
    # Script entry point: run the async generation loop to completion.
    asyncio.run(main())
|