safeguard / aihack /data_generation /generate_data.py
sijju's picture
Upload folder using huggingface_hub
729b0f4 verified
raw
history blame
1.97 kB
import asyncio
import json
import os
from datasets import Dataset, load_dataset
from langchain_openai import ChatOpenAI
from aihack.aihack.data_generation.malicious_instruction_generator import (
JailBreakExample,
MaliciousInstructionGenerator,
)
from aihack.aihack.data_generation.repo import JailBreakExampleRepo
# Generation stops once at least this many examples have been collected; the
# check runs before each batch, so the final batch may push the total past it.
MAX_EXAMPLES_TO_GENERATE: int = 2_600

# Forwarded to MaliciousInstructionGenerator.generate_malicious_instruction
# as its `max_conccurrent_requests` keyword argument.
MAX_CONCURRENT_REQUESTS: int = 5

# JSON file that main() reads previously generated examples from (when it
# exists) and passes to MaliciousInstructionGenerator.save_to_file per batch.
DATA_FILE_NAME: str = "malicious_data.json"
async def main():
    """Grow the stored example list until it reaches the target size.

    Examples already present in ``DATA_FILE_NAME`` are loaded first, so the
    run extends that list instead of starting from empty. The rows of the
    ``train`` split of ``jackhhao/jailbreak-classification`` whose ``"type"``
    is ``"jailbreak"`` are wrapped in a ``JailBreakExampleRepo`` and given,
    together with a ``ChatOpenAI`` model, to ``MaliciousInstructionGenerator``.
    Batches are then requested until at least ``MAX_EXAMPLES_TO_GENERATE``
    examples have been collected; after every batch the whole list is handed
    to ``MaliciousInstructionGenerator.save_to_file`` with ``DATA_FILE_NAME``.
    """

    def filter_for_type(data: Dataset, wanted_type: str) -> Dataset:
        """Keep only the rows of *data* whose "type" field equals *wanted_type*."""
        return data.filter(lambda row: row["type"] == wanted_type)

    # Pick up whatever an earlier run left behind, if the file is there.
    if os.path.exists(DATA_FILE_NAME):
        with open(DATA_FILE_NAME) as saved_file:
            examples = [
                JailBreakExample.from_json(raw) for raw in json.load(saved_file)
            ]
    else:
        examples = []

    # Only the "jailbreak" rows of the train split are used as the repo's data.
    train_split = load_dataset("jackhhao/jailbreak-classification")["train"]
    example_repo = JailBreakExampleRepo(filter_for_type(train_split, "jailbreak"))

    chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.9)
    generator = MaliciousInstructionGenerator(chat_model, example_repo)

    # The size check happens before each batch, so the last batch may take
    # the total beyond MAX_EXAMPLES_TO_GENERATE.
    while len(examples) < MAX_EXAMPLES_TO_GENERATE:
        print("=" * 50)
        print(
            f"Generating malicious data iteration. Current examples count: {len(examples)}. Target examples count: {MAX_EXAMPLES_TO_GENERATE}"
        )
        # NOTE: the keyword spelling "max_conccurrent_requests" (sic) is the
        # name this call site uses for the generator's parameter; keep as-is.
        batch = await generator.generate_malicious_instruction(
            max_conccurrent_requests=MAX_CONCURRENT_REQUESTS
        )
        examples.extend(batch)
        # Hand over the full list after every batch, not just at the end.
        MaliciousInstructionGenerator.save_to_file(examples, DATA_FILE_NAME)
        print(f"Generated {len(batch)} malicious data examples")

    print(f"Generated {len(examples)} examples. Stopping the generation")
if __name__ == "__main__":
    # Script entry point: run the async main() coroutine to completion.
    asyncio.run(main())