Upload 16 files

- detect-pretrain-code-contamination +0 -1
- detect-pretrain-code-contamination/README.md +17 -0
- detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/eval.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/eval.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/options.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/options.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/run.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/utils.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/utils.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/analyze.py +47 -0
- detect-pretrain-code-contamination/src/eval.py +178 -0
- detect-pretrain-code-contamination/src/options.py +23 -0
- detect-pretrain-code-contamination/src/run.py +230 -0
- detect-pretrain-code-contamination/src/scripts/run.sh +8 -0
- detect-pretrain-code-contamination/src/utils.py +28 -0
detect-pretrain-code-contamination
DELETED
@@ -1 +0,0 @@
- Subproject commit 616114e2334dc8dc8b7b538f6dbcc639cc42cb2c
detect-pretrain-code-contamination/README.md
ADDED
@@ -0,0 +1,17 @@

# Detect-Pretrain-Code-Contamination

This repository contains scripts for detecting pretraining code contamination in datasets.

## Datasets

You can specify the dataset for analysis. Example datasets include `truthful_qa` and `cais/mmlu`.

## Usage

Run the script with the desired models and dataset. Below is an example of how to use the script with the `truthful_qa` dataset; see `src/scripts/run.sh` for a second invocation with a different target model.

### Example 1:
```bash
DATASET=truthful_qa
python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
```

The output of the script provides a metric for dataset contamination. If the share of examples with `result < 0.1` is greater than 0.85, it is highly likely that the dataset was part of the model's training data.
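For reference, the metric above is printed by `src/analyze.py` as the fraction of examples whose score falls below 0.1. A hypothetical run on a contaminated model might print something like the following (the 0.92 is illustrative, not a measured value):

```
('result < 0.1, %: ', 0.92)
```

By the rule above, 0.92 > 0.85 would count as strong evidence of contamination.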
detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-311.pyc
ADDED
Binary file (2.16 kB).

detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-39.pyc
ADDED
Binary file (1.27 kB).

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-311.pyc
ADDED
Binary file (9.99 kB).

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-39.pyc
ADDED
Binary file (4.68 kB).

detect-pretrain-code-contamination/src/__pycache__/options.cpython-311.pyc
ADDED
Binary file (2.46 kB).

detect-pretrain-code-contamination/src/__pycache__/options.cpython-39.pyc
ADDED
Binary file (1.45 kB).

detect-pretrain-code-contamination/src/__pycache__/run.cpython-311.pyc
ADDED
Binary file (13.5 kB).

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (3.49 kB).

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.53 kB).
detect-pretrain-code-contamination/src/analyze.py
ADDED
@@ -0,0 +1,47 @@

import json
import statistics

def load_jsonl(path):
    with open(path) as f:
        data = [json.loads(line) for line in f]
    return data

def analyze_data(data):
    all_rmia = []
    all_large_1 = []
    for ex in data:
        # Min_20.0% Prob
        score = ex["pred"]["minkprob_w/_ref"]  # minkprob_w/_ref
        all_rmia.append(score)
        if score < 0.1:
            all_large_1.append(score)
    # Fraction of examples whose score is below the 0.1 contamination threshold.
    result = ("result < 0.1, %: ", len(all_large_1) / len(all_rmia))
    print(result)
    return result
    # print(f"RMIA mean: {statistics.mean(all_rmia)}")
    # print(f"RMIA std: {statistics.stdev(all_rmia)}")
    # print(f"RMIA min: {min(all_rmia)}")
    # print(f"RMIA max: {max(all_rmia)}")
    # # 25% percentile
    # print(f"RMIA 25%: {statistics.quantiles(all_rmia)[0]}")
    # # 50% percentile
    # print(f"RMIA 50%: {statistics.quantiles(all_rmia)[1]}")
    # # 75% percentile
    # print(f"RMIA 75%: {statistics.quantiles(all_rmia)[2]}")


if __name__ == "__main__":
    print("contaminated model")
    task = "ai2_arc"  # ai2_arc cais/mmlu truthful_qa
    # /fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/ai2_arc/Fredithefish/ReasonixPajama-3B-HF_togethercomputer/RedPajama-INCITE-Chat-3B-v1/input/all_output.jsonl
    path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/Fredithefish/ReasonixPajama-3B-HF_huggyllama/llama-7b/input/all_output.jsonl"
    data = load_jsonl(path)
    analyze_data(data)

    print("raw model")
    path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/togethercomputer/RedPajama-INCITE-Chat-3B-v1_huggyllama/llama-7b/input/all_output.jsonl"
    data = load_jsonl(path)
    analyze_data(data)
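A minimal sketch of running the analysis step on its own, assuming a finished run has written `all_output.jsonl` (the path is illustrative; substitute your own `--output_dir` layout):

```python
# Hypothetical usage of analyze.py; the path below is illustrative.
from analyze import load_jsonl, analyze_data

data = load_jsonl("out/truthful_qa/<target_model>_<ref_model>/input/all_output.jsonl")
analyze_data(data)  # prints ("result < 0.1, %: ", <fraction of scores below 0.1>)
```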
detect-pretrain-code-contamination/src/eval.py
ADDED
@@ -0,0 +1,178 @@

import logging
logging.basicConfig(level='ERROR')
import numpy as np
from tqdm import tqdm
import json
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve
import matplotlib
import random
from ipdb import set_trace as bp
import time

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

# plot data
def sweep(score, x):
    """
    Compute a ROC curve and then return the FPR, TPR, AUC, and ACC.
    """
    fpr, tpr, _ = roc_curve(x, -score)
    acc = np.max(1 - (fpr + (1 - tpr)) / 2)
    return fpr, tpr, auc(fpr, tpr), acc


def do_plot(prediction, answers, sweep_fn=sweep, metric='auc', legend="", output_dir=None):
    """
    Generate the ROC curves by using ntest models as test models and the rest to train.
    """
    fpr, tpr, auc, acc = sweep_fn(np.array(prediction), np.array(answers, dtype=bool))

    low = tpr[np.where(fpr < .05)[0][-1]]
    # bp()
    print('Attack %s AUC %.4f, Accuracy %.4f, TPR@5%%FPR of %.4f\n' % (legend, auc, acc, low))

    metric_text = ''
    if metric == 'auc':
        metric_text = 'auc=%.3f' % auc
    elif metric == 'acc':
        metric_text = 'acc=%.3f' % acc

    plt.plot(fpr, tpr, label=legend + metric_text)
    return legend, auc, acc, low


def fig_fpr_tpr(all_output, output_dir):
    print("output_dir", output_dir)
    answers = []
    metric2predictions = defaultdict(list)
    for ex in all_output:
        answers.append(ex["label"])
        for metric in ex["pred"].keys():
            if ("raw" in metric) and ("clf" not in metric):
                continue
            metric2predictions[metric].append(ex["pred"][metric])

    plt.figure(figsize=(4, 3))
    with open(f"{output_dir}/auc.txt", "w") as f:
        for metric, predictions in metric2predictions.items():
            legend, auc, acc, low = do_plot(predictions, answers, legend=metric, metric='auc', output_dir=output_dir)
            f.write('%s AUC %.4f, Accuracy %.4f, TPR@5%%FPR of %.4f\n' % (legend, auc, acc, low))

    plt.semilogx()
    plt.semilogy()
    plt.xlim(1e-5, 1)
    plt.ylim(1e-5, 1)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.plot([0, 1], [0, 1], ls='--', color='gray')
    plt.subplots_adjust(bottom=.18, left=.18, top=.96, right=.96)
    plt.legend(fontsize=8)
    plt.savefig(f"{output_dir}/auc.png")


def load_jsonl(input_path):
    with open(input_path, 'r') as f:
        data = [json.loads(line) for line in tqdm(f)]
    random.seed(0)
    random.shuffle(data)
    return data

def dump_jsonl(data, path):
    with open(path, 'w') as f:
        for line in tqdm(data):
            f.write(json.dumps(line) + "\n")

def read_jsonl(path):
    with open(path, 'r') as f:
        return [json.loads(line) for line in tqdm(f)]

def convert_huggingface_data_to_list_dic(dataset):
    all_data = []
    for i in range(len(dataset)):
        ex = dataset[i]
        all_data.append(ex)
    return all_data


def process_truthful_qa(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = ex["mc2_targets"]["labels"].index(1)
        output = ex["mc2_targets"]["choices"][label]
        # We use mc2 instead of mc1, since that is what the Open LLM Leaderboard uses.
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data


def process_mmlu(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = ex["choices"][ex["answer"]]
        output = label
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data


def process_arc(data):
    new_data = []
    choice2label = {"A": 0, "B": 1, "C": 2, "D": 3}
    for ex in data:
        new_ex = {}
        # bp()
        # print(ex["answerKey"])
        if ex["answerKey"] not in choice2label:
            continue
        label = choice2label[ex["answerKey"]]
        output = ex["choices"]["text"][label]
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data

def process_gsm8k(data):
    new_data = []
    for ex in data:
        new_ex = {}
        output = ex["answer"]
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data

def process_winogrande(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = int(ex["answer"])
        output = ex[f"option{label}"]
        new_ex["output"] = output
        new_ex["input"] = ex["sentence"] + " " + output
        new_data.append(new_ex)
    return new_data
# I'm not sure if this is the correct format for winogrande, given how the dataset works.

def process_hellaswag(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = int(ex["label"])  # The label is stored as a string, so convert it to int.
        output = ex["endings"][label]
        new_ex["output"] = output
        new_ex["input"] = ex["ctx"] + " " + output
        new_data.append(new_ex)
    return new_data
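As a quick sanity check of the dataset adapters above, here is a minimal sketch that runs `process_mmlu` on a single hand-built record following the `cais/mmlu` schema (question, choices, answer index); the record itself is invented for illustration:

```python
# Illustrative record matching the cais/mmlu schema used by process_mmlu.
from eval import process_mmlu

example = {
    "question": "What is 2 + 2?",
    "choices": ["3", "4", "5", "6"],
    "answer": 1,  # index into choices
}
processed = process_mmlu([example])
print(processed[0]["output"])  # "4"
print(processed[0]["input"])   # "What is 2 + 2? 4"
```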
detect-pretrain-code-contamination/src/options.py
ADDED
@@ -0,0 +1,23 @@

import argparse
import os
from pathlib import Path
import logging

logger = logging.getLogger(__name__)

class Options():
    def __init__(self):
        self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.initialize_parser()

    def initialize_parser(self):
        self.parser.add_argument('--target_model', type=str, default="text-davinci-003", help="the model to attack: huggyllama/llama-65b, text-davinci-003")
        self.parser.add_argument('--ref_model', type=str, default="huggyllama/llama-7b")
        self.parser.add_argument('--output_dir', type=str, default="out")
        self.parser.add_argument('--data', type=str, default="swj0419/WikiMIA", help="the dataset to evaluate; the default is WikiMIA")
        self.parser.add_argument('--length', type=int, default=64, help="the length of the input text to evaluate. Choose from 32, 64, 128, 256")
        self.parser.add_argument('--key_name', type=str, default="input", help="the key name corresponding to the input text. Select from: input, paraphrase")
        self.parser.add_argument('--ratio_gen', type=float, default=0.4)
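The diff does not show how `run.py` consumes these options, so the following is only a plausible sketch of parsing them:

```python
# Hypothetical usage; the actual entry point is not shown in this diff.
from options import Options

options = Options()
args = options.parser.parse_args()  # e.g. --data truthful_qa --ratio_gen 0.4
print(args.target_model, args.ref_model, args.data, args.ratio_gen)
```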
detect-pretrain-code-contamination/src/run.py
ADDED
@@ -0,0 +1,230 @@

import logging
logging.basicConfig(level='ERROR')
import numpy as np
from pathlib import Path
import openai
import torch
import zlib
import statistics
import time
import random
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import math
from datasets import load_dataset
from options import Options
from ipdb import set_trace as bp
from eval import *
from utils import evaluate_model
from analyze import analyze_data
import argparse
import os
import sys
import gc
import pickle

def save_data(filename, data):
    with open(filename, 'wb') as filehandle:
        # store the data as a binary data stream
        pickle.dump(data, filehandle)

def load_data(filename):
    with open(filename, 'rb') as filehandle:
        # read the data as a binary data stream
        loaded_data = pickle.load(filehandle)

    return loaded_data

def unload_model(model, tokenizer):
    model = model.cpu()
    del model
    del tokenizer
    time.sleep(0.5)
    gc.collect()
    torch.cuda.empty_cache()

def load_model(name1):
    model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
    model1.eval()
    tokenizer1 = AutoTokenizer.from_pretrained(name1)

    tokenizer1.pad_token = tokenizer1.eos_token
    return model1, tokenizer1

def calculatePerplexity(sentence, model, tokenizer, gpu):
    """
    exp(loss)
    """
    input_ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0)
    input_ids = input_ids.to(gpu)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    loss, logits = outputs[:2]

    '''
    extract logits:
    '''
    # Apply log-softmax to the logits to get log-probabilities
    probabilities = torch.nn.functional.log_softmax(logits, dim=-1)
    # probabilities = torch.nn.functional.softmax(logits, dim=-1)
    all_prob = []
    input_ids_processed = input_ids[0][1:]

    for i, token_id in enumerate(input_ids_processed):
        probability = probabilities[0, i, token_id].item()
        all_prob.append(probability)
    return torch.exp(loss).item(), all_prob, loss.item()

def sample_generation(sentence, model, tokenizer, args):
    half_sentence_index = math.ceil(len(sentence.split()) * args['prefix_length'])

    if half_sentence_index > 0:
        prefix = " ".join(sentence.split()[:half_sentence_index])
    else:
        prefix = '<|startoftext|> '

    input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)
    input_ids = input_ids.to(model.device)

    output = model.generate(input_ids, max_new_tokens=len(sentence.split())-half_sentence_index, min_new_tokens=1, num_return_sequences=args['num_z'], pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
    # print(output)
    complete_generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)

    return complete_generated_text


def RMIA_1(text, target_loss, ref_loss, model1, tokenizer1, ratio_gen, neighbors_dl):
    target_losses_z = evaluate_model(model1, tokenizer1, neighbors_dl)
    # Fraction of generated neighbors whose loss is below the target text's loss.
    result = torch.count_nonzero(target_losses_z < target_loss).item() / len(target_losses_z)
    return result

def get_neighbors(text, ref_loss, model2, tokenizer2, ratio_gen):
    cur_args = {'prefix_length': ratio_gen, 'num_z': 100, 'generate_args': {'do_sample': True}}
    neighbors = sample_generation(text, model2, tokenizer2, cur_args)
    neighbors_dl = DataLoader(neighbors, batch_size=32, shuffle=False)
    return neighbors_dl

def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
    print(f"all data size: {len(test_data)}")
    random.seed(0)
    random.shuffle(test_data)
    test_data = test_data[:100]

    inference2_pass = None
    neighbors_dls = None
    ref_model_clean = ref_model.replace("/", "-")
    data_name_clean = data_name.replace("/", "-")
    os.makedirs(os.path.join(f"saves/{ref_model_clean}", f"{data_name_clean}"), exist_ok=True)
    try:
        inference2_pass = load_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt')
        neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
    except:
        ### MODEL 2 likelihoods
        model2, tokenizer2 = load_model(ref_model)
        inference2_pass = []  # 0: p_ref, 1: all_prob_ref, 2: p_ref_likelihood
        for ex in tqdm(test_data):
            text = ex[col_name]
            new_ex = inference_model2(model2, tokenizer2, text)
            inference2_pass.append(new_ex)
        # Invariant. Doesn't take in model1, so this can be cached safely.

        ### Neighbors:
        neighbors_dls = []
        counter = 0
        for ex in tqdm(test_data):
            text = ex[col_name]
            new_ex = get_neighbors(text, inference2_pass[counter][2], model2, tokenizer2, ratio_gen)
            counter = counter + 1
            neighbors_dls.append(new_ex)
        unload_model(model2, tokenizer2)
        # Because sampling uses temperature it is not invariant; however, taking a snapshot in time should be just fine.
        save_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt', inference2_pass)
        save_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt', neighbors_dls)
        print("Saved ref data.")

    ### MODEL 1 likelihoods
    model1, tokenizer1 = load_model(target_model)
    inference1_pass = []  # 0: p1, 1: all_prob, 2: p1_likelihood, 3: p_lower, 4: p_lower_likelihood
    for ex in tqdm(test_data):
        text = ex[col_name]
        new_ex = inference_model1(model1, tokenizer1, text)
        inference1_pass.append(new_ex)

    ### RMIA results
    counter = 0
    results = []
    for ex in tqdm(test_data):
        text = ex[col_name]
        new_ex = RMIA_1(text, inference1_pass[counter][2], inference2_pass[counter][2], model1, tokenizer1, ratio_gen, neighbors_dls[counter])
        counter = counter + 1
        results.append(new_ex)
    unload_model(model1, tokenizer1)

    ### Inference ex
    all_output = []
    counter = 0
    for ex in tqdm(test_data):
        text = ex[col_name]
        pred = {}
        pred["minkprob_w/_ref"] = results[counter]
        pred["ppl"] = inference1_pass[counter][0]
        pred["ppl/Ref_ppl (calibrate PPL to the reference model)"] = inference1_pass[counter][2] - inference2_pass[counter][2]
        pred["ppl/lowercase_ppl"] = -(np.log(inference1_pass[counter][3]) / np.log(inference1_pass[counter][0])).item()
        zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
        pred["ppl/zlib"] = np.log(inference1_pass[counter][0]) / zlib_entropy
        ex["pred"] = pred
        counter = counter + 1
        all_output.append(ex)
    return all_output

def inference_model1(model1, tokenizer1, text):
    p1, all_prob, p1_likelihood = calculatePerplexity(text, model1, tokenizer1, gpu=model1.device)
    p_lower, _, p_lower_likelihood = calculatePerplexity(text.lower(), model1, tokenizer1, gpu=model1.device)
    return [p1, all_prob, p1_likelihood, p_lower, p_lower_likelihood]

def inference_model2(model2, tokenizer2, text):
    p_ref, all_prob_ref, p_ref_likelihood = calculatePerplexity(text, model2, tokenizer2, gpu=model2.device)
    return [p_ref, all_prob_ref, p_ref_likelihood]

def main(target_model, ref_model, output_dir, data, length, key_name, ratio_gen):
    output_dir = f"{output_dir}/{target_model}_{ref_model}/{key_name}"
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # load model and data
    data_name = data
    if "jsonl" in data:
        data = load_jsonl(f"{data}")
    elif data == "truthful_qa":
        # bp()
        dataset = load_dataset(data, "multiple_choice", split="validation")
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_truthful_qa(data)
    elif data == "cais/mmlu":
        dataset = load_dataset(data, "all", split="test")
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_mmlu(data)
    elif data == "ai2_arc":
        dataset = load_dataset(data, "ARC-Challenge", split="test")
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_arc(data)
    elif data == "gsm8k":
        dataset = load_dataset(data, "main", split="test")
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_gsm8k(data)
    elif data == "Rowan/hellaswag":
        dataset = load_dataset(data, "default", split="validation")
        # We use the validation split since labels for the test set are not available.
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_hellaswag(data)
    elif data == "winogrande":
        dataset = load_dataset(data, "winogrande_debiased", split="validation")
        data = convert_huggingface_data_to_list_dic(dataset)
        data = process_winogrande(data)

    # model1, model2, tokenizer1, tokenizer2 = load_model(target_model, ref_model)

    all_output = evaluate_data(data, key_name, target_model, ref_model, ratio_gen, data_name)
    dump_jsonl(all_output, f"{output_dir}/all_output.jsonl")
    return analyze_data(all_output)
    # fig_fpr_tpr(all_output, output_dir)
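The core statistic in `run.py` is computed by `RMIA_1`: the original text's loss under the target model is compared against the losses of neighbor texts sampled from the reference model, and the score is the fraction of neighbors whose loss is lower than the original's. A score near 0 means the target model assigns the original text unusually low loss relative to its neighbors, which is the signal `analyze.py` thresholds at 0.1. A toy sketch with made-up numbers (no models involved):

```python
# Toy illustration of the RMIA_1 score; all values are invented.
import torch

target_loss = 1.8  # loss of the original text under the target model
target_losses_z = torch.tensor([2.4, 2.1, 1.5, 2.9, 2.6])  # losses of 5 generated neighbors

score = torch.count_nonzero(target_losses_z < target_loss).item() / len(target_losses_z)
print(score)  # 0.2 -> only 1 of 5 neighbors beats the original text
```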
detect-pretrain-code-contamination/src/scripts/run.sh
ADDED
@@ -0,0 +1,8 @@

DATASET=truthful_qa #cais/mmlu #truthful_qa
python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4


# DATASET=cais/mmlu #cais/mmlu #truthful_qa
DATASET=truthful_qa #cais/mmlu #truthful_qa
python src/run.py --target_model togethercomputer/RedPajama-INCITE-Chat-3B-v1 --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
detect-pretrain-code-contamination/src/utils.py
ADDED
@@ -0,0 +1,28 @@

from tqdm import tqdm
import torch
from torch.nn import CrossEntropyLoss

def evaluate_model(model, tokenizer, dl):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    losses = []
    for batch in dl:
        batch = tokenizer(batch, padding=True, return_tensors='pt', truncation=True, max_length=150)
        # Mask padding positions with -100 so they are ignored by the loss.
        labels = torch.tensor([
            [-100 if mask == 0 else token for mask, token in mask_and_tokens] for mask_and_tokens in [zip(masks, labels) for masks, labels in zip(batch['attention_mask'], batch['input_ids'])]
        ])
        batch['labels'] = labels
        batch = {k: v.to(device) for k, v in batch.items()}

        with torch.no_grad():
            outputs = model(batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
        shift_logits = outputs.logits[..., :-1, :].contiguous()
        shift_labels = batch['labels'][..., 1:].contiguous()
        loss_fct = CrossEntropyLoss(reduction='none')
        loss = loss_fct(shift_logits.transpose(1, 2), shift_labels)
        num_tokens = torch.sum(shift_labels != -100, dim=1)
        loss_sum = torch.sum(loss, dim=1)
        # Per-example mean loss over non-padding tokens.
        loss = loss_sum / num_tokens
        losses.append(loss)
    losses = torch.cat(losses)
    return losses
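A minimal sketch of calling `evaluate_model` directly; `sshleifer/tiny-gpt2` is a small public test checkpoint picked purely for illustration, and setting `pad_token` mirrors what `load_model` in `run.py` does:

```python
# Illustrative only; any causal LM checkpoint works here.
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import evaluate_model

model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

texts = ["The capital of France is Paris.", "Two plus two equals four."]
dl = DataLoader(texts, batch_size=2, shuffle=False)
losses = evaluate_model(model, tokenizer, dl)  # one mean loss per input text
print(losses.shape)  # torch.Size([2])
```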