|
import os |
|
import pickle |
|
from tqdm import tqdm |
|
from typing import List |
|
import pandas as pd |
|
from prompt_injection.evaluators.base import PromptEvaluator |
|
|
|
def init_evaluator_result_object(output_path, evaluator_list):
    """Build the result dictionary, resuming from ``output_path`` if it exists.

    The result maps column names to lists: ``'idx'``, ``'Prompt'`` and one
    entry per evaluator, keyed by ``evaluator.get_name()``.

    Args:
        output_path: Path of a pickle file written by a previous run. When
            the file exists its content is loaded and used as the starting
            result; otherwise a fresh, empty result is created.
        evaluator_list: Objects exposing ``get_name()``. Each name missing
            from the result gets an empty list; existing entries are kept.

    Returns:
        The (possibly resumed) result dictionary.
    """
    result = {'idx': [], 'Prompt': []}

    if os.path.exists(output_path):
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # output_path must only ever point at a trusted checkpoint.
        with open(output_path, 'rb') as f:
            result = pickle.load(f)

    for evaluator in evaluator_list:
        # Keep scores already loaded; only create the columns that are missing.
        result.setdefault(evaluator.get_name(), [])

    return result
|
|
|
def evaluate_all(prompts, evaluator_list: List[PromptEvaluator], output_path):
    """Run every evaluator on every prompt, checkpointing to ``output_path``.

    Prompts whose position is already listed under ``'idx'`` in the resumed
    result are skipped, so an interrupted run can be continued by calling
    the function again with the same ``output_path``.

    Args:
        prompts: Indexable collection of prompts supporting ``len()`` and
            integer indexing.
        evaluator_list: Evaluators; ``get_name()`` provides the column name
            and ``eval_sample(prompt)`` the value stored for each prompt.
        output_path: Pickle file used both to resume and to store progress.

    Returns:
        A ``pandas.DataFrame`` built from the result dictionary.
    """
    result = init_evaluator_result_object(output_path, evaluator_list)

    # Set lookup instead of scanning the 'idx' list for every prompt.
    already_done = set(result["idx"])

    # Index-based access is kept on purpose so any container that supports
    # len() and prompts[i] behaves exactly as before.
    for i in tqdm(range(len(prompts))):
        if i in already_done:
            continue

        prompt = prompts[i]
        result['idx'].append(i)
        result['Prompt'].append(prompt)
        for evaluator in evaluator_list:
            result[evaluator.get_name()].append(evaluator.eval_sample(prompt))

        # Checkpoint after each newly evaluated prompt so progress survives
        # an interruption.
        with open(output_path, 'wb') as f:
            pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)

    # NOTE(review): if an evaluator is added after a checkpoint was written,
    # its column is shorter than 'idx' and DataFrame construction will
    # presumably fail on unequal lengths - confirm and decide on backfilling.
    return pd.DataFrame.from_dict(result)