import json
from dataclasses import dataclass

import torch

####################################
# SCRIPT ARGUMENTS
####################################

@dataclass
class ScriptArguments:
    """
    Arguments for the Bradley-Terry evaluation script.
    """
    sft_generations_file: str = '/raid/lingo/jen_ben/HF-RLHF/eval/test/gen_examples_idan_mini.json'
    kto_generations_file: str = '/raid/lingo/jen_ben/HF-RLHF/eval/test/gen_examples_idan_mini.json'
    output_file: str = 'bt_results_test_mini.json'
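
# The defaults above can be overridden when constructing the arguments; a
# minimal sketch (the paths below are placeholders, not real files):
#
#   args = ScriptArguments(
#       sft_generations_file="path/to/sft_generations_with_rewards.json",
#       kto_generations_file="path/to/kto_generations_with_rewards.json",
#       output_file="bt_results.json",
#   )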


####################################
# FUNCTIONS
####################################

def load_rewards(file_path):
    """
    Load the rewards from a JSON file.

    Args:
        file_path (str): Path to the JSON file containing model generations and rewards.

    Returns:
        list: List of dictionaries with prompts, outputs, and rewards.
    """
    with open(file_path, 'r') as f:
        return json.load(f)
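
# Assumed input format, inferred from the keys accessed in
# bradley_terry_comparison below (a sketch, not an authoritative schema):
#
# [
#     {"prompt": "...", "output": "...", "reward": 1.23},
#     ...
# ]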


def bradley_terry_comparison(sft_rewards, kto_rewards):
    """
    Perform a Bradley-Terry comparison between two sets of model generations.

    Args:
        sft_rewards (list): List of dictionaries for the SFT model's generations and rewards.
        kto_rewards (list): List of dictionaries for the KTO model's generations and rewards.

    Returns:
        list: Comparison results including preferred outputs and probabilities.
        dict: Metrics summary including percentage preferred and average probabilities.
    """
    assert len(sft_rewards) == len(kto_rewards), "ERROR: Reward files must contain the same number of examples."

    results = []
    kto_preferred_count = 0
    sft_preferred_count = 0
    probabilities = []

    for ix in range(len(sft_rewards)):
        sft = sft_rewards[ix]
        kto = kto_rewards[ix]

        # Ensure prompts match
        assert sft['prompt'] == kto['prompt'], f"ERROR: Prompts at index {ix} do not match."

        # Compute the Bradley-Terry probability that the KTO output is preferred:
        # P(kto > sft) = exp(r_kto) / (exp(r_kto) + exp(r_sft)) = sigmoid(r_kto - r_sft)
        kto_reward = torch.tensor(kto['reward'], dtype=torch.float32)
        sft_reward = torch.tensor(sft['reward'], dtype=torch.float32)
        prob_kto_preferred = torch.sigmoid(kto_reward - sft_reward).item()
        probabilities.append(prob_kto_preferred)
        preferred_model = 'kto' if prob_kto_preferred > 0.5 else 'sft'

        # Count preferences
        if preferred_model == 'kto':
            kto_preferred_count += 1
        else:
            sft_preferred_count += 1

        # Log per-example results
        bt_result = {
            'prompt': sft['prompt'],
            'sft_output': sft['output'],
            'kto_output': kto['output'],
            'sft_reward': sft['reward'],
            'kto_reward': kto['reward'],
            'preferred': preferred_model,
            'prob_kto_preferred': prob_kto_preferred
        }
        results.append(bt_result)

    # Calculate aggregate metrics
    total_examples = len(sft_rewards)
    metrics = {
        'total_examples': total_examples,
        'kto_preferred_percentage': 100 * kto_preferred_count / total_examples,
        'sft_preferred_percentage': 100 * sft_preferred_count / total_examples,
        'avg_probability_kto_preferred': sum(probabilities) / total_examples
    }

    return results, metrics
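
# Worked example of the pairwise formula (illustrative reward values only):
# with kto_reward = 1.0 and sft_reward = 0.0, sigmoid(1.0 - 0.0) ≈ 0.731,
# so the KTO output would be counted as preferred with ~73% probability.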


def save_results(results, output_path):
    """
    Save the comparison results to a JSON file.

    Args:
        results (list): List of comparison results.
        output_path (str): Path to the output JSON file.
    """
    with open(output_path, "w") as f:
        json.dump(results, f, indent=4)
    print(f"Results saved to {output_path}")


def print_metrics(metrics):
    """
    Print evaluation metrics.

    Args:
        metrics (dict): Dictionary containing evaluation metrics.
    """
    print("\nEVALUATION METRICS:")
    print(f"Total examples: {metrics['total_examples']}")
    print(f"Percentage preferred - KTO model: {metrics['kto_preferred_percentage']:.2f}%")
    print(f"Percentage preferred - SFT model: {metrics['sft_preferred_percentage']:.2f}%")
    print(f"Average probability of KTO model being preferred: {metrics['avg_probability_kto_preferred']:.4f}")


####################################
# MAIN SCRIPT
####################################

def main():
    # Initialize script arguments
    args = ScriptArguments()

    # Load data
    print("Loading data...")
    sft_rewards = load_rewards(args.sft_generations_file)
    kto_rewards = load_rewards(args.kto_generations_file)

    # Perform Bradley-Terry comparison
    print("Performing Bradley-Terry comparison...")
    results, metrics = bradley_terry_comparison(sft_rewards, kto_rewards)

    # Save results
    save_results(results, args.output_file)

    # Print metrics
    print_metrics(metrics)


if __name__ == "__main__":
    main()
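
# Example invocation (the script filename is assumed; input/output paths are
# hard-coded in ScriptArguments above):
#
#   python bt_eval.py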