{ "epoch": 2.955223880597015, "eval_logits/chosen": -2.3310205936431885, "eval_logits/rejected": -2.3245856761932373, "eval_logps/chosen": -280.4070129394531, "eval_logps/rejected": -236.11550903320312, "eval_loss": 0.5229008197784424, "eval_rewards/accuracies": 0.8166666626930237, "eval_rewards/chosen": 2.1573832035064697, "eval_rewards/margins": 2.773956775665283, "eval_rewards/rejected": -0.616573691368103, "eval_runtime": 61.6988, "eval_samples_per_second": 15.397, "eval_steps_per_second": 0.243 }