taicheng's picture
End of training
21614d2 verified
{
"epoch": 3.0,
"eval_logits/chosen": -2.6452701091766357,
"eval_logits/rejected": -2.628488779067993,
"eval_logps/chosen": -74.79349517822266,
"eval_logps/rejected": -84.08062744140625,
"eval_loss": 0.8725804686546326,
"eval_rewards/accuracies": 0.375,
"eval_rewards/chosen": -0.1874038428068161,
"eval_rewards/margins": 1.6430120468139648,
"eval_rewards/rejected": -1.8304157257080078,
"eval_runtime": 113.9064,
"eval_samples": 2000,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 0.553
}