{ "epoch": 3.0, "eval_logits/chosen": -2.3030953407287598, "eval_logits/rejected": -2.2684733867645264, "eval_logps/chosen": -232.67649841308594, "eval_logps/rejected": -234.53941345214844, "eval_loss": 0.5567358136177063, "eval_rewards/accuracies": 0.8020833134651184, "eval_rewards/chosen": -0.00392953073605895, "eval_rewards/margins": 2.367516040802002, "eval_rewards/rejected": -2.371445655822754, "eval_runtime": 202.0305, "eval_samples_per_second": 15.047, "eval_steps_per_second": 0.238, "total_flos": 3785055088410624.0, "train_loss": 0.33471281568859224, "train_runtime": 12142.7038, "train_samples_per_second": 6.758, "train_steps_per_second": 0.026 }