{ "epoch": 1.0, "eval_logits/chosen": 6.446723937988281, "eval_logits/rejected": 6.295619964599609, "eval_logps/chosen": -590.7577514648438, "eval_logps/rejected": -654.5242919921875, "eval_loss": 0.5805472135543823, "eval_rewards/accuracies": 0.6934306621551514, "eval_rewards/chosen": -4.729165554046631, "eval_rewards/margins": 0.5471083521842957, "eval_rewards/rejected": -5.276274681091309, "eval_runtime": 2831.4775, "eval_samples": 35044, "eval_samples_per_second": 12.377, "eval_steps_per_second": 0.387, "train_loss": 0.5658232939218006, "train_runtime": 45309.3847, "train_samples": 120613, "train_samples_per_second": 2.662, "train_steps_per_second": 0.042 }