{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9952153110047847,
  "eval_steps": 500,
  "global_step": 52,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -2.8187968730926514,
      "logits/rejected": -2.8237557411193848,
      "logps/chosen": -257.11737060546875,
      "logps/pi_response": -65.15000915527344,
      "logps/ref_response": -65.15000915527344,
      "logps/rejected": -166.6063995361328,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.907293218369498e-07,
      "logits/chosen": -2.834031105041504,
      "logits/rejected": -2.794311046600342,
      "logps/chosen": -247.26991271972656,
      "logps/pi_response": -71.50384521484375,
      "logps/ref_response": -71.02489471435547,
      "logps/rejected": -163.82879638671875,
      "loss": 0.6885,
      "rewards/accuracies": 0.5972222089767456,
      "rewards/chosen": 0.007015190087258816,
      "rewards/margins": 0.0075297304429113865,
      "rewards/rejected": -0.0005145410541445017,
      "step": 10
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.941700805287168e-07,
      "logits/chosen": -2.7059969902038574,
      "logits/rejected": -2.682796001434326,
      "logps/chosen": -233.1520538330078,
      "logps/pi_response": -87.71420288085938,
      "logps/ref_response": -74.39585876464844,
      "logps/rejected": -170.5820770263672,
      "loss": 0.6558,
      "rewards/accuracies": 0.721875011920929,
      "rewards/chosen": 0.023435983806848526,
      "rewards/margins": 0.09170379489660263,
      "rewards/rejected": -0.0682678073644638,
      "step": 20
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3293939665883228e-07,
      "logits/chosen": -2.607896327972412,
      "logits/rejected": -2.5777342319488525,
      "logps/chosen": -247.3583526611328,
      "logps/pi_response": -125.3393325805664,
      "logps/ref_response": -79.46585845947266,
      "logps/rejected": -196.52218627929688,
      "loss": 0.6311,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.16959059238433838,
      "rewards/margins": 0.12048976123332977,
      "rewards/rejected": -0.29008033871650696,
      "step": 30
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.936171419533652e-08,
      "logits/chosen": -2.627960443496704,
      "logits/rejected": -2.5933032035827637,
      "logps/chosen": -272.2423400878906,
      "logps/pi_response": -141.93287658691406,
      "logps/ref_response": -77.87845611572266,
      "logps/rejected": -232.07913208007812,
      "loss": 0.5952,
      "rewards/accuracies": 0.7281249761581421,
      "rewards/chosen": -0.19411739706993103,
      "rewards/margins": 0.2757735848426819,
      "rewards/rejected": -0.4698909819126129,
      "step": 40
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.328513490917311e-09,
      "logits/chosen": -2.6459906101226807,
      "logits/rejected": -2.6088039875030518,
      "logps/chosen": -273.8865051269531,
      "logps/pi_response": -136.73643493652344,
      "logps/ref_response": -74.40654754638672,
      "logps/rejected": -227.901123046875,
      "loss": 0.5864,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.17903554439544678,
      "rewards/margins": 0.36069053411483765,
      "rewards/rejected": -0.5397260785102844,
      "step": 50
    },
    {
      "epoch": 1.0,
      "step": 52,
      "total_flos": 0.0,
      "train_loss": 0.6293867803536929,
      "train_runtime": 3116.1807,
      "train_samples_per_second": 4.279,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 10,
  "max_steps": 52,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|