|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.9984, |
|
"eval_steps": 500, |
|
"global_step": 7810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 114357.3359375, |
|
"learning_rate": 4.740596627756161e-05, |
|
"loss": 3.7821, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 114484.1171875, |
|
"learning_rate": 4.4163424124513617e-05, |
|
"loss": 1.3466, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 74623.7109375, |
|
"learning_rate": 4.092088197146563e-05, |
|
"loss": 1.2312, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 90289.9375, |
|
"learning_rate": 3.767833981841764e-05, |
|
"loss": 1.1712, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 48473.625, |
|
"learning_rate": 3.4435797665369654e-05, |
|
"loss": 1.1258, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 48174.4140625, |
|
"learning_rate": 3.119325551232166e-05, |
|
"loss": 1.1166, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 48501.65234375, |
|
"learning_rate": 2.7950713359273672e-05, |
|
"loss": 1.0865, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 51178.6875, |
|
"learning_rate": 2.4708171206225684e-05, |
|
"loss": 1.0675, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 90527.828125, |
|
"learning_rate": 2.146562905317769e-05, |
|
"loss": 1.0602, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 54371.5234375, |
|
"learning_rate": 1.8223086900129702e-05, |
|
"loss": 1.043, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 159429.1875, |
|
"learning_rate": 1.4980544747081713e-05, |
|
"loss": 1.0311, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 55272.26171875, |
|
"learning_rate": 1.1738002594033724e-05, |
|
"loss": 1.0335, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 42917.234375, |
|
"learning_rate": 8.495460440985733e-06, |
|
"loss": 1.0229, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 53693.88671875, |
|
"learning_rate": 5.2529182879377435e-06, |
|
"loss": 1.0176, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 48436.2265625, |
|
"learning_rate": 2.0103761348897538e-06, |
|
"loss": 1.0157, |
|
"step": 7500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7810, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 30000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.026686829985792e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|